diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2006-03-26 02:34:59 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2006-03-26 02:34:59 (GMT) |
commit | e2ac4abd016310b0ce1152ccf65666b98e33a76e (patch) | |
tree | 7e6e8423a0ee27770c0635196a2ccfada73b33f9 /Lib | |
parent | a531e5b84c62e90ff0eaed1a61caf5d952409397 (diff) | |
download | cpython-e2ac4abd016310b0ce1152ccf65666b98e33a76e.zip cpython-e2ac4abd016310b0ce1152ccf65666b98e33a76e.tar.gz cpython-e2ac4abd016310b0ce1152ccf65666b98e33a76e.tar.bz2 |
Patch #1443155: Add the incremental codecs support for CJK codecs.
(reviewed by Walter Dörwald)
Diffstat (limited to 'Lib')
26 files changed, 730 insertions, 506 deletions
diff --git a/Lib/encodings/big5.py b/Lib/encodings/big5.py index d56aa1b..c864b68 100644 --- a/Lib/encodings/big5.py +++ b/Lib/encodings/big5.py @@ -2,10 +2,10 @@ # big5.py: Python Unicode Codec for BIG5 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_tw, codecs +import _multibytecodec as mbc codec = _codecs_tw.getcodec('big5') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='big5', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/big5hkscs.py b/Lib/encodings/big5hkscs.py index 443997f..9b812a2 100644 --- a/Lib/encodings/big5hkscs.py +++ b/Lib/encodings/big5hkscs.py @@ -2,10 +2,10 @@ # big5hkscs.py: Python Unicode Codec for BIG5HKSCS # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $ # import _codecs_hk, codecs +import _multibytecodec as mbc codec = _codecs_hk.getcodec('big5hkscs') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='big5hkscs', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/cp932.py b/Lib/encodings/cp932.py index 38937f5..54d6bb8 100644 --- a/Lib/encodings/cp932.py +++ b/Lib/encodings/cp932.py @@ -2,10 +2,10 @@ # cp932.py: Python Unicode Codec for CP932 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('cp932') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='cp932', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/cp949.py b/Lib/encodings/cp949.py index 0f3c847..6012925 100644 --- a/Lib/encodings/cp949.py +++ b/Lib/encodings/cp949.py @@ -2,10 +2,10 @@ # cp949.py: Python Unicode Codec for CP949 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_kr, codecs +import _multibytecodec as mbc codec = _codecs_kr.getcodec('cp949') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='cp949', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/cp950.py b/Lib/encodings/cp950.py index dab3e28..b6517d9 100644 --- a/Lib/encodings/cp950.py +++ b/Lib/encodings/cp950.py @@ -2,10 +2,10 @@ # cp950.py: Python Unicode Codec for CP950 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_tw, codecs +import _multibytecodec as mbc codec = _codecs_tw.getcodec('cp950') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='cp950', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_jis_2004.py b/Lib/encodings/euc_jis_2004.py index 02d55ca..88e605a 100644 --- a/Lib/encodings/euc_jis_2004.py +++ b/Lib/encodings/euc_jis_2004.py @@ -2,10 +2,10 @@ # euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jis_2004') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_jis_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_jisx0213.py b/Lib/encodings/euc_jisx0213.py index 30f173e..10d4b31 100644 --- a/Lib/encodings/euc_jisx0213.py +++ b/Lib/encodings/euc_jisx0213.py @@ -2,10 +2,10 @@ # euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jisx0213') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_jisx0213', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_jp.py b/Lib/encodings/euc_jp.py index a3947a3..4dc0b9b 100644 --- a/Lib/encodings/euc_jp.py +++ b/Lib/encodings/euc_jp.py @@ -2,10 +2,10 @@ # euc_jp.py: Python Unicode Codec for EUC_JP # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jp') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_jp', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_kr.py b/Lib/encodings/euc_kr.py index bbebee8..30716f3 100644 --- a/Lib/encodings/euc_kr.py +++ b/Lib/encodings/euc_kr.py @@ -2,10 +2,10 @@ # euc_kr.py: Python Unicode Codec for EUC_KR # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_kr, codecs +import _multibytecodec as mbc codec = _codecs_kr.getcodec('euc_kr') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_kr', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/gb18030.py b/Lib/encodings/gb18030.py index 7eca319..e685cf6 100644 --- a/Lib/encodings/gb18030.py +++ b/Lib/encodings/gb18030.py @@ -2,10 +2,10 @@ # gb18030.py: Python Unicode Codec for GB18030 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('gb18030') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='gb18030', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/gb2312.py b/Lib/encodings/gb2312.py index 5130efa..e99bf1d 100644 --- a/Lib/encodings/gb2312.py +++ b/Lib/encodings/gb2312.py @@ -2,10 +2,10 @@ # gb2312.py: Python Unicode Codec for GB2312 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('gb2312') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='gb2312', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/gbk.py b/Lib/encodings/gbk.py index 67854bc..09123ae 100644 --- a/Lib/encodings/gbk.py +++ b/Lib/encodings/gbk.py @@ -2,10 +2,10 @@ # gbk.py: Python Unicode Codec for GBK # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('gbk') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='gbk', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/hz.py b/Lib/encodings/hz.py index 3940894..06f7d2f 100644 --- a/Lib/encodings/hz.py +++ b/Lib/encodings/hz.py @@ -2,10 +2,10 @@ # hz.py: Python Unicode Codec for HZ # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('hz') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='hz', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp.py b/Lib/encodings/iso2022_jp.py index 109658b..fb04159 100644 --- a/Lib/encodings/iso2022_jp.py +++ b/Lib/encodings/iso2022_jp.py @@ -2,10 +2,10 @@ # iso2022_jp.py: Python Unicode Codec for ISO2022_JP # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_1.py b/Lib/encodings/iso2022_jp_1.py index 201bd28..fde51c2 100644 --- a/Lib/encodings/iso2022_jp_1.py +++ b/Lib/encodings/iso2022_jp_1.py @@ -2,10 +2,10 @@ # iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_1') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_1', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_2.py b/Lib/encodings/iso2022_jp_2.py index 7a61018..766ab46 100644 --- a/Lib/encodings/iso2022_jp_2.py +++ b/Lib/encodings/iso2022_jp_2.py @@ -2,10 +2,10 @@ # iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_2') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_2', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_2004.py b/Lib/encodings/iso2022_jp_2004.py index 2497124..236ab4e 100644 --- a/Lib/encodings/iso2022_jp_2004.py +++ b/Lib/encodings/iso2022_jp_2004.py @@ -2,10 +2,10 @@ # iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_2004') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_3.py b/Lib/encodings/iso2022_jp_3.py index 8b2ed00..e3cf950 100644 --- a/Lib/encodings/iso2022_jp_3.py +++ b/Lib/encodings/iso2022_jp_3.py @@ -2,10 +2,10 @@ # iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_3') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_3', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_ext.py b/Lib/encodings/iso2022_jp_ext.py index 97cb4e7..89d35b5 100644 --- a/Lib/encodings/iso2022_jp_ext.py +++ b/Lib/encodings/iso2022_jp_ext.py @@ -2,10 +2,10 @@ # iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_ext') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_ext', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_kr.py b/Lib/encodings/iso2022_kr.py index f5549ca..41f7ce0 100644 --- a/Lib/encodings/iso2022_kr.py +++ b/Lib/encodings/iso2022_kr.py @@ -2,10 +2,10 @@ # iso2022_kr.py: Python Unicode Codec for ISO2022_KR # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_kr') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_kr', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/johab.py b/Lib/encodings/johab.py index b6a87d7..6a2c993 100644 --- a/Lib/encodings/johab.py +++ b/Lib/encodings/johab.py @@ -2,10 +2,10 @@ # johab.py: Python Unicode Codec for JOHAB # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_kr, codecs +import _multibytecodec as mbc codec = _codecs_kr.getcodec('johab') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='johab', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/shift_jis.py b/Lib/encodings/shift_jis.py index ec5e517..b1f77fc 100644 --- a/Lib/encodings/shift_jis.py +++ b/Lib/encodings/shift_jis.py @@ -2,10 +2,10 @@ # shift_jis.py: Python Unicode Codec for SHIFT_JIS # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jis') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='shift_jis', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/shift_jis_2004.py b/Lib/encodings/shift_jis_2004.py index 446cd7c..6078a52 100644 --- a/Lib/encodings/shift_jis_2004.py +++ b/Lib/encodings/shift_jis_2004.py @@ -2,10 +2,10 @@ # shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jis_2004') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='shift_jis_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/shift_jisx0213.py b/Lib/encodings/shift_jisx0213.py index 495468b..5a0f24c 100644 --- a/Lib/encodings/shift_jisx0213.py +++ b/Lib/encodings/shift_jisx0213.py @@ -2,10 +2,10 @@ # shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jisx0213') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='shift_jisx0213', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index aef7931..8f9f6e9 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -9,11 +9,106 @@ from test import test_support from test import test_multibytecodec_support import unittest, StringIO, codecs +class Test_MultibyteCodec(unittest.TestCase): + + def test_nullcoding(self): + self.assertEqual(''.decode('gb18030'), u'') + self.assertEqual(unicode('', 'gb18030'), u'') + self.assertEqual(u''.encode('gb18030'), '') + + def test_str_decode(self): + self.assertEqual('abcd'.encode('gb18030'), 'abcd') + + +class Test_IncrementalEncoder(unittest.TestCase): + + def test_stateless(self): + # cp949 encoder isn't stateful at all. + encoder = codecs.getincrementalencoder('cp949')() + self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'), + '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb') + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True), + '\xa1\xd9\xa1\xad\xa1\xd9') + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'', True), '') + self.assertEqual(encoder.encode(u'', False), '') + self.assertEqual(encoder.reset(), None) + + def test_stateful(self): + # jisx0213 encoder is stateful for a few codepoints. eg) + # U+00E6 => A9DC + # U+00E6 U+0300 => ABC4 + # U+0300 => ABDC + + encoder = codecs.getincrementalencoder('jisx0213')() + self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4') + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4') + self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc') + + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc') + + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertEqual(encoder.encode('', True), '\xa9\xdc') + self.assertEqual(encoder.encode('', True), '') + + def test_stateful_keep_buffer(self): + encoder = codecs.getincrementalencoder('jisx0213')() + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123') + self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4') + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123') + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc') + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123') + self.assertEqual(encoder.encode(u'', True), '\xa9\xdc') + + +class Test_IncrementalDecoder(unittest.TestCase): + + def test_dbcs(self): + # cp949 decoder is simple with only 1 or 2 bytes sequences. + decoder = codecs.getincrementaldecoder('cp949')() + self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'), + u'\ud30c\uc774') + self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'), + u'\uc36c \ub9c8\uc744') + self.assertEqual(decoder.decode(''), u'') + + def test_dbcs_keep_buffer(self): + decoder = codecs.getincrementaldecoder('cp949')() + self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c') + self.assertRaises(UnicodeDecodeError, decoder.decode, '', True) + self.assertEqual(decoder.decode('\xcc'), u'\uc774') + + self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c') + self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True) + self.assertEqual(decoder.decode('\xcc'), u'\uc774') + + def test_iso2022(self): + decoder = codecs.getincrementaldecoder('iso2022-jp')() + ESC = '\x1b' + self.assertEqual(decoder.decode(ESC + '('), u'') + self.assertEqual(decoder.decode('B', True), u'') + self.assertEqual(decoder.decode(ESC + '$'), u'') + self.assertEqual(decoder.decode('B@$'), u'\u4e16') + self.assertEqual(decoder.decode('@$@'), u'\u4e16') + self.assertEqual(decoder.decode('$', True), u'\u4e16') + self.assertEqual(decoder.reset(), None) + self.assertEqual(decoder.decode('@$'), u'@$') + self.assertEqual(decoder.decode(ESC + '$'), u'') + self.assertRaises(UnicodeDecodeError, decoder.decode, '', True) + self.assertEqual(decoder.decode('B@$'), u'\u4e16') + + class Test_StreamWriter(unittest.TestCase): if len(u'\U00012345') == 2: # UCS2 def test_gb18030(self): s= StringIO.StringIO() - c = codecs.lookup('gb18030')[3](s) + c = codecs.getwriter('gb18030')(s) c.write(u'123') self.assertEqual(s.getvalue(), '123') c.write(u'\U00012345') @@ -30,15 +125,16 @@ class Test_StreamWriter(unittest.TestCase): self.assertEqual(s.getvalue(), '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851') - # standard utf-8 codecs has broken StreamReader - if test_multibytecodec_support.__cjkcodecs__: - def test_utf_8(self): - s= StringIO.StringIO() - c = codecs.lookup('utf-8')[3](s) - c.write(u'123') - self.assertEqual(s.getvalue(), '123') - c.write(u'\U00012345') - self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') + def test_utf_8(self): + s= StringIO.StringIO() + c = codecs.getwriter('utf-8')(s) + c.write(u'123') + self.assertEqual(s.getvalue(), '123') + c.write(u'\U00012345') + self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') + + # Python utf-8 codec can't buffer surrogate pairs yet. + if 0: c.write(u'\U00012345'[0]) self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac') @@ -61,14 +157,6 @@ class Test_StreamWriter(unittest.TestCase): else: # UCS4 pass - def test_nullcoding(self): - self.assertEqual(''.decode('gb18030'), u'') - self.assertEqual(unicode('', 'gb18030'), u'') - self.assertEqual(u''.encode('gb18030'), '') - - def test_str_decode(self): - self.assertEqual('abcd'.encode('gb18030'), 'abcd') - def test_streamwriter_strwrite(self): s = StringIO.StringIO() wr = codecs.getwriter('gb18030')(s) @@ -83,6 +171,9 @@ class Test_ISO2022(unittest.TestCase): def test_main(): suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test_MultibyteCodec)) + suite.addTest(unittest.makeSuite(Test_IncrementalEncoder)) + suite.addTest(unittest.makeSuite(Test_IncrementalDecoder)) suite.addTest(unittest.makeSuite(Test_StreamWriter)) suite.addTest(unittest.makeSuite(Test_ISO2022)) test_support.run_suite(suite) diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index 45a63e7..563a3ea 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -3,15 +3,12 @@ # test_multibytecodec_support.py # Common Unittest Routines for CJK codecs # -# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $ import sys, codecs, os.path import unittest from test import test_support from StringIO import StringIO -__cjkcodecs__ = 0 # define this as 0 for python - class TestBase: encoding = '' # codec name codec = None # codec tuple (with 4 elements) @@ -21,11 +18,17 @@ class TestBase: roundtriptest = 1 # set if roundtrip is possible with unicode has_iso10646 = 0 # set if this encoding contains whole iso10646 map xmlcharnametest = None # string to test xmlcharrefreplace + unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped. def setUp(self): if self.codec is None: self.codec = codecs.lookup(self.encoding) - self.encode, self.decode, self.reader, self.writer = self.codec + self.encode = self.codec.encode + self.decode = self.codec.decode + self.reader = self.codec.streamreader + self.writer = self.codec.streamwriter + self.incrementalencoder = self.codec.incrementalencoder + self.incrementaldecoder = self.codec.incrementaldecoder def test_chunkcoding(self): for native, utf8 in zip(*[StringIO(f).readlines() @@ -47,51 +50,142 @@ class TestBase: else: self.assertRaises(UnicodeError, func, source, scheme) - if sys.hexversion >= 0x02030000: - def test_xmlcharrefreplace(self): - if self.has_iso10646: - return + def test_xmlcharrefreplace(self): + if self.has_iso10646: + return + + s = u"\u0b13\u0b23\u0b60 nd eggs" + self.assertEqual( + self.encode(s, "xmlcharrefreplace")[0], + "ଓଣୠ nd eggs" + ) + + def test_customreplace(self): + if self.has_iso10646: + return + + from htmlentitydefs import codepoint2name + + def xmlcharnamereplace(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + l = [] + for c in exc.object[exc.start:exc.end]: + if ord(c) in codepoint2name: + l.append(u"&%s;" % codepoint2name[ord(c)]) + else: + l.append(u"&#%d;" % ord(c)) + return (u"".join(l), exc.end) + + codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace) - s = u"\u0b13\u0b23\u0b60 nd eggs" - self.assertEqual( - self.encode(s, "xmlcharrefreplace")[0], - "ଓଣୠ nd eggs" - ) + if self.xmlcharnametest: + sin, sout = self.xmlcharnametest + else: + sin = u"\xab\u211c\xbb = \u2329\u1234\u232a" + sout = "«ℜ» = ⟨ሴ⟩" + self.assertEqual(self.encode(sin, + "test.xmlcharnamereplace")[0], sout) + + def test_callback_wrong_objects(self): + def myreplace(exc): + return (ret, exc.end) + codecs.register_error("test.cjktest", myreplace) + + for ret in ([1, 2, 3], [], None, object(), 'string', ''): + self.assertRaises(TypeError, self.encode, self.unmappedunicode, + 'test.cjktest') + + def test_callback_None_index(self): + def myreplace(exc): + return (u'x', None) + codecs.register_error("test.cjktest", myreplace) + self.assertRaises(TypeError, self.encode, self.unmappedunicode, + 'test.cjktest') + + def test_callback_backward_index(self): + def myreplace(exc): + if myreplace.limit > 0: + myreplace.limit -= 1 + return (u'REPLACED', 0) + else: + return (u'TERMINAL', exc.end) + myreplace.limit = 3 + codecs.register_error("test.cjktest", myreplace) + self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh', + 'test.cjktest'), + ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9)) + + def test_callback_forward_index(self): + def myreplace(exc): + return (u'REPLACED', exc.end + 2) + codecs.register_error("test.cjktest", myreplace) + self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh', + 'test.cjktest'), ('abcdREPLACEDgh', 9)) + + def test_callback_index_outofbound(self): + def myreplace(exc): + return (u'TERM', 100) + codecs.register_error("test.cjktest", myreplace) + self.assertRaises(IndexError, self.encode, self.unmappedunicode, + 'test.cjktest') + + def test_incrementalencoder(self): + UTF8Reader = codecs.getreader('utf-8') + for sizehint in [None] + range(1, 33) + \ + [64, 128, 256, 512, 1024]: + istream = UTF8Reader(StringIO(self.tstring[1])) + ostream = StringIO() + encoder = self.incrementalencoder() + while 1: + if sizehint is not None: + data = istream.read(sizehint) + else: + data = istream.read() - def test_customreplace(self): - if self.has_iso10646: - return + if not data: + break + e = encoder.encode(data) + ostream.write(e) - import htmlentitydefs + self.assertEqual(ostream.getvalue(), self.tstring[0]) - names = {} - for (key, value) in htmlentitydefs.entitydefs.items(): - if len(value)==1: - names[value.decode('latin-1')] = self.decode(key)[0] + def test_incrementaldecoder(self): + UTF8Writer = codecs.getwriter('utf-8') + for sizehint in [None, -1] + range(1, 33) + \ + [64, 128, 256, 512, 1024]: + istream = StringIO(self.tstring[0]) + ostream = UTF8Writer(StringIO()) + decoder = self.incrementaldecoder() + while 1: + data = istream.read(sizehint) + if not data: + break else: - names[unichr(int(value[2:-1]))] = self.decode(key)[0] - - def xmlcharnamereplace(exc): - if not isinstance(exc, UnicodeEncodeError): - raise TypeError("don't know how to handle %r" % exc) - l = [] - for c in exc.object[exc.start:exc.end]: - try: - l.append(u"&%s;" % names[c]) - except KeyError: - l.append(u"&#%d;" % ord(c)) - return (u"".join(l), exc.end) - - codecs.register_error( - "test.xmlcharnamereplace", xmlcharnamereplace) - - if self.xmlcharnametest: - sin, sout = self.xmlcharnametest - else: - sin = u"\xab\u211c\xbb = \u2329\u1234\u232a" - sout = "«ℜ» = ⟨ሴ⟩" - self.assertEqual(self.encode(sin, - "test.xmlcharnamereplace")[0], sout) + u = decoder.decode(data) + ostream.write(u) + + self.assertEqual(ostream.getvalue(), self.tstring[1]) + + def test_incrementalencoder_error_callback(self): + inv = self.unmappedunicode + + e = self.incrementalencoder() + self.assertRaises(UnicodeEncodeError, e.encode, inv, True) + + e.errors = 'ignore' + self.assertEqual(e.encode(inv, True), '') + + e.reset() + def tempreplace(exc): + return (u'called', exc.end) + codecs.register_error('test.incremental_error_callback', tempreplace) + e.errors = 'test.incremental_error_callback' + self.assertEqual(e.encode(inv, True), 'called') + + # again + e.errors = 'ignore' + self.assertEqual(e.encode(inv, True), '') def test_streamreader(self): UTF8Writer = codecs.getwriter('utf-8') @@ -113,11 +207,7 @@ class TestBase: self.assertEqual(ostream.getvalue(), self.tstring[1]) def test_streamwriter(self): - if __cjkcodecs__: - readfuncs = ('read', 'readline', 'readlines') - else: - # standard utf8 codec has broken readline and readlines. - readfuncs = ('read',) + readfuncs = ('read', 'readline', 'readlines') UTF8Reader = codecs.getreader('utf-8') for name in readfuncs: for sizehint in [None] + range(1, 33) + \ @@ -211,10 +301,5 @@ class TestBase_Mapping(unittest.TestCase): self.assertEqual(unicode(csetch, self.encoding), unich) def load_teststring(encoding): - if __cjkcodecs__: - etxt = open(os.path.join('sampletexts', encoding) + '.txt').read() - utxt = open(os.path.join('sampletexts', encoding) + '.utf8').read() - return (etxt, utxt) - else: - from test import cjkencodings_test - return cjkencodings_test.teststring[encoding] + from test import cjkencodings_test + return cjkencodings_test.teststring[encoding] |