diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2006-03-26 02:34:59 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2006-03-26 02:34:59 (GMT) |
commit | e2ac4abd016310b0ce1152ccf65666b98e33a76e (patch) | |
tree | 7e6e8423a0ee27770c0635196a2ccfada73b33f9 | |
parent | a531e5b84c62e90ff0eaed1a61caf5d952409397 (diff) | |
download | cpython-e2ac4abd016310b0ce1152ccf65666b98e33a76e.zip cpython-e2ac4abd016310b0ce1152ccf65666b98e33a76e.tar.gz cpython-e2ac4abd016310b0ce1152ccf65666b98e33a76e.tar.bz2 |
Patch #1443155: Add the incremental codecs support for CJK codecs.
(reviewed by Walter Dörwald)
31 files changed, 1625 insertions, 823 deletions
diff --git a/Lib/encodings/big5.py b/Lib/encodings/big5.py index d56aa1b..c864b68 100644 --- a/Lib/encodings/big5.py +++ b/Lib/encodings/big5.py @@ -2,10 +2,10 @@ # big5.py: Python Unicode Codec for BIG5 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_tw, codecs +import _multibytecodec as mbc codec = _codecs_tw.getcodec('big5') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='big5', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/big5hkscs.py b/Lib/encodings/big5hkscs.py index 443997f..9b812a2 100644 --- a/Lib/encodings/big5hkscs.py +++ b/Lib/encodings/big5hkscs.py @@ -2,10 +2,10 @@ # big5hkscs.py: Python Unicode Codec for BIG5HKSCS # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $ # import _codecs_hk, codecs +import _multibytecodec as mbc codec = _codecs_hk.getcodec('big5hkscs') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='big5hkscs', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/cp932.py b/Lib/encodings/cp932.py index 38937f5..54d6bb8 100644 --- a/Lib/encodings/cp932.py +++ b/Lib/encodings/cp932.py @@ -2,10 +2,10 @@ # cp932.py: Python Unicode Codec for CP932 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('cp932') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='cp932', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/cp949.py b/Lib/encodings/cp949.py index 0f3c847..6012925 100644 --- a/Lib/encodings/cp949.py +++ b/Lib/encodings/cp949.py @@ -2,10 +2,10 @@ # cp949.py: Python Unicode Codec for CP949 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_kr, codecs +import _multibytecodec as mbc codec = _codecs_kr.getcodec('cp949') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='cp949', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/cp950.py b/Lib/encodings/cp950.py index dab3e28..b6517d9 100644 --- a/Lib/encodings/cp950.py +++ b/Lib/encodings/cp950.py @@ -2,10 +2,10 @@ # cp950.py: Python Unicode Codec for CP950 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_tw, codecs +import _multibytecodec as mbc codec = _codecs_tw.getcodec('cp950') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='cp950', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_jis_2004.py b/Lib/encodings/euc_jis_2004.py index 02d55ca..88e605a 100644 --- a/Lib/encodings/euc_jis_2004.py +++ b/Lib/encodings/euc_jis_2004.py @@ -2,10 +2,10 @@ # euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jis_2004') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_jis_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_jisx0213.py b/Lib/encodings/euc_jisx0213.py index 30f173e..10d4b31 100644 --- a/Lib/encodings/euc_jisx0213.py +++ b/Lib/encodings/euc_jisx0213.py @@ -2,10 +2,10 @@ # euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jisx0213') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_jisx0213', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_jp.py b/Lib/encodings/euc_jp.py index a3947a3..4dc0b9b 100644 --- a/Lib/encodings/euc_jp.py +++ b/Lib/encodings/euc_jp.py @@ -2,10 +2,10 @@ # euc_jp.py: Python Unicode Codec for EUC_JP # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('euc_jp') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_jp', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/euc_kr.py b/Lib/encodings/euc_kr.py index bbebee8..30716f3 100644 --- a/Lib/encodings/euc_kr.py +++ b/Lib/encodings/euc_kr.py @@ -2,10 +2,10 @@ # euc_kr.py: Python Unicode Codec for EUC_KR # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_kr, codecs +import _multibytecodec as mbc codec = _codecs_kr.getcodec('euc_kr') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='euc_kr', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/gb18030.py b/Lib/encodings/gb18030.py index 7eca319..e685cf6 100644 --- a/Lib/encodings/gb18030.py +++ b/Lib/encodings/gb18030.py @@ -2,10 +2,10 @@ # gb18030.py: Python Unicode Codec for GB18030 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('gb18030') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='gb18030', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/gb2312.py b/Lib/encodings/gb2312.py index 5130efa..e99bf1d 100644 --- a/Lib/encodings/gb2312.py +++ b/Lib/encodings/gb2312.py @@ -2,10 +2,10 @@ # gb2312.py: Python Unicode Codec for GB2312 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('gb2312') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='gb2312', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/gbk.py b/Lib/encodings/gbk.py index 67854bc..09123ae 100644 --- a/Lib/encodings/gbk.py +++ b/Lib/encodings/gbk.py @@ -2,10 +2,10 @@ # gbk.py: Python Unicode Codec for GBK # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('gbk') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='gbk', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/hz.py b/Lib/encodings/hz.py index 3940894..06f7d2f 100644 --- a/Lib/encodings/hz.py +++ b/Lib/encodings/hz.py @@ -2,10 +2,10 @@ # hz.py: Python Unicode Codec for HZ # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_cn, codecs +import _multibytecodec as mbc codec = _codecs_cn.getcodec('hz') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='hz', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp.py b/Lib/encodings/iso2022_jp.py index 109658b..fb04159 100644 --- a/Lib/encodings/iso2022_jp.py +++ b/Lib/encodings/iso2022_jp.py @@ -2,10 +2,10 @@ # iso2022_jp.py: Python Unicode Codec for ISO2022_JP # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_1.py b/Lib/encodings/iso2022_jp_1.py index 201bd28..fde51c2 100644 --- a/Lib/encodings/iso2022_jp_1.py +++ b/Lib/encodings/iso2022_jp_1.py @@ -2,10 +2,10 @@ # iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_1') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_1', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_2.py b/Lib/encodings/iso2022_jp_2.py index 7a61018..766ab46 100644 --- a/Lib/encodings/iso2022_jp_2.py +++ b/Lib/encodings/iso2022_jp_2.py @@ -2,10 +2,10 @@ # iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_2') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_2', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_2004.py b/Lib/encodings/iso2022_jp_2004.py index 2497124..236ab4e 100644 --- a/Lib/encodings/iso2022_jp_2004.py +++ b/Lib/encodings/iso2022_jp_2004.py @@ -2,10 +2,10 @@ # iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_2004') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_3.py b/Lib/encodings/iso2022_jp_3.py index 8b2ed00..e3cf950 100644 --- a/Lib/encodings/iso2022_jp_3.py +++ b/Lib/encodings/iso2022_jp_3.py @@ -2,10 +2,10 @@ # iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_3') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_3', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_jp_ext.py b/Lib/encodings/iso2022_jp_ext.py index 97cb4e7..89d35b5 100644 --- a/Lib/encodings/iso2022_jp_ext.py +++ b/Lib/encodings/iso2022_jp_ext.py @@ -2,10 +2,10 @@ # iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_jp_ext') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_jp_ext', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/iso2022_kr.py b/Lib/encodings/iso2022_kr.py index f5549ca..41f7ce0 100644 --- a/Lib/encodings/iso2022_kr.py +++ b/Lib/encodings/iso2022_kr.py @@ -2,10 +2,10 @@ # iso2022_kr.py: Python Unicode Codec for ISO2022_KR # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $ # import _codecs_iso2022, codecs +import _multibytecodec as mbc codec = _codecs_iso2022.getcodec('iso2022_kr') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='iso2022_kr', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/johab.py b/Lib/encodings/johab.py index b6a87d7..6a2c993 100644 --- a/Lib/encodings/johab.py +++ b/Lib/encodings/johab.py @@ -2,10 +2,10 @@ # johab.py: Python Unicode Codec for JOHAB # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_kr, codecs +import _multibytecodec as mbc codec = _codecs_kr.getcodec('johab') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='johab', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/shift_jis.py b/Lib/encodings/shift_jis.py index ec5e517..b1f77fc 100644 --- a/Lib/encodings/shift_jis.py +++ b/Lib/encodings/shift_jis.py @@ -2,10 +2,10 @@ # shift_jis.py: Python Unicode Codec for SHIFT_JIS # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jis') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='shift_jis', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/shift_jis_2004.py b/Lib/encodings/shift_jis_2004.py index 446cd7c..6078a52 100644 --- a/Lib/encodings/shift_jis_2004.py +++ b/Lib/encodings/shift_jis_2004.py @@ -2,10 +2,10 @@ # shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jis_2004') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='shift_jis_2004', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/encodings/shift_jisx0213.py b/Lib/encodings/shift_jisx0213.py index 495468b..5a0f24c 100644 --- a/Lib/encodings/shift_jisx0213.py +++ b/Lib/encodings/shift_jisx0213.py @@ -2,10 +2,10 @@ # shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213 # # Written by Hye-Shik Chang <perky@FreeBSD.org> -# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $ # import _codecs_jp, codecs +import _multibytecodec as mbc codec = _codecs_jp.getcodec('shift_jisx0213') @@ -13,22 +13,24 @@ class Codec(codecs.Codec): encode = codec.encode decode = codec.decode -class StreamReader(Codec, codecs.StreamReader): - def __init__(self, stream, errors='strict'): - codecs.StreamReader.__init__(self, stream, errors) - __codec = codec.StreamReader(stream, errors) - self.read = __codec.read - self.readline = __codec.readline - self.readlines = __codec.readlines - self.reset = __codec.reset - -class StreamWriter(Codec, codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - __codec = codec.StreamWriter(stream, errors) - self.write = __codec.write - self.writelines = __codec.writelines - self.reset = __codec.reset +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec def getregentry(): - return (codec.encode, codec.decode, StreamReader, StreamWriter) + return codecs.CodecInfo( + name='shift_jisx0213', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index aef7931..8f9f6e9 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -9,11 +9,106 @@ from test import test_support from test import test_multibytecodec_support import unittest, StringIO, codecs +class Test_MultibyteCodec(unittest.TestCase): + + def test_nullcoding(self): + self.assertEqual(''.decode('gb18030'), u'') + self.assertEqual(unicode('', 'gb18030'), u'') + self.assertEqual(u''.encode('gb18030'), '') + + def test_str_decode(self): + self.assertEqual('abcd'.encode('gb18030'), 'abcd') + + +class Test_IncrementalEncoder(unittest.TestCase): + + def test_stateless(self): + # cp949 encoder isn't stateful at all. + encoder = codecs.getincrementalencoder('cp949')() + self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'), + '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb') + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True), + '\xa1\xd9\xa1\xad\xa1\xd9') + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'', True), '') + self.assertEqual(encoder.encode(u'', False), '') + self.assertEqual(encoder.reset(), None) + + def test_stateful(self): + # jisx0213 encoder is stateful for a few codepoints. eg) + # U+00E6 => A9DC + # U+00E6 U+0300 => ABC4 + # U+0300 => ABDC + + encoder = codecs.getincrementalencoder('jisx0213')() + self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4') + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4') + self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc') + + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc') + + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertEqual(encoder.encode('', True), '\xa9\xdc') + self.assertEqual(encoder.encode('', True), '') + + def test_stateful_keep_buffer(self): + encoder = codecs.getincrementalencoder('jisx0213')() + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123') + self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4') + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123') + self.assertEqual(encoder.reset(), None) + self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc') + self.assertEqual(encoder.encode(u'\u00e6'), '') + self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123') + self.assertEqual(encoder.encode(u'', True), '\xa9\xdc') + + +class Test_IncrementalDecoder(unittest.TestCase): + + def test_dbcs(self): + # cp949 decoder is simple with only 1 or 2 bytes sequences. + decoder = codecs.getincrementaldecoder('cp949')() + self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'), + u'\ud30c\uc774') + self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'), + u'\uc36c \ub9c8\uc744') + self.assertEqual(decoder.decode(''), u'') + + def test_dbcs_keep_buffer(self): + decoder = codecs.getincrementaldecoder('cp949')() + self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c') + self.assertRaises(UnicodeDecodeError, decoder.decode, '', True) + self.assertEqual(decoder.decode('\xcc'), u'\uc774') + + self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c') + self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True) + self.assertEqual(decoder.decode('\xcc'), u'\uc774') + + def test_iso2022(self): + decoder = codecs.getincrementaldecoder('iso2022-jp')() + ESC = '\x1b' + self.assertEqual(decoder.decode(ESC + '('), u'') + self.assertEqual(decoder.decode('B', True), u'') + self.assertEqual(decoder.decode(ESC + '$'), u'') + self.assertEqual(decoder.decode('B@$'), u'\u4e16') + self.assertEqual(decoder.decode('@$@'), u'\u4e16') + self.assertEqual(decoder.decode('$', True), u'\u4e16') + self.assertEqual(decoder.reset(), None) + self.assertEqual(decoder.decode('@$'), u'@$') + self.assertEqual(decoder.decode(ESC + '$'), u'') + self.assertRaises(UnicodeDecodeError, decoder.decode, '', True) + self.assertEqual(decoder.decode('B@$'), u'\u4e16') + + class Test_StreamWriter(unittest.TestCase): if len(u'\U00012345') == 2: # UCS2 def test_gb18030(self): s= StringIO.StringIO() - c = codecs.lookup('gb18030')[3](s) + c = codecs.getwriter('gb18030')(s) c.write(u'123') self.assertEqual(s.getvalue(), '123') c.write(u'\U00012345') @@ -30,15 +125,16 @@ class Test_StreamWriter(unittest.TestCase): self.assertEqual(s.getvalue(), '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851') - # standard utf-8 codecs has broken StreamReader - if test_multibytecodec_support.__cjkcodecs__: - def test_utf_8(self): - s= StringIO.StringIO() - c = codecs.lookup('utf-8')[3](s) - c.write(u'123') - self.assertEqual(s.getvalue(), '123') - c.write(u'\U00012345') - self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') + def test_utf_8(self): + s= StringIO.StringIO() + c = codecs.getwriter('utf-8')(s) + c.write(u'123') + self.assertEqual(s.getvalue(), '123') + c.write(u'\U00012345') + self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') + + # Python utf-8 codec can't buffer surrogate pairs yet. + if 0: c.write(u'\U00012345'[0]) self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85') c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac') @@ -61,14 +157,6 @@ class Test_StreamWriter(unittest.TestCase): else: # UCS4 pass - def test_nullcoding(self): - self.assertEqual(''.decode('gb18030'), u'') - self.assertEqual(unicode('', 'gb18030'), u'') - self.assertEqual(u''.encode('gb18030'), '') - - def test_str_decode(self): - self.assertEqual('abcd'.encode('gb18030'), 'abcd') - def test_streamwriter_strwrite(self): s = StringIO.StringIO() wr = codecs.getwriter('gb18030')(s) @@ -83,6 +171,9 @@ class Test_ISO2022(unittest.TestCase): def test_main(): suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test_MultibyteCodec)) + suite.addTest(unittest.makeSuite(Test_IncrementalEncoder)) + suite.addTest(unittest.makeSuite(Test_IncrementalDecoder)) suite.addTest(unittest.makeSuite(Test_StreamWriter)) suite.addTest(unittest.makeSuite(Test_ISO2022)) test_support.run_suite(suite) diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index 45a63e7..563a3ea 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -3,15 +3,12 @@ # test_multibytecodec_support.py # Common Unittest Routines for CJK codecs # -# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $ import sys, codecs, os.path import unittest from test import test_support from StringIO import StringIO -__cjkcodecs__ = 0 # define this as 0 for python - class TestBase: encoding = '' # codec name codec = None # codec tuple (with 4 elements) @@ -21,11 +18,17 @@ class TestBase: roundtriptest = 1 # set if roundtrip is possible with unicode has_iso10646 = 0 # set if this encoding contains whole iso10646 map xmlcharnametest = None # string to test xmlcharrefreplace + unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped. def setUp(self): if self.codec is None: self.codec = codecs.lookup(self.encoding) - self.encode, self.decode, self.reader, self.writer = self.codec + self.encode = self.codec.encode + self.decode = self.codec.decode + self.reader = self.codec.streamreader + self.writer = self.codec.streamwriter + self.incrementalencoder = self.codec.incrementalencoder + self.incrementaldecoder = self.codec.incrementaldecoder def test_chunkcoding(self): for native, utf8 in zip(*[StringIO(f).readlines() @@ -47,51 +50,142 @@ class TestBase: else: self.assertRaises(UnicodeError, func, source, scheme) - if sys.hexversion >= 0x02030000: - def test_xmlcharrefreplace(self): - if self.has_iso10646: - return + def test_xmlcharrefreplace(self): + if self.has_iso10646: + return + + s = u"\u0b13\u0b23\u0b60 nd eggs" + self.assertEqual( + self.encode(s, "xmlcharrefreplace")[0], + "ଓଣୠ nd eggs" + ) + + def test_customreplace(self): + if self.has_iso10646: + return + + from htmlentitydefs import codepoint2name + + def xmlcharnamereplace(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + l = [] + for c in exc.object[exc.start:exc.end]: + if ord(c) in codepoint2name: + l.append(u"&%s;" % codepoint2name[ord(c)]) + else: + l.append(u"&#%d;" % ord(c)) + return (u"".join(l), exc.end) + + codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace) - s = u"\u0b13\u0b23\u0b60 nd eggs" - self.assertEqual( - self.encode(s, "xmlcharrefreplace")[0], - "ଓଣୠ nd eggs" - ) + if self.xmlcharnametest: + sin, sout = self.xmlcharnametest + else: + sin = u"\xab\u211c\xbb = \u2329\u1234\u232a" + sout = "«ℜ» = ⟨ሴ⟩" + self.assertEqual(self.encode(sin, + "test.xmlcharnamereplace")[0], sout) + + def test_callback_wrong_objects(self): + def myreplace(exc): + return (ret, exc.end) + codecs.register_error("test.cjktest", myreplace) + + for ret in ([1, 2, 3], [], None, object(), 'string', ''): + self.assertRaises(TypeError, self.encode, self.unmappedunicode, + 'test.cjktest') + + def test_callback_None_index(self): + def myreplace(exc): + return (u'x', None) + codecs.register_error("test.cjktest", myreplace) + self.assertRaises(TypeError, self.encode, self.unmappedunicode, + 'test.cjktest') + + def test_callback_backward_index(self): + def myreplace(exc): + if myreplace.limit > 0: + myreplace.limit -= 1 + return (u'REPLACED', 0) + else: + return (u'TERMINAL', exc.end) + myreplace.limit = 3 + codecs.register_error("test.cjktest", myreplace) + self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh', + 'test.cjktest'), + ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9)) + + def test_callback_forward_index(self): + def myreplace(exc): + return (u'REPLACED', exc.end + 2) + codecs.register_error("test.cjktest", myreplace) + self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh', + 'test.cjktest'), ('abcdREPLACEDgh', 9)) + + def test_callback_index_outofbound(self): + def myreplace(exc): + return (u'TERM', 100) + codecs.register_error("test.cjktest", myreplace) + self.assertRaises(IndexError, self.encode, self.unmappedunicode, + 'test.cjktest') + + def test_incrementalencoder(self): + UTF8Reader = codecs.getreader('utf-8') + for sizehint in [None] + range(1, 33) + \ + [64, 128, 256, 512, 1024]: + istream = UTF8Reader(StringIO(self.tstring[1])) + ostream = StringIO() + encoder = self.incrementalencoder() + while 1: + if sizehint is not None: + data = istream.read(sizehint) + else: + data = istream.read() - def test_customreplace(self): - if self.has_iso10646: - return + if not data: + break + e = encoder.encode(data) + ostream.write(e) - import htmlentitydefs + self.assertEqual(ostream.getvalue(), self.tstring[0]) - names = {} - for (key, value) in htmlentitydefs.entitydefs.items(): - if len(value)==1: - names[value.decode('latin-1')] = self.decode(key)[0] + def test_incrementaldecoder(self): + UTF8Writer = codecs.getwriter('utf-8') + for sizehint in [None, -1] + range(1, 33) + \ + [64, 128, 256, 512, 1024]: + istream = StringIO(self.tstring[0]) + ostream = UTF8Writer(StringIO()) + decoder = self.incrementaldecoder() + while 1: + data = istream.read(sizehint) + if not data: + break else: - names[unichr(int(value[2:-1]))] = self.decode(key)[0] - - def xmlcharnamereplace(exc): - if not isinstance(exc, UnicodeEncodeError): - raise TypeError("don't know how to handle %r" % exc) - l = [] - for c in exc.object[exc.start:exc.end]: - try: - l.append(u"&%s;" % names[c]) - except KeyError: - l.append(u"&#%d;" % ord(c)) - return (u"".join(l), exc.end) - - codecs.register_error( - "test.xmlcharnamereplace", xmlcharnamereplace) - - if self.xmlcharnametest: - sin, sout = self.xmlcharnametest - else: - sin = u"\xab\u211c\xbb = \u2329\u1234\u232a" - sout = "«ℜ» = ⟨ሴ⟩" - self.assertEqual(self.encode(sin, - "test.xmlcharnamereplace")[0], sout) + u = decoder.decode(data) + ostream.write(u) + + self.assertEqual(ostream.getvalue(), self.tstring[1]) + + def test_incrementalencoder_error_callback(self): + inv = self.unmappedunicode + + e = self.incrementalencoder() + self.assertRaises(UnicodeEncodeError, e.encode, inv, True) + + e.errors = 'ignore' + self.assertEqual(e.encode(inv, True), '') + + e.reset() + def tempreplace(exc): + return (u'called', exc.end) + codecs.register_error('test.incremental_error_callback', tempreplace) + e.errors = 'test.incremental_error_callback' + self.assertEqual(e.encode(inv, True), 'called') + + # again + e.errors = 'ignore' + self.assertEqual(e.encode(inv, True), '') def test_streamreader(self): UTF8Writer = codecs.getwriter('utf-8') @@ -113,11 +207,7 @@ class TestBase: self.assertEqual(ostream.getvalue(), self.tstring[1]) def test_streamwriter(self): - if __cjkcodecs__: - readfuncs = ('read', 'readline', 'readlines') - else: - # standard utf8 codec has broken readline and readlines. - readfuncs = ('read',) + readfuncs = ('read', 'readline', 'readlines') UTF8Reader = codecs.getreader('utf-8') for name in readfuncs: for sizehint in [None] + range(1, 33) + \ @@ -211,10 +301,5 @@ class TestBase_Mapping(unittest.TestCase): self.assertEqual(unicode(csetch, self.encoding), unich) def load_teststring(encoding): - if __cjkcodecs__: - etxt = open(os.path.join('sampletexts', encoding) + '.txt').read() - utxt = open(os.path.join('sampletexts', encoding) + '.utf8').read() - return (etxt, utxt) - else: - from test import cjkencodings_test - return cjkencodings_test.teststring[encoding] + from test import cjkencodings_test + return cjkencodings_test.teststring[encoding] diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index fd048d9..fb51297 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -217,11 +217,8 @@ ENCODER(gb18030) break; } - if (utrrange->first == 0) { - PyErr_SetString(PyExc_RuntimeError, - "unicode mapping invalid"); + if (utrrange->first == 0) return 1; - } continue; } diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index f51b6f2..26d5c94 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -6,6 +6,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "structmember.h" #include "multibytecodec.h" typedef struct { @@ -38,22 +39,14 @@ that encoding errors raise a UnicodeDecodeError. Other possible values\n\ are 'ignore' and 'replace' as well as any other name registerd with\n\ codecs.register_error that is able to handle UnicodeDecodeErrors."); -PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__, -"I.StreamReader(stream[, errors]) -> StreamReader instance"); - -PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__, -"I.StreamWriter(stream[, errors]) -> StreamWriter instance"); - static char *codeckwarglist[] = {"input", "errors", NULL}; +static char *incnewkwarglist[] = {"errors", NULL}; +static char *incrementalkwarglist[] = {"input", "final", NULL}; static char *streamkwarglist[] = {"stream", "errors", NULL}; static PyObject *multibytecodec_encode(MultibyteCodec *, MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, PyObject *, int); -static PyObject *mbstreamreader_create(MultibyteCodec *, - PyObject *, const char *); -static PyObject *mbstreamwriter_create(MultibyteCodec *, - PyObject *, const char *); #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ @@ -83,7 +76,7 @@ make_tuple(PyObject *object, Py_ssize_t len) } static PyObject * -get_errorcallback(const char *errors) +internal_error_callback(const char *errors) { if (errors == NULL || strcmp(errors, "strict") == 0) return ERROR_STRICT; @@ -91,17 +84,88 @@ get_errorcallback(const char *errors) return ERROR_IGNORE; else if (strcmp(errors, "replace") == 0) return ERROR_REPLACE; + else + return PyString_FromString(errors); +} + +static PyObject * +call_error_callback(PyObject *errors, PyObject *exc) +{ + PyObject *args, *cb, *r; + + assert(PyString_Check(errors)); + cb = PyCodec_LookupError(PyString_AS_STRING(errors)); + if (cb == NULL) + return NULL; + + args = PyTuple_New(1); + if (args == NULL) { + Py_DECREF(cb); + return NULL; + } + + PyTuple_SET_ITEM(args, 0, exc); + Py_INCREF(exc); + + r = PyObject_CallObject(cb, args); + Py_DECREF(args); + Py_DECREF(cb); + return r; +} + +static PyObject * +codecctx_errors_get(MultibyteStatefulCodecContext *self) +{ + const char *errors; + + if (self->errors == ERROR_STRICT) + errors = "strict"; + else if (self->errors == ERROR_IGNORE) + errors = "ignore"; + else if (self->errors == ERROR_REPLACE) + errors = "replace"; else { - return PyCodec_LookupError(errors); + Py_INCREF(self->errors); + return self->errors; } + + return PyString_FromString(errors); +} + +static int +codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, + void *closure) +{ + PyObject *cb; + + if (!PyString_Check(value)) { + PyErr_SetString(PyExc_TypeError, "errors must be a string"); + return -1; + } + + cb = internal_error_callback(PyString_AS_STRING(value)); + if (cb == NULL) + return -1; + + ERROR_DECREF(self->errors); + self->errors = cb; + return 0; } +/* This getset handlers list is used by all the stateful codec objects */ +static PyGetSetDef codecctx_getsets[] = { + {"errors", (getter)codecctx_errors_get, + (setter)codecctx_errors_set, + PyDoc_STR("how to treat errors")}, + {NULL,} +}; + static int expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) { Py_ssize_t orgpos, orgsize; - orgpos = (Py_ssize_t)((char*)buf->outbuf - + orgpos = (Py_ssize_t)((char *)buf->outbuf - PyString_AS_STRING(buf->outobj)); orgsize = PyString_GET_SIZE(buf->outobj); if (_PyString_Resize(&buf->outobj, orgsize + ( @@ -125,8 +189,7 @@ expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) { Py_ssize_t orgpos, orgsize; - orgpos = (Py_ssize_t)(buf->outbuf - - PyUnicode_AS_UNICODE(buf->outobj)); + orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); orgsize = PyUnicode_GET_SIZE(buf->outobj); if (PyUnicode_Resize(&buf->outobj, orgsize + ( esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) @@ -144,16 +207,21 @@ expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) goto errorexit; \ } + +/** + * MultibyteCodec object + */ + static int multibytecodec_encerror(MultibyteCodec *codec, MultibyteCodec_State *state, MultibyteEncodeBuffer *buf, PyObject *errors, Py_ssize_t e) { - PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj; + PyObject *retobj = NULL, *retstr = NULL, *tobj; Py_ssize_t retstrsize, newpos; - const char *reason; Py_ssize_t esize, start, end; + const char *reason; if (e > 0) { reason = "illegal multibyte sequence"; @@ -166,7 +234,7 @@ multibytecodec_encerror(MultibyteCodec *codec, return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; - esize = (size_t)(buf->inbuf_end - buf->inbuf); + esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); break; case MBERR_INTERNAL: PyErr_SetString(PyExc_RuntimeError, @@ -230,21 +298,14 @@ multibytecodec_encerror(MultibyteCodec *codec, goto errorexit; } - argsobj = PyTuple_New(1); - if (argsobj == NULL) - goto errorexit; - - PyTuple_SET_ITEM(argsobj, 0, buf->excobj); - Py_INCREF(buf->excobj); - retobj = PyObject_CallObject(errors, argsobj); - Py_DECREF(argsobj); + retobj = call_error_callback(errors, buf->excobj); if (retobj == NULL) goto errorexit; if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { - PyErr_SetString(PyExc_ValueError, + PyErr_SetString(PyExc_TypeError, "encoding error handler must return " "(unicode, int) tuple"); goto errorexit; @@ -293,7 +354,7 @@ multibytecodec_decerror(MultibyteCodec *codec, MultibyteDecodeBuffer *buf, PyObject *errors, Py_ssize_t e) { - PyObject *argsobj, *retobj = NULL, *retuni = NULL; + PyObject *retobj = NULL, *retuni = NULL; Py_ssize_t retunisize, newpos; const char *reason; Py_ssize_t esize, start, end; @@ -309,7 +370,7 @@ multibytecodec_decerror(MultibyteCodec *codec, return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; - esize = (size_t)(buf->inbuf_end - buf->inbuf); + esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); break; case MBERR_INTERNAL: PyErr_SetString(PyExc_RuntimeError, @@ -354,21 +415,14 @@ multibytecodec_decerror(MultibyteCodec *codec, goto errorexit; } - argsobj = PyTuple_New(1); - if (argsobj == NULL) - goto errorexit; - - PyTuple_SET_ITEM(argsobj, 0, buf->excobj); - Py_INCREF(buf->excobj); - retobj = PyObject_CallObject(errors, argsobj); - Py_DECREF(argsobj); + retobj = call_error_callback(errors, buf->excobj); if (retobj == NULL) goto errorexit; if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { - PyErr_SetString(PyExc_ValueError, + PyErr_SetString(PyExc_TypeError, "decoding error handler must return " "(unicode, int) tuple"); goto errorexit; @@ -453,7 +507,7 @@ multibytecodec_encode(MultibyteCodec *codec, goto errorexit; } - finalsize = (Py_ssize_t)((char*)buf.outbuf - + finalsize = (Py_ssize_t)((char *)buf.outbuf - PyString_AS_STRING(buf.outobj)); if (finalsize != PyString_GET_SIZE(buf.outobj)) @@ -500,7 +554,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, data = PyUnicode_AS_UNICODE(arg); datalen = PyUnicode_GET_SIZE(arg); - errorcb = get_errorcallback(errors); + errorcb = internal_error_callback(errors); if (errorcb == NULL) { Py_XDECREF(ucvt); return NULL; @@ -515,16 +569,12 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, if (r == NULL) goto errorexit; - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); - } + ERROR_DECREF(errorcb); Py_XDECREF(ucvt); return make_tuple(r, datalen); errorexit: - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); - } + ERROR_DECREF(errorcb); Py_XDECREF(ucvt); return NULL; } @@ -543,18 +593,16 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, codeckwarglist, &data, &datalen, &errors)) return NULL; - errorcb = get_errorcallback(errors); + errorcb = internal_error_callback(errors); if (errorcb == NULL) return NULL; if (datalen == 0) { - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); - } + ERROR_DECREF(errorcb); return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); } - buf.outobj = buf.excobj = NULL; + buf.excobj = NULL; buf.inbuf = buf.inbuf_top = (unsigned char *)data; buf.inbuf_end = buf.inbuf_top + datalen; buf.outobj = PyUnicode_FromUnicode(NULL, datalen); @@ -590,49 +638,17 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, goto errorexit; Py_XDECREF(buf.excobj); - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); - } + ERROR_DECREF(errorcb); return make_tuple(buf.outobj, datalen); errorexit: - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); - } + ERROR_DECREF(errorcb); Py_XDECREF(buf.excobj); Py_XDECREF(buf.outobj); return NULL; } -static PyObject * -MultibyteCodec_StreamReader(MultibyteCodecObject *self, - PyObject *args, PyObject *kwargs) -{ - PyObject *stream; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader", - streamkwarglist, &stream, &errors)) - return NULL; - - return mbstreamreader_create(self->codec, stream, errors); -} - -static PyObject * -MultibyteCodec_StreamWriter(MultibyteCodecObject *self, - PyObject *args, PyObject *kwargs) -{ - PyObject *stream; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter", - streamkwarglist, &stream, &errors)) - return NULL; - - return mbstreamwriter_create(self->codec, stream, errors); -} - static struct PyMethodDef multibytecodec_methods[] = { {"encode", (PyCFunction)MultibyteCodec_Encode, METH_VARARGS | METH_KEYWORDS, @@ -640,12 +656,6 @@ static struct PyMethodDef multibytecodec_methods[] = { {"decode", (PyCFunction)MultibyteCodec_Decode, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_Decode__doc__}, - {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader, - METH_VARARGS | METH_KEYWORDS, - MultibyteCodec_StreamReader__doc__}, - {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter, - METH_VARARGS | METH_KEYWORDS, - MultibyteCodec_StreamWriter__doc__}, {NULL, NULL}, }; @@ -655,8 +665,6 @@ multibytecodec_dealloc(MultibyteCodecObject *self) PyObject_Del(self); } - - static PyTypeObject MultibyteCodec_Type = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ @@ -690,13 +698,492 @@ static PyTypeObject MultibyteCodec_Type = { multibytecodec_methods, /* tp_methods */ }; + +/** + * Utility functions for stateful codec mechanism + */ + +#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) +#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) + +static PyObject * +encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, + PyObject *unistr, int final) +{ + PyObject *ucvt, *r = NULL; + Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; + Py_ssize_t datalen, origpending; + + if (PyUnicode_Check(unistr)) + ucvt = NULL; + else { + unistr = ucvt = PyObject_Unicode(unistr); + if (unistr == NULL) + return NULL; + else if (!PyUnicode_Check(unistr)) { + PyErr_SetString(PyExc_TypeError, + "couldn't convert the object to unicode."); + Py_DECREF(ucvt); + return NULL; + } + } + + datalen = PyUnicode_GET_SIZE(unistr); + origpending = ctx->pendingsize; + + if (ctx->pendingsize > 0) { + inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); + if (inbuf_tmp == NULL) + goto errorexit; + memcpy(inbuf_tmp, ctx->pending, + Py_UNICODE_SIZE * ctx->pendingsize); + memcpy(inbuf_tmp + ctx->pendingsize, + PyUnicode_AS_UNICODE(unistr), + Py_UNICODE_SIZE * datalen); + datalen += ctx->pendingsize; + ctx->pendingsize = 0; + inbuf = inbuf_tmp; + } + else + inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); + + inbuf_end = inbuf + datalen; + + r = multibytecodec_encode(ctx->codec, &ctx->state, + (const Py_UNICODE **)&inbuf, + datalen, ctx->errors, final ? MBENC_FLUSH : 0); + if (r == NULL) { + /* recover the original pending buffer */ + memcpy(ctx->pending, inbuf_tmp, Py_UNICODE_SIZE * origpending); + ctx->pendingsize = origpending; + goto errorexit; + } + + if (inbuf < inbuf_end) { + ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); + if (ctx->pendingsize > MAXENCPENDING) { + /* normal codecs can't reach here */ + ctx->pendingsize = 0; + PyErr_SetString(PyExc_UnicodeError, + "pending buffer overflow"); + goto errorexit; + } + memcpy(ctx->pending, inbuf, + ctx->pendingsize * Py_UNICODE_SIZE); + } + + if (inbuf_tmp != NULL) + PyMem_Del(inbuf_tmp); + Py_XDECREF(ucvt); + return r; + +errorexit: + if (inbuf_tmp != NULL) + PyMem_Del(inbuf_tmp); + Py_XDECREF(r); + Py_XDECREF(ucvt); + return NULL; +} + +static int +decoder_append_pending(MultibyteStatefulDecoderContext *ctx, + MultibyteDecodeBuffer *buf) +{ + Py_ssize_t npendings; + + npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + if (npendings + ctx->pendingsize > MAXDECPENDING) { + PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); + return -1; + } + memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); + ctx->pendingsize += npendings; + return 0; +} + +static int +decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, + Py_ssize_t size) +{ + buf->inbuf = buf->inbuf_top = (const unsigned char *)data; + buf->inbuf_end = buf->inbuf_top + size; + if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ + buf->outobj = PyUnicode_FromUnicode(NULL, size); + if (buf->outobj == NULL) + return -1; + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); + buf->outbuf_end = buf->outbuf + + PyUnicode_GET_SIZE(buf->outobj); + } + + return 0; +} + +static int +decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, + MultibyteDecodeBuffer *buf) +{ + while (buf->inbuf < buf->inbuf_end) { + Py_ssize_t inleft, outleft; + int r; + + inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); + outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); + + r = ctx->codec->decode(&ctx->state, ctx->codec->config, + &buf->inbuf, inleft, &buf->outbuf, outleft); + if (r == 0 || r == MBERR_TOOFEW) + break; + else if (multibytecodec_decerror(ctx->codec, &ctx->state, + buf, ctx->errors, r)) + return -1; + } + return 0; +} + + +/** + * MultibyteIncrementalEncoder object + */ + +static PyObject * +mbiencoder_encode(MultibyteIncrementalEncoderObject *self, + PyObject *args, PyObject *kwargs) +{ + PyObject *data; + int final = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", + incrementalkwarglist, &data, &final)) + return NULL; + + return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); +} + +static PyObject * +mbiencoder_reset(MultibyteIncrementalEncoderObject *self) +{ + if (self->codec->decreset != NULL && + self->codec->decreset(&self->state, self->codec->config) != 0) + return NULL; + self->pendingsize = 0; + + Py_RETURN_NONE; +} + +static struct PyMethodDef mbiencoder_methods[] = { + {"encode", (PyCFunction)mbiencoder_encode, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"reset", (PyCFunction)mbiencoder_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, +}; + +static PyObject * +mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + MultibyteIncrementalEncoderObject *self; + PyObject *codec; + char *errors = NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + return NULL; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + return NULL; + } + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", + incnewkwarglist, &errors)) + return NULL; + + self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->encinit != NULL && + self->codec->encinit(&self->state, self->codec->config) != 0) + goto errorexit; + + return (PyObject *)self; + +errorexit: + Py_XDECREF(self); + return NULL; +} + +static int +mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, + visitproc visit, void *arg) +{ + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + return 0; +} + +static void +mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) +{ + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + self->ob_type->tp_free(self); +} + +static PyTypeObject MultibyteIncrementalEncoder_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "MultibyteIncrementalEncoder", /* tp_name */ + sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbiencoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)mbiencoder_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbiencoder_methods, /* tp_methods */ + 0, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + mbiencoder_new, /* tp_new */ +}; + + +/** + * MultibyteIncrementalDecoder object + */ + +static PyObject * +mbidecoder_decode(MultibyteIncrementalDecoderObject *self, + PyObject *args, PyObject *kwargs) +{ + MultibyteDecodeBuffer buf; + char *data, *wdata; + Py_ssize_t wsize, finalsize = 0, size, origpending; + int final = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "t#|i:decode", + incrementalkwarglist, &data, &size, &final)) + return NULL; + + buf.outobj = buf.excobj = NULL; + origpending = self->pendingsize; + + if (self->pendingsize == 0) { + wsize = size; + wdata = data; + } + else { + wsize = size + self->pendingsize; + wdata = PyMem_Malloc(wsize); + if (wdata == NULL) + goto errorexit; + memcpy(wdata, self->pending, self->pendingsize); + memcpy(wdata + self->pendingsize, data, size); + self->pendingsize = 0; + } + + if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) + goto errorexit; + + if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) + goto errorexit; + + if (final && buf.inbuf < buf.inbuf_end) { + if (multibytecodec_decerror(self->codec, &self->state, + &buf, self->errors, MBERR_TOOFEW)) { + /* recover the original pending buffer */ + memcpy(self->pending, wdata, origpending); + self->pendingsize = origpending; + goto errorexit; + } + } + + if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ + if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) + goto errorexit; + } + + finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + if (wdata != data) + PyMem_Del(wdata); + Py_XDECREF(buf.excobj); + return buf.outobj; + +errorexit: + if (wdata != NULL && wdata != data) + PyMem_Del(wdata); + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + return NULL; +} + +static PyObject * +mbidecoder_reset(MultibyteIncrementalDecoderObject *self) +{ + if (self->codec->decreset != NULL && + self->codec->decreset(&self->state, self->codec->config) != 0) + return NULL; + self->pendingsize = 0; + + Py_RETURN_NONE; +} + +static struct PyMethodDef mbidecoder_methods[] = { + {"decode", (PyCFunction)mbidecoder_decode, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"reset", (PyCFunction)mbidecoder_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, +}; + +static PyObject * +mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + MultibyteIncrementalDecoderObject *self; + PyObject *codec; + char *errors = NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + return NULL; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + return NULL; + } + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", + incnewkwarglist, &errors)) + return NULL; + + self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->decinit != NULL && + self->codec->decinit(&self->state, self->codec->config) != 0) + goto errorexit; + + return (PyObject *)self; + +errorexit: + Py_XDECREF(self); + return NULL; +} + +static int +mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, + visitproc visit, void *arg) +{ + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + return 0; +} + +static void +mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) +{ + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + self->ob_type->tp_free(self); +} + +static PyTypeObject MultibyteIncrementalDecoder_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "MultibyteIncrementalDecoder", /* tp_name */ + sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbidecoder_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)mbidecoder_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbidecoder_methods, /* tp_methods */ + 0, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + mbidecoder_new, /* tp_new */ +}; + + +/** + * MultibyteStreamReader object + */ + static PyObject * mbstreamreader_iread(MultibyteStreamReaderObject *self, const char *method, Py_ssize_t sizehint) { MultibyteDecodeBuffer buf; PyObject *cres; - Py_ssize_t rsize, r, finalsize = 0; + Py_ssize_t rsize, finalsize = 0; if (sizehint == 0) return PyUnicode_FromUnicode(NULL, 0); @@ -740,39 +1227,13 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, } rsize = PyString_GET_SIZE(cres); - buf.inbuf = buf.inbuf_top = - (unsigned char *)PyString_AS_STRING(cres); - buf.inbuf_end = buf.inbuf_top + rsize; - if (buf.outobj == NULL) { - buf.outobj = PyUnicode_FromUnicode(NULL, rsize); - if (buf.outobj == NULL) - goto errorexit; - buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); - buf.outbuf_end = buf.outbuf + - PyUnicode_GET_SIZE(buf.outobj); - } + if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), + rsize) != 0) + goto errorexit; - r = 0; - if (rsize > 0) - while (buf.inbuf < buf.inbuf_end) { - Py_ssize_t inleft, outleft; - - inleft = (Py_ssize_t)(buf.inbuf_end - - buf.inbuf); - outleft = (Py_ssize_t)(buf.outbuf_end - - buf.outbuf); - - r = self->codec->decode(&self->state, - self->codec->config, - &buf.inbuf, inleft, - &buf.outbuf, outleft); - if (r == 0 || r == MBERR_TOOFEW) - break; - else if (multibytecodec_decerror(self->codec, - &self->state, &buf, - self->errors, r)) - goto errorexit; - } + if (rsize > 0 && decoder_feed_buffer( + (MultibyteStatefulDecoderContext *)self, &buf)) + goto errorexit; if (rsize == 0 || sizehint < 0) { /* end of file */ if (buf.inbuf < buf.inbuf_end && @@ -782,20 +1243,9 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, } if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ - Py_ssize_t npendings; - - /* we can't assume that pendingsize is still 0 here. - * because this function can be called recursively - * from error callback */ - npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); - if (npendings + self->pendingsize > MAXDECPENDING) { - PyErr_SetString(PyExc_RuntimeError, - "pending buffer overflow"); + if (decoder_append_pending(STATEFUL_DCTX(self), + &buf) != 0) goto errorexit; - } - memcpy(self->pending + self->pendingsize, buf.inbuf, - npendings); - self->pendingsize += npendings; } finalsize = (Py_ssize_t)(buf.outbuf - @@ -901,8 +1351,7 @@ mbstreamreader_reset(MultibyteStreamReaderObject *self) return NULL; self->pendingsize = 0; - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } static struct PyMethodDef mbstreamreader_methods[] = { @@ -917,17 +1366,72 @@ static struct PyMethodDef mbstreamreader_methods[] = { {NULL, NULL}, }; -static void -mbstreamreader_dealloc(MultibyteStreamReaderObject *self) +static PyMemberDef mbstreamreader_members[] = { + {"stream", T_OBJECT, + offsetof(MultibyteStreamReaderObject, stream), + READONLY, NULL}, + {NULL,} +}; + +static PyObject * +mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - if (self->errors > ERROR_MAX) { - Py_DECREF(self->errors); + MultibyteStreamReaderObject *self; + PyObject *codec, *stream; + char *errors = NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + return NULL; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + return NULL; } - Py_DECREF(self->stream); - PyObject_Del(self); + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", + streamkwarglist, &stream, &errors)) + return NULL; + + self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->stream = stream; + Py_INCREF(stream); + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->decinit != NULL && + self->codec->decinit(&self->state, self->codec->config) != 0) + goto errorexit; + + return (PyObject *)self; + +errorexit: + Py_XDECREF(self); + return NULL; } +static int +mbstreamreader_traverse(MultibyteStreamReaderObject *self, + visitproc visit, void *arg) +{ + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + Py_VISIT(self->stream); + return 0; +} +static void +mbstreamreader_dealloc(MultibyteStreamReaderObject *self) +{ + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); + Py_DECREF(self->stream); + self->ob_type->tp_free(self); +} static PyTypeObject MultibyteStreamReader_Type = { PyObject_HEAD_INIT(NULL) @@ -951,97 +1455,49 @@ static PyTypeObject MultibyteStreamReader_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ 0, /* tp_doc */ - 0, /* tp_traverse */ + (traverseproc)mbstreamreader_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iterext */ mbstreamreader_methods, /* tp_methods */ + mbstreamreader_members, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + mbstreamreader_new, /* tp_new */ }; + +/** + * MultibyteStreamWriter object + */ + static int mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, PyObject *unistr) { - PyObject *wr, *ucvt, *r = NULL; - Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; - Py_ssize_t datalen; - - if (PyUnicode_Check(unistr)) - ucvt = NULL; - else { - unistr = ucvt = PyObject_Unicode(unistr); - if (unistr == NULL) - return -1; - else if (!PyUnicode_Check(unistr)) { - PyErr_SetString(PyExc_TypeError, - "couldn't convert the object to unicode."); - Py_DECREF(ucvt); - return -1; - } - } + PyObject *str, *wr; - datalen = PyUnicode_GET_SIZE(unistr); - if (datalen == 0) { - Py_XDECREF(ucvt); - return 0; - } - - if (self->pendingsize > 0) { - inbuf_tmp = PyMem_New(Py_UNICODE, datalen + self->pendingsize); - if (inbuf_tmp == NULL) - goto errorexit; - memcpy(inbuf_tmp, self->pending, - Py_UNICODE_SIZE * self->pendingsize); - memcpy(inbuf_tmp + self->pendingsize, - PyUnicode_AS_UNICODE(unistr), - Py_UNICODE_SIZE * datalen); - datalen += self->pendingsize; - self->pendingsize = 0; - inbuf = inbuf_tmp; - } - else - inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); - - inbuf_end = inbuf + datalen; - - r = multibytecodec_encode(self->codec, &self->state, - (const Py_UNICODE **)&inbuf, datalen, self->errors, 0); - if (r == NULL) - goto errorexit; - - if (inbuf < inbuf_end) { - self->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); - if (self->pendingsize > MAXENCPENDING) { - self->pendingsize = 0; - PyErr_SetString(PyExc_RuntimeError, - "pending buffer overflow"); - goto errorexit; - } - memcpy(self->pending, inbuf, - self->pendingsize * Py_UNICODE_SIZE); - } + str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); + if (str == NULL) + return -1; - wr = PyObject_CallMethod(self->stream, "write", "O", r); + wr = PyObject_CallMethod(self->stream, "write", "O", str); + Py_DECREF(str); if (wr == NULL) - goto errorexit; + return -1; - if (inbuf_tmp != NULL) - PyMem_Del(inbuf_tmp); - Py_DECREF(r); - Py_DECREF(wr); - Py_XDECREF(ucvt); return 0; - -errorexit: - if (inbuf_tmp != NULL) - PyMem_Del(inbuf_tmp); - Py_XDECREF(r); - Py_XDECREF(ucvt); - return -1; } static PyObject * @@ -1054,10 +1510,8 @@ mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args) if (mbstreamwriter_iwrite(self, strobj)) return NULL; - else { - Py_INCREF(Py_None); - return Py_None; - } + else + Py_RETURN_NONE; } static PyObject * @@ -1087,8 +1541,7 @@ mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args) return NULL; } - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } static PyObject * @@ -1119,18 +1572,67 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self) } Py_DECREF(pwrt); - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; +} + +static PyObject * +mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + MultibyteStreamWriterObject *self; + PyObject *codec, *stream; + char *errors = NULL; + + codec = PyObject_GetAttrString((PyObject *)type, "codec"); + if (codec == NULL) + return NULL; + if (!MultibyteCodec_Check(codec)) { + PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); + return NULL; + } + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", + streamkwarglist, &stream, &errors)) + return NULL; + + self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + self->codec = ((MultibyteCodecObject *)codec)->codec; + self->stream = stream; + Py_INCREF(stream); + self->pendingsize = 0; + self->errors = internal_error_callback(errors); + if (self->errors == NULL) + goto errorexit; + if (self->codec->encinit != NULL && + self->codec->encinit(&self->state, self->codec->config) != 0) + goto errorexit; + + return (PyObject *)self; + +errorexit: + Py_XDECREF(self); + return NULL; +} + +static int +mbstreamwriter_traverse(MultibyteStreamWriterObject *self, + visitproc visit, void *arg) +{ + if (ERROR_ISCUSTOM(self->errors)) + Py_VISIT(self->errors); + Py_VISIT(self->stream); + return 0; } static void mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) { - if (self->errors > ERROR_MAX) { - Py_DECREF(self->errors); - } + PyObject_GC_UnTrack(self); + ERROR_DECREF(self->errors); Py_DECREF(self->stream); - PyObject_Del(self); + self->ob_type->tp_free(self); } static struct PyMethodDef mbstreamwriter_methods[] = { @@ -1143,7 +1645,12 @@ static struct PyMethodDef mbstreamwriter_methods[] = { {NULL, NULL}, }; - +static PyMemberDef mbstreamwriter_members[] = { + {"stream", T_OBJECT, + offsetof(MultibyteStreamWriterObject, stream), + READONLY, NULL}, + {NULL,} +}; static PyTypeObject MultibyteStreamWriter_Type = { PyObject_HEAD_INIT(NULL) @@ -1167,17 +1674,33 @@ static PyTypeObject MultibyteStreamWriter_Type = { PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags */ 0, /* tp_doc */ - 0, /* tp_traverse */ + (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iterext */ mbstreamwriter_methods, /* tp_methods */ + mbstreamwriter_members, /* tp_members */ + codecctx_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + mbstreamwriter_new, /* tp_new */ }; + +/** + * Exposed factory function + */ + static PyObject * __create_codec(PyObject *ignore, PyObject *arg) { @@ -1201,80 +1724,38 @@ __create_codec(PyObject *ignore, PyObject *arg) return (PyObject *)self; } -static PyObject * -mbstreamreader_create(MultibyteCodec *codec, - PyObject *stream, const char *errors) -{ - MultibyteStreamReaderObject *self; - - self = PyObject_New(MultibyteStreamReaderObject, - &MultibyteStreamReader_Type); - if (self == NULL) - return NULL; - - self->codec = codec; - self->stream = stream; - Py_INCREF(stream); - self->pendingsize = 0; - self->errors = get_errorcallback(errors); - if (self->errors == NULL) - goto errorexit; - if (self->codec->decinit != NULL && - self->codec->decinit(&self->state, self->codec->config) != 0) - goto errorexit; - - return (PyObject *)self; - -errorexit: - Py_XDECREF(self); - return NULL; -} - -static PyObject * -mbstreamwriter_create(MultibyteCodec *codec, - PyObject *stream, const char *errors) -{ - MultibyteStreamWriterObject *self; - - self = PyObject_New(MultibyteStreamWriterObject, - &MultibyteStreamWriter_Type); - if (self == NULL) - return NULL; - - self->codec = codec; - self->stream = stream; - Py_INCREF(stream); - self->pendingsize = 0; - self->errors = get_errorcallback(errors); - if (self->errors == NULL) - goto errorexit; - if (self->codec->encinit != NULL && - self->codec->encinit(&self->state, self->codec->config) != 0) - goto errorexit; - - return (PyObject *)self; - -errorexit: - Py_XDECREF(self); - return NULL; -} - static struct PyMethodDef __methods[] = { {"__create_codec", (PyCFunction)__create_codec, METH_O}, {NULL, NULL}, }; -void +PyMODINIT_FUNC init_multibytecodec(void) { + int i; + PyObject *m; + PyTypeObject *typelist[] = { + &MultibyteIncrementalEncoder_Type, + &MultibyteIncrementalDecoder_Type, + &MultibyteStreamReader_Type, + &MultibyteStreamWriter_Type, + NULL + }; + if (PyType_Ready(&MultibyteCodec_Type) < 0) return; - if (PyType_Ready(&MultibyteStreamReader_Type) < 0) - return; - if (PyType_Ready(&MultibyteStreamWriter_Type) < 0) + + m = Py_InitModule("_multibytecodec", __methods); + if (m == NULL) return; - Py_InitModule("_multibytecodec", __methods); + for (i = 0; typelist[i] != NULL; i++) { + if (PyType_Ready(typelist[i]) < 0) + return; + Py_INCREF(typelist[i]); + PyModule_AddObject(m, typelist[i]->tp_name, + (PyObject *)typelist[i]); + } if (PyErr_Occurred()) Py_FatalError("can't initialize the _multibytecodec module"); diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index ec49c78..671ecae 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -67,24 +67,51 @@ typedef struct { MultibyteCodec *codec; } MultibyteCodecObject; -#define MAXDECPENDING 8 +#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type) + +#define _MultibyteStatefulCodec_HEAD \ + PyObject_HEAD \ + MultibyteCodec *codec; \ + MultibyteCodec_State state; \ + PyObject *errors; typedef struct { - PyObject_HEAD - MultibyteCodec *codec; - MultibyteCodec_State state; - unsigned char pending[MAXDECPENDING]; - Py_ssize_t pendingsize; - PyObject *stream, *errors; -} MultibyteStreamReaderObject; + _MultibyteStatefulCodec_HEAD +} MultibyteStatefulCodecContext; #define MAXENCPENDING 2 +#define _MultibyteStatefulEncoder_HEAD \ + _MultibyteStatefulCodec_HEAD \ + Py_UNICODE pending[MAXENCPENDING]; \ + Py_ssize_t pendingsize; typedef struct { - PyObject_HEAD - MultibyteCodec *codec; - MultibyteCodec_State state; - Py_UNICODE pending[MAXENCPENDING]; + _MultibyteStatefulEncoder_HEAD +} MultibyteStatefulEncoderContext; + +#define MAXDECPENDING 8 +#define _MultibyteStatefulDecoder_HEAD \ + _MultibyteStatefulCodec_HEAD \ + unsigned char pending[MAXDECPENDING]; \ Py_ssize_t pendingsize; - PyObject *stream, *errors; +typedef struct { + _MultibyteStatefulDecoder_HEAD +} MultibyteStatefulDecoderContext; + +typedef struct { + _MultibyteStatefulEncoder_HEAD +} MultibyteIncrementalEncoderObject; + +typedef struct { + _MultibyteStatefulDecoder_HEAD +} MultibyteIncrementalDecoderObject; + +typedef struct { + _MultibyteStatefulDecoder_HEAD + PyObject *stream; +} MultibyteStreamReaderObject; + +typedef struct { + _MultibyteStatefulEncoder_HEAD + PyObject *stream; } MultibyteStreamWriterObject; /* positive values for illegal sequences */ @@ -95,7 +122,12 @@ typedef struct { #define ERROR_STRICT (PyObject *)(1) #define ERROR_IGNORE (PyObject *)(2) #define ERROR_REPLACE (PyObject *)(3) -#define ERROR_MAX ERROR_REPLACE +#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p)) +#define ERROR_DECREF(p) do { \ + if (ERROR_ISCUSTOM(p)) { \ + Py_DECREF(p); \ + } \ +} while (0); #define MBENC_FLUSH 0x0001 /* encode all characters encodable */ #define MBENC_MAX MBENC_FLUSH diff --git a/Tools/unicode/Makefile b/Tools/unicode/Makefile index f266d4d..fbd3557 100644 --- a/Tools/unicode/Makefile +++ b/Tools/unicode/Makefile @@ -15,7 +15,7 @@ RM = /bin/rm all: distclean mappings codecs -codecs: misc windows iso apple ebcdic custom-mappings +codecs: misc windows iso apple ebcdic custom-mappings cjk ### Mappings @@ -72,6 +72,9 @@ ebcdic: build/ $(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/EBCDIC/ build/ $(RM) -f build/readme.* +cjk: build/ + $(PYTHON) gencjkcodecs.py build/ + ### Cleanup clean: diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py new file mode 100644 index 0000000..47627c5 --- /dev/null +++ b/Tools/unicode/gencjkcodecs.py @@ -0,0 +1,65 @@ +import os, string + +codecs = { + 'cn': ('gb2312', 'gbk', 'gb18030', 'hz'), + 'tw': ('big5', 'cp950'), + 'hk': ('big5hkscs',), + 'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213', + 'euc_jis_2004', 'shift_jis_2004'), + 'kr': ('cp949', 'euc_kr', 'johab'), + 'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', + 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', + 'iso2022_kr'), +} + +TEMPLATE = string.Template("""\ +# +# $encoding.py: Python Unicode Codec for $ENCODING +# +# Written by Hye-Shik Chang <perky@FreeBSD.org> +# + +import _codecs_$owner, codecs +import _multibytecodec as mbc + +codec = _codecs_$owner.getcodec('$encoding') + +class Codec(codecs.Codec): + encode = codec.encode + decode = codec.decode + +class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, + codecs.IncrementalEncoder): + codec = codec +class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, + codecs.IncrementalDecoder): + codec = codec +class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): + codec = codec +class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): + codec = codec + +def getregentry(): + return codecs.CodecInfo( + name='$encoding', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) +""") + +def gencodecs(prefix): + for loc, encodings in codecs.iteritems(): + for enc in encodings: + code = TEMPLATE.substitute(ENCODING=enc.upper(), + encoding=enc.lower(), + owner=loc) + codecpath = os.path.join(prefix, enc + '.py') + open(codecpath, 'w').write(code) + +if __name__ == '__main__': + import sys + gencodecs(sys.argv[1]) |