summaryrefslogtreecommitdiffstats
path: root/Modules/cjkcodecs
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net>, 2010-05-09 15:52:27 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net>, 2010-05-09 15:52:27 (GMT)
commit: f95a1b3c53bdd678b64aa608d4375660033460c3 (patch)
tree: a8bee40b1b14e28ff5978ea519f3035a3c399912 /Modules/cjkcodecs
parent: bd250300191133d276a71b395b6428081bf825b8 (diff)
download: cpython-f95a1b3c53bdd678b64aa608d4375660033460c3.zip
cpython-f95a1b3c53bdd678b64aa608d4375660033460c3.tar.gz
cpython-f95a1b3c53bdd678b64aa608d4375660033460c3.tar.bz2
Recorded merge of revisions 81029 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81029 | antoine.pitrou | 2010-05-09 16:46:46 +0200 (dim., 09 mai 2010) | 3 lines

  Untabify C files. Will watch buildbots.
........
Diffstat (limited to 'Modules/cjkcodecs')
-rw-r--r-- Modules/cjkcodecs/_codecs_cn.c 634
-rw-r--r-- Modules/cjkcodecs/_codecs_hk.c 256
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 1622
-rw-r--r-- Modules/cjkcodecs/_codecs_jp.c 1204
-rw-r--r-- Modules/cjkcodecs/_codecs_kr.c 602
-rw-r--r-- Modules/cjkcodecs/_codecs_tw.c 140
-rw-r--r-- Modules/cjkcodecs/alg_jisx0201.h 46
-rw-r--r-- Modules/cjkcodecs/cjkcodecs.h 578
-rw-r--r-- Modules/cjkcodecs/emu_jisx0213_2000.h 58
-rw-r--r-- Modules/cjkcodecs/multibytecodec.c 2870
-rw-r--r-- Modules/cjkcodecs/multibytecodec.h 132
11 files changed, 4071 insertions, 4071 deletions
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
index 4542ce6..ab4e659 100644
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -17,24 +17,24 @@
/* GBK and GB2312 map differently in few codepoints that are listed below:
*
- * gb2312 gbk
- * A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
- * A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
- * A844 undefined U+2015 HORIZONTAL BAR
+ * gb2312 gbk
+ * A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
+ * A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
+ * A844 undefined U+2015 HORIZONTAL BAR
*/
#define GBK_DECODE(dc1, dc2, assi) \
- if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
- else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
- else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
- else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
- else TRYMAP_DEC(gbkext, assi, dc1, dc2);
+ if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
+ else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
+ else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
+ else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
+ else TRYMAP_DEC(gbkext, assi, dc1, dc2);
#define GBK_ENCODE(code, assi) \
- if ((code) == 0x2014) (assi) = 0xa1aa; \
- else if ((code) == 0x2015) (assi) = 0xa844; \
- else if ((code) == 0x00b7) (assi) = 0xa1a4; \
- else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
+ if ((code) == 0x2014) (assi) = 0xa1aa; \
+ else if ((code) == 0x2015) (assi) = 0xa844; \
+ else if ((code) == 0x00b7) (assi) = 0xa1a4; \
+ else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
/*
* GB2312 codec
@@ -42,53 +42,53 @@
ENCODER(gb2312)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
-
- REQUIRE_OUTBUF(2)
- TRYMAP_ENC(gbcommon, code, c);
- else return 1;
-
- if (code & 0x8000) /* MSB set: GBK */
- return 1;
-
- OUT1((code >> 8) | 0x80)
- OUT2((code & 0xFF) | 0x80)
- NEXT(1, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
+
+ REQUIRE_OUTBUF(2)
+ TRYMAP_ENC(gbcommon, code, c);
+ else return 1;
+
+ if (code & 0x8000) /* MSB set: GBK */
+ return 1;
+
+ OUT1((code >> 8) | 0x80)
+ OUT2((code & 0xFF) | 0x80)
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(gb2312)
{
- while (inleft > 0) {
- unsigned char c = **inbuf;
+ while (inleft > 0) {
+ unsigned char c = **inbuf;
- REQUIRE_OUTBUF(1)
+ REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
- REQUIRE_INBUF(2)
- TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
- NEXT(2, 1)
- }
- else return 2;
- }
+ REQUIRE_INBUF(2)
+ TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
+ NEXT(2, 1)
+ }
+ else return 2;
+ }
- return 0;
+ return 0;
}
@@ -98,55 +98,55 @@ DECODER(gb2312)
ENCODER(gbk)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
-
- REQUIRE_OUTBUF(2)
-
- GBK_ENCODE(c, code)
- else return 1;
-
- OUT1((code >> 8) | 0x80)
- if (code & 0x8000)
- OUT2((code & 0xFF)) /* MSB set: GBK */
- else
- OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
- NEXT(1, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
+
+ REQUIRE_OUTBUF(2)
+
+ GBK_ENCODE(c, code)
+ else return 1;
+
+ OUT1((code >> 8) | 0x80)
+ if (code & 0x8000)
+ OUT2((code & 0xFF)) /* MSB set: GBK */
+ else
+ OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(gbk)
{
- while (inleft > 0) {
- unsigned char c = IN1;
+ while (inleft > 0) {
+ unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
+ REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
- REQUIRE_INBUF(2)
+ REQUIRE_INBUF(2)
- GBK_DECODE(c, IN2, **outbuf)
- else return 2;
+ GBK_DECODE(c, IN2, **outbuf)
+ else return 2;
- NEXT(2, 1)
- }
+ NEXT(2, 1)
+ }
- return 0;
+ return 0;
}
@@ -156,153 +156,153 @@ DECODER(gbk)
ENCODER(gb18030)
{
- while (inleft > 0) {
- ucs4_t c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1(c)
- NEXT(1, 1)
- continue;
- }
-
- DECODE_SURROGATE(c)
- if (c > 0x10FFFF)
+ while (inleft > 0) {
+ ucs4_t c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ DECODE_SURROGATE(c)
+ if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
- return 2; /* surrogates pair */
+ return 2; /* surrogates pair */
#else
- return 1;
+ return 1;
#endif
- else if (c >= 0x10000) {
- ucs4_t tc = c - 0x10000;
+ else if (c >= 0x10000) {
+ ucs4_t tc = c - 0x10000;
- REQUIRE_OUTBUF(4)
+ REQUIRE_OUTBUF(4)
- OUT4((unsigned char)(tc % 10) + 0x30)
- tc /= 10;
- OUT3((unsigned char)(tc % 126) + 0x81)
- tc /= 126;
- OUT2((unsigned char)(tc % 10) + 0x30)
- tc /= 10;
- OUT1((unsigned char)(tc + 0x90))
+ OUT4((unsigned char)(tc % 10) + 0x30)
+ tc /= 10;
+ OUT3((unsigned char)(tc % 126) + 0x81)
+ tc /= 126;
+ OUT2((unsigned char)(tc % 10) + 0x30)
+ tc /= 10;
+ OUT1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2
- NEXT(2, 4) /* surrogates pair */
+ NEXT(2, 4) /* surrogates pair */
#else
- NEXT(1, 4)
+ NEXT(1, 4)
#endif
- continue;
- }
-
- REQUIRE_OUTBUF(2)
-
- GBK_ENCODE(c, code)
- else TRYMAP_ENC(gb18030ext, code, c);
- else {
- const struct _gb18030_to_unibmp_ranges *utrrange;
-
- REQUIRE_OUTBUF(4)
-
- for (utrrange = gb18030_to_unibmp_ranges;
- utrrange->first != 0;
- utrrange++)
- if (utrrange->first <= c &&
- c <= utrrange->last) {
- Py_UNICODE tc;
-
- tc = c - utrrange->first +
- utrrange->base;
-
- OUT4((unsigned char)(tc % 10) + 0x30)
- tc /= 10;
- OUT3((unsigned char)(tc % 126) + 0x81)
- tc /= 126;
- OUT2((unsigned char)(tc % 10) + 0x30)
- tc /= 10;
- OUT1((unsigned char)tc + 0x81)
-
- NEXT(1, 4)
- break;
- }
-
- if (utrrange->first == 0)
- return 1;
- continue;
- }
-
- OUT1((code >> 8) | 0x80)
- if (code & 0x8000)
- OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
- else
- OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
-
- NEXT(1, 2)
- }
-
- return 0;
+ continue;
+ }
+
+ REQUIRE_OUTBUF(2)
+
+ GBK_ENCODE(c, code)
+ else TRYMAP_ENC(gb18030ext, code, c);
+ else {
+ const struct _gb18030_to_unibmp_ranges *utrrange;
+
+ REQUIRE_OUTBUF(4)
+
+ for (utrrange = gb18030_to_unibmp_ranges;
+ utrrange->first != 0;
+ utrrange++)
+ if (utrrange->first <= c &&
+ c <= utrrange->last) {
+ Py_UNICODE tc;
+
+ tc = c - utrrange->first +
+ utrrange->base;
+
+ OUT4((unsigned char)(tc % 10) + 0x30)
+ tc /= 10;
+ OUT3((unsigned char)(tc % 126) + 0x81)
+ tc /= 126;
+ OUT2((unsigned char)(tc % 10) + 0x30)
+ tc /= 10;
+ OUT1((unsigned char)tc + 0x81)
+
+ NEXT(1, 4)
+ break;
+ }
+
+ if (utrrange->first == 0)
+ return 1;
+ continue;
+ }
+
+ OUT1((code >> 8) | 0x80)
+ if (code & 0x8000)
+ OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
+ else
+ OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
+
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(gb18030)
{
- while (inleft > 0) {
- unsigned char c = IN1, c2;
-
- REQUIRE_OUTBUF(1)
-
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
-
- REQUIRE_INBUF(2)
-
- c2 = IN2;
- if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
- const struct _gb18030_to_unibmp_ranges *utr;
- unsigned char c3, c4;
- ucs4_t lseq;
-
- REQUIRE_INBUF(4)
- c3 = IN3;
- c4 = IN4;
- if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
- return 4;
- c -= 0x81; c2 -= 0x30;
- c3 -= 0x81; c4 -= 0x30;
-
- if (c < 4) { /* U+0080 - U+FFFF */
- lseq = ((ucs4_t)c * 10 + c2) * 1260 +
- (ucs4_t)c3 * 10 + c4;
- if (lseq < 39420) {
- for (utr = gb18030_to_unibmp_ranges;
- lseq >= (utr + 1)->base;
- utr++) ;
- OUT1(utr->first - utr->base + lseq)
- NEXT(4, 1)
- continue;
- }
- }
- else if (c >= 15) { /* U+10000 - U+10FFFF */
- lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
- * 1260 + (ucs4_t)c3 * 10 + c4;
- if (lseq <= 0x10FFFF) {
- WRITEUCS4(lseq);
- NEXT_IN(4)
- continue;
- }
- }
- return 4;
- }
-
- GBK_DECODE(c, c2, **outbuf)
- else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
- else return 2;
-
- NEXT(2, 1)
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1, c2;
+
+ REQUIRE_OUTBUF(1)
+
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ REQUIRE_INBUF(2)
+
+ c2 = IN2;
+ if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
+ const struct _gb18030_to_unibmp_ranges *utr;
+ unsigned char c3, c4;
+ ucs4_t lseq;
+
+ REQUIRE_INBUF(4)
+ c3 = IN3;
+ c4 = IN4;
+ if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
+ return 4;
+ c -= 0x81; c2 -= 0x30;
+ c3 -= 0x81; c4 -= 0x30;
+
+ if (c < 4) { /* U+0080 - U+FFFF */
+ lseq = ((ucs4_t)c * 10 + c2) * 1260 +
+ (ucs4_t)c3 * 10 + c4;
+ if (lseq < 39420) {
+ for (utr = gb18030_to_unibmp_ranges;
+ lseq >= (utr + 1)->base;
+ utr++) ;
+ OUT1(utr->first - utr->base + lseq)
+ NEXT(4, 1)
+ continue;
+ }
+ }
+ else if (c >= 15) { /* U+10000 - U+10FFFF */
+ lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
+ * 1260 + (ucs4_t)c3 * 10 + c4;
+ if (lseq <= 0x10FFFF) {
+ WRITEUCS4(lseq);
+ NEXT_IN(4)
+ continue;
+ }
+ }
+ return 4;
+ }
+
+ GBK_DECODE(c, c2, **outbuf)
+ else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
+ else return 2;
+
+ NEXT(2, 1)
+ }
+
+ return 0;
}
@@ -312,118 +312,118 @@ DECODER(gb18030)
ENCODER_INIT(hz)
{
- state->i = 0;
- return 0;
+ state->i = 0;
+ return 0;
}
ENCODER_RESET(hz)
{
- if (state->i != 0) {
- WRITE2('~', '}')
- state->i = 0;
- NEXT_OUT(2)
- }
- return 0;
+ if (state->i != 0) {
+ WRITE2('~', '}')
+ state->i = 0;
+ NEXT_OUT(2)
+ }
+ return 0;
}
ENCODER(hz)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- if (state->i == 0) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- }
- else {
- WRITE3('~', '}', (unsigned char)c)
- NEXT(1, 3)
- state->i = 0;
- }
- continue;
- }
-
- UCS4INVALID(c)
-
- TRYMAP_ENC(gbcommon, code, c);
- else return 1;
-
- if (code & 0x8000) /* MSB set: GBK */
- return 1;
-
- if (state->i == 0) {
- WRITE4('~', '{', code >> 8, code & 0xff)
- NEXT(1, 4)
- state->i = 1;
- }
- else {
- WRITE2(code >> 8, code & 0xff)
- NEXT(1, 2)
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ if (state->i == 0) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ }
+ else {
+ WRITE3('~', '}', (unsigned char)c)
+ NEXT(1, 3)
+ state->i = 0;
+ }
+ continue;
+ }
+
+ UCS4INVALID(c)
+
+ TRYMAP_ENC(gbcommon, code, c);
+ else return 1;
+
+ if (code & 0x8000) /* MSB set: GBK */
+ return 1;
+
+ if (state->i == 0) {
+ WRITE4('~', '{', code >> 8, code & 0xff)
+ NEXT(1, 4)
+ state->i = 1;
+ }
+ else {
+ WRITE2(code >> 8, code & 0xff)
+ NEXT(1, 2)
+ }
+ }
+
+ return 0;
}
DECODER_INIT(hz)
{
- state->i = 0;
- return 0;
+ state->i = 0;
+ return 0;
}
DECODER_RESET(hz)
{
- state->i = 0;
- return 0;
+ state->i = 0;
+ return 0;
}
DECODER(hz)
{
- while (inleft > 0) {
- unsigned char c = IN1;
-
- if (c == '~') {
- unsigned char c2 = IN2;
-
- REQUIRE_INBUF(2)
- if (c2 == '~') {
- WRITE1('~')
- NEXT(2, 1)
- continue;
- }
- else if (c2 == '{' && state->i == 0)
- state->i = 1; /* set GB */
- else if (c2 == '}' && state->i == 1)
- state->i = 0; /* set ASCII */
- else if (c2 == '\n')
- ; /* line-continuation */
- else
- return 2;
- NEXT(2, 0);
- continue;
- }
-
- if (c & 0x80)
- return 1;
-
- if (state->i == 0) { /* ASCII mode */
- WRITE1(c)
- NEXT(1, 1)
- }
- else { /* GB mode */
- REQUIRE_INBUF(2)
- REQUIRE_OUTBUF(1)
- TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
- NEXT(2, 1)
- }
- else
- return 2;
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1;
+
+ if (c == '~') {
+ unsigned char c2 = IN2;
+
+ REQUIRE_INBUF(2)
+ if (c2 == '~') {
+ WRITE1('~')
+ NEXT(2, 1)
+ continue;
+ }
+ else if (c2 == '{' && state->i == 0)
+ state->i = 1; /* set GB */
+ else if (c2 == '}' && state->i == 1)
+ state->i = 0; /* set ASCII */
+ else if (c2 == '\n')
+ ; /* line-continuation */
+ else
+ return 2;
+ NEXT(2, 0);
+ continue;
+ }
+
+ if (c & 0x80)
+ return 1;
+
+ if (state->i == 0) { /* ASCII mode */
+ WRITE1(c)
+ NEXT(1, 1)
+ }
+ else { /* GB mode */
+ REQUIRE_INBUF(2)
+ REQUIRE_OUTBUF(1)
+ TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
+ NEXT(2, 1)
+ }
+ else
+ return 2;
+ }
+ }
+
+ return 0;
}
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
index 4bbd622..aaf103d 100644
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -18,12 +18,12 @@ static const decode_map *big5_decmap = NULL;
CODEC_INIT(big5hkscs)
{
- static int initialized = 0;
+ static int initialized = 0;
- if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
- return -1;
- initialized = 1;
- return 0;
+ if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
+ return -1;
+ initialized = 1;
+ return 0;
}
/*
@@ -38,135 +38,135 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
ENCODER(big5hkscs)
{
- while (inleft > 0) {
- ucs4_t c = **inbuf;
- DBCHAR code;
- Py_ssize_t insize;
-
- if (c < 0x80) {
- REQUIRE_OUTBUF(1)
- **outbuf = (unsigned char)c;
- NEXT(1, 1)
- continue;
- }
-
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
-
- REQUIRE_OUTBUF(2)
-
- if (c < 0x10000) {
- TRYMAP_ENC(big5hkscs_bmp, code, c) {
- if (code == MULTIC) {
- if (inleft >= 2 &&
- ((c & 0xffdf) == 0x00ca) &&
- (((*inbuf)[1] & 0xfff7) == 0x0304)) {
- code = big5hkscs_pairenc_table[
- ((c >> 4) |
- ((*inbuf)[1] >> 3)) & 3];
- insize = 2;
- }
- else if (inleft < 2 &&
- !(flags & MBENC_FLUSH))
- return MBERR_TOOFEW;
- else {
- if (c == 0xca)
- code = 0x8866;
- else /* c == 0xea */
- code = 0x88a7;
- }
- }
- }
- else TRYMAP_ENC(big5, code, c);
- else return 1;
- }
- else if (c < 0x20000)
- return insize;
- else if (c < 0x30000) {
- TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
- else return insize;
- }
- else
- return insize;
-
- OUT1(code >> 8)
- OUT2(code & 0xFF)
- NEXT(insize, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ ucs4_t c = **inbuf;
+ DBCHAR code;
+ Py_ssize_t insize;
+
+ if (c < 0x80) {
+ REQUIRE_OUTBUF(1)
+ **outbuf = (unsigned char)c;
+ NEXT(1, 1)
+ continue;
+ }
+
+ DECODE_SURROGATE(c)
+ insize = GET_INSIZE(c);
+
+ REQUIRE_OUTBUF(2)
+
+ if (c < 0x10000) {
+ TRYMAP_ENC(big5hkscs_bmp, code, c) {
+ if (code == MULTIC) {
+ if (inleft >= 2 &&
+ ((c & 0xffdf) == 0x00ca) &&
+ (((*inbuf)[1] & 0xfff7) == 0x0304)) {
+ code = big5hkscs_pairenc_table[
+ ((c >> 4) |
+ ((*inbuf)[1] >> 3)) & 3];
+ insize = 2;
+ }
+ else if (inleft < 2 &&
+ !(flags & MBENC_FLUSH))
+ return MBERR_TOOFEW;
+ else {
+ if (c == 0xca)
+ code = 0x8866;
+ else /* c == 0xea */
+ code = 0x88a7;
+ }
+ }
+ }
+ else TRYMAP_ENC(big5, code, c);
+ else return 1;
+ }
+ else if (c < 0x20000)
+ return insize;
+ else if (c < 0x30000) {
+ TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
+ else return insize;
+ }
+ else
+ return insize;
+
+ OUT1(code >> 8)
+ OUT2(code & 0xFF)
+ NEXT(insize, 2)
+ }
+
+ return 0;
}
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
DECODER(big5hkscs)
{
- while (inleft > 0) {
- unsigned char c = IN1;
- ucs4_t decoded;
-
- REQUIRE_OUTBUF(1)
-
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
-
- REQUIRE_INBUF(2)
-
- if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
- goto hkscsdec;
-
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
- }
- else
-hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
- int s = BH2S(c, IN2);
- const unsigned char *hintbase;
-
- assert(0x87 <= c && c <= 0xfe);
- assert(0x40 <= IN2 && IN2 <= 0xfe);
-
- if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
- hintbase = big5hkscs_phint_0;
- s -= BH2S(0x87, 0x40);
- }
- else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
- hintbase = big5hkscs_phint_12130;
- s -= BH2S(0xc6, 0xa1);
- }
- else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
- hintbase = big5hkscs_phint_21924;
- s -= BH2S(0xf9, 0xd6);
- }
- else
- return MBERR_INTERNAL;
-
- if (hintbase[s >> 3] & (1 << (s & 7))) {
- WRITEUCS4(decoded | 0x20000)
- NEXT_IN(2)
- }
- else {
- OUT1(decoded)
- NEXT(2, 1)
- }
- }
- else {
- switch ((c << 8) | IN2) {
- case 0x8862: WRITE2(0x00ca, 0x0304); break;
- case 0x8864: WRITE2(0x00ca, 0x030c); break;
- case 0x88a3: WRITE2(0x00ea, 0x0304); break;
- case 0x88a5: WRITE2(0x00ea, 0x030c); break;
- default: return 2;
- }
-
- NEXT(2, 2) /* all decoded codepoints are pairs, above. */
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1;
+ ucs4_t decoded;
+
+ REQUIRE_OUTBUF(1)
+
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ REQUIRE_INBUF(2)
+
+ if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
+ goto hkscsdec;
+
+ TRYMAP_DEC(big5, **outbuf, c, IN2) {
+ NEXT(2, 1)
+ }
+ else
+hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
+ int s = BH2S(c, IN2);
+ const unsigned char *hintbase;
+
+ assert(0x87 <= c && c <= 0xfe);
+ assert(0x40 <= IN2 && IN2 <= 0xfe);
+
+ if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
+ hintbase = big5hkscs_phint_0;
+ s -= BH2S(0x87, 0x40);
+ }
+ else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
+ hintbase = big5hkscs_phint_12130;
+ s -= BH2S(0xc6, 0xa1);
+ }
+ else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
+ hintbase = big5hkscs_phint_21924;
+ s -= BH2S(0xf9, 0xd6);
+ }
+ else
+ return MBERR_INTERNAL;
+
+ if (hintbase[s >> 3] & (1 << (s & 7))) {
+ WRITEUCS4(decoded | 0x20000)
+ NEXT_IN(2)
+ }
+ else {
+ OUT1(decoded)
+ NEXT(2, 1)
+ }
+ }
+ else {
+ switch ((c << 8) | IN2) {
+ case 0x8862: WRITE2(0x00ca, 0x0304); break;
+ case 0x8864: WRITE2(0x00ca, 0x030c); break;
+ case 0x88a3: WRITE2(0x00ea, 0x0304); break;
+ case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+ default: return 2;
+ }
+
+ NEXT(2, 2) /* all decoded codepoints are pairs, above. */
+ }
+ }
+
+ return 0;
}
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 9ce7b75..25c1a36 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -19,85 +19,85 @@
state->c[0-3]
- 00000000
- ||^^^^^|
- |+-----+---- G0-3 Character Set
- +----------- Is G0-3 double byte?
+ 00000000
+ ||^^^^^|
+ |+-----+---- G0-3 Character Set
+ +----------- Is G0-3 double byte?
state->c[4]
- 00000000
- ||
- |+---- Locked-Shift?
- +----- ESC Throughout
+ 00000000
+ ||
+ |+---- Locked-Shift?
+ +----- ESC Throughout
*/
-#define ESC 0x1B
-#define SO 0x0E
-#define SI 0x0F
-#define LF 0x0A
-
-#define MAX_ESCSEQLEN 16
-
-#define CHARSET_ISO8859_1 'A'
-#define CHARSET_ASCII 'B'
-#define CHARSET_ISO8859_7 'F'
-#define CHARSET_JISX0201_K 'I'
-#define CHARSET_JISX0201_R 'J'
-
-#define CHARSET_GB2312 ('A'|CHARSET_DBCS)
-#define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
-#define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
-#define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
-#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
-#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
-#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
-#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
-#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
-#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
-#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)
-
-#define CHARSET_DBCS 0x80
-#define ESCMARK(mark) ((mark) & 0x7f)
-
-#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
+#define ESC 0x1B
+#define SO 0x0E
+#define SI 0x0F
+#define LF 0x0A
+
+#define MAX_ESCSEQLEN 16
+
+#define CHARSET_ISO8859_1 'A'
+#define CHARSET_ASCII 'B'
+#define CHARSET_ISO8859_7 'F'
+#define CHARSET_JISX0201_K 'I'
+#define CHARSET_JISX0201_R 'J'
+
+#define CHARSET_GB2312 ('A'|CHARSET_DBCS)
+#define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
+#define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
+#define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
+#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
+#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
+#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
+#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)
+
+#define CHARSET_DBCS 0x80
+#define ESCMARK(mark) ((mark) & 0x7f)
+
+#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
#define IS_ISO2022ESC(c2) \
- ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
- (c2) == '.' || (c2) == '&')
- /* this is not a complete list of ISO-2022 escape sequence headers.
- * but, it's enough to implement CJK instances of iso-2022. */
-
-#define MAP_UNMAPPABLE 0xFFFF
-#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */
-
-#define F_SHIFTED 0x01
-#define F_ESCTHROUGHOUT 0x02
-
-#define STATE_SETG(dn, v) ((state)->c[dn]) = (v);
-#define STATE_GETG(dn) ((state)->c[dn])
-
-#define STATE_G0 STATE_GETG(0)
-#define STATE_G1 STATE_GETG(1)
-#define STATE_G2 STATE_GETG(2)
-#define STATE_G3 STATE_GETG(3)
-#define STATE_SETG0(v) STATE_SETG(0, v)
-#define STATE_SETG1(v) STATE_SETG(1, v)
-#define STATE_SETG2(v) STATE_SETG(2, v)
-#define STATE_SETG3(v) STATE_SETG(3, v)
-
-#define STATE_SETFLAG(f) ((state)->c[4]) |= (f);
-#define STATE_GETFLAG(f) ((state)->c[4] & (f))
-#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f);
-#define STATE_CLEARFLAGS() ((state)->c[4]) = 0;
-
-#define ISO2022_CONFIG ((const struct iso2022_config *)config)
-#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
-#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)
+ ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
+ (c2) == '.' || (c2) == '&')
+ /* this is not a complete list of ISO-2022 escape sequence headers.
+ * but, it's enough to implement CJK instances of iso-2022. */
+
+#define MAP_UNMAPPABLE 0xFFFF
+#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */
+
+#define F_SHIFTED 0x01
+#define F_ESCTHROUGHOUT 0x02
+
+#define STATE_SETG(dn, v) ((state)->c[dn]) = (v);
+#define STATE_GETG(dn) ((state)->c[dn])
+
+#define STATE_G0 STATE_GETG(0)
+#define STATE_G1 STATE_GETG(1)
+#define STATE_G2 STATE_GETG(2)
+#define STATE_G3 STATE_GETG(3)
+#define STATE_SETG0(v) STATE_SETG(0, v)
+#define STATE_SETG1(v) STATE_SETG(1, v)
+#define STATE_SETG2(v) STATE_SETG(2, v)
+#define STATE_SETG3(v) STATE_SETG(3, v)
+
+#define STATE_SETFLAG(f) ((state)->c[4]) |= (f);
+#define STATE_GETFLAG(f) ((state)->c[4] & (f))
+#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f);
+#define STATE_CLEARFLAGS() ((state)->c[4]) = 0;
+
+#define ISO2022_CONFIG ((const struct iso2022_config *)config)
+#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
+#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)
/* iso2022_config.flags */
-#define NO_SHIFT 0x01
-#define USE_G2 0x02
-#define USE_JISX0208_EXT 0x04
+#define NO_SHIFT 0x01
+#define USE_G2 0x02
+#define USE_JISX0208_EXT 0x04
/*-*- internal data structures -*-*/
@@ -106,434 +106,434 @@ typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
struct iso2022_designation {
- unsigned char mark;
- unsigned char plane;
- unsigned char width;
- iso2022_init_func initializer;
- iso2022_decode_func decoder;
- iso2022_encode_func encoder;
+ unsigned char mark;
+ unsigned char plane;
+ unsigned char width;
+ iso2022_init_func initializer;
+ iso2022_decode_func decoder;
+ iso2022_encode_func encoder;
};
struct iso2022_config {
- int flags;
- const struct iso2022_designation *designations; /* non-ascii desigs */
+ int flags;
+ const struct iso2022_designation *designations; /* non-ascii desigs */
};
/*-*- iso-2022 codec implementation -*-*/
CODEC_INIT(iso2022)
{
- const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
- for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
- if (desig->initializer != NULL && desig->initializer() != 0)
- return -1;
- return 0;
+ const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
+ for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
+ if (desig->initializer != NULL && desig->initializer() != 0)
+ return -1;
+ return 0;
}
ENCODER_INIT(iso2022)
{
- STATE_CLEARFLAGS()
- STATE_SETG0(CHARSET_ASCII)
- STATE_SETG1(CHARSET_ASCII)
- return 0;
+ STATE_CLEARFLAGS()
+ STATE_SETG0(CHARSET_ASCII)
+ STATE_SETG1(CHARSET_ASCII)
+ return 0;
}
ENCODER_RESET(iso2022)
{
- if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- NEXT_OUT(1)
- STATE_CLEARFLAG(F_SHIFTED)
- }
- if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- NEXT_OUT(3)
- STATE_SETG0(CHARSET_ASCII)
- }
- return 0;
+ if (STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SI)
+ NEXT_OUT(1)
+ STATE_CLEARFLAG(F_SHIFTED)
+ }
+ if (STATE_G0 != CHARSET_ASCII) {
+ WRITE3(ESC, '(', 'B')
+ NEXT_OUT(3)
+ STATE_SETG0(CHARSET_ASCII)
+ }
+ return 0;
}
ENCODER(iso2022)
{
- while (inleft > 0) {
- const struct iso2022_designation *dsg;
- DBCHAR encoded;
- ucs4_t c = **inbuf;
- Py_ssize_t insize;
-
- if (c < 0x80) {
- if (STATE_G0 != CHARSET_ASCII) {
- WRITE3(ESC, '(', 'B')
- STATE_SETG0(CHARSET_ASCII)
- NEXT_OUT(3)
- }
- if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
- }
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
-
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
-
- encoded = MAP_UNMAPPABLE;
- for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
- Py_ssize_t length = 1;
- encoded = dsg->encoder(&c, &length);
- if (encoded == MAP_MULTIPLE_AVAIL) {
- /* this implementation won't work for pair
- * of non-bmp characters. */
- if (inleft < 2) {
- if (!(flags & MBENC_FLUSH))
- return MBERR_TOOFEW;
- length = -1;
- }
- else
- length = 2;
+ while (inleft > 0) {
+ const struct iso2022_designation *dsg;
+ DBCHAR encoded;
+ ucs4_t c = **inbuf;
+ Py_ssize_t insize;
+
+ if (c < 0x80) {
+ if (STATE_G0 != CHARSET_ASCII) {
+ WRITE3(ESC, '(', 'B')
+ STATE_SETG0(CHARSET_ASCII)
+ NEXT_OUT(3)
+ }
+ if (STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SI)
+ STATE_CLEARFLAG(F_SHIFTED)
+ NEXT_OUT(1)
+ }
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ DECODE_SURROGATE(c)
+ insize = GET_INSIZE(c);
+
+ encoded = MAP_UNMAPPABLE;
+ for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+ Py_ssize_t length = 1;
+ encoded = dsg->encoder(&c, &length);
+ if (encoded == MAP_MULTIPLE_AVAIL) {
+ /* this implementation won't work for pair
+ * of non-bmp characters. */
+ if (inleft < 2) {
+ if (!(flags & MBENC_FLUSH))
+ return MBERR_TOOFEW;
+ length = -1;
+ }
+ else
+ length = 2;
#if Py_UNICODE_SIZE == 2
- if (length == 2) {
- ucs4_t u4in[2];
- u4in[0] = (ucs4_t)IN1;
- u4in[1] = (ucs4_t)IN2;
- encoded = dsg->encoder(u4in, &length);
- } else
- encoded = dsg->encoder(&c, &length);
+ if (length == 2) {
+ ucs4_t u4in[2];
+ u4in[0] = (ucs4_t)IN1;
+ u4in[1] = (ucs4_t)IN2;
+ encoded = dsg->encoder(u4in, &length);
+ } else
+ encoded = dsg->encoder(&c, &length);
#else
- encoded = dsg->encoder(&c, &length);
+ encoded = dsg->encoder(&c, &length);
#endif
- if (encoded != MAP_UNMAPPABLE) {
- insize = length;
- break;
- }
- }
- else if (encoded != MAP_UNMAPPABLE)
- break;
- }
-
- if (!dsg->mark)
- return 1;
- assert(dsg->width == 1 || dsg->width == 2);
-
- switch (dsg->plane) {
- case 0: /* G0 */
- if (STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SI)
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_OUT(1)
- }
- if (STATE_G0 != dsg->mark) {
- if (dsg->width == 1) {
- WRITE3(ESC, '(', ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
- }
- else if (dsg->mark == CHARSET_JISX0208) {
- WRITE3(ESC, '$', ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(3)
- }
- else {
- WRITE4(ESC, '$', '(',
- ESCMARK(dsg->mark))
- STATE_SETG0(dsg->mark)
- NEXT_OUT(4)
- }
- }
- break;
- case 1: /* G1 */
- if (STATE_G1 != dsg->mark) {
- if (dsg->width == 1) {
- WRITE3(ESC, ')', ESCMARK(dsg->mark))
- STATE_SETG1(dsg->mark)
- NEXT_OUT(3)
- }
- else {
- WRITE4(ESC, '$', ')',
- ESCMARK(dsg->mark))
- STATE_SETG1(dsg->mark)
- NEXT_OUT(4)
- }
- }
- if (!STATE_GETFLAG(F_SHIFTED)) {
- WRITE1(SO)
- STATE_SETFLAG(F_SHIFTED)
- NEXT_OUT(1)
- }
- break;
- default: /* G2 and G3 is not supported: no encoding in
- * CJKCodecs are using them yet */
- return MBERR_INTERNAL;
- }
-
- if (dsg->width == 1) {
- WRITE1((unsigned char)encoded)
- NEXT_OUT(1)
- }
- else {
- WRITE2(encoded >> 8, encoded & 0xff)
- NEXT_OUT(2)
- }
- NEXT_IN(insize)
- }
-
- return 0;
+ if (encoded != MAP_UNMAPPABLE) {
+ insize = length;
+ break;
+ }
+ }
+ else if (encoded != MAP_UNMAPPABLE)
+ break;
+ }
+
+ if (!dsg->mark)
+ return 1;
+ assert(dsg->width == 1 || dsg->width == 2);
+
+ switch (dsg->plane) {
+ case 0: /* G0 */
+ if (STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SI)
+ STATE_CLEARFLAG(F_SHIFTED)
+ NEXT_OUT(1)
+ }
+ if (STATE_G0 != dsg->mark) {
+ if (dsg->width == 1) {
+ WRITE3(ESC, '(', ESCMARK(dsg->mark))
+ STATE_SETG0(dsg->mark)
+ NEXT_OUT(3)
+ }
+ else if (dsg->mark == CHARSET_JISX0208) {
+ WRITE3(ESC, '$', ESCMARK(dsg->mark))
+ STATE_SETG0(dsg->mark)
+ NEXT_OUT(3)
+ }
+ else {
+ WRITE4(ESC, '$', '(',
+ ESCMARK(dsg->mark))
+ STATE_SETG0(dsg->mark)
+ NEXT_OUT(4)
+ }
+ }
+ break;
+ case 1: /* G1 */
+ if (STATE_G1 != dsg->mark) {
+ if (dsg->width == 1) {
+ WRITE3(ESC, ')', ESCMARK(dsg->mark))
+ STATE_SETG1(dsg->mark)
+ NEXT_OUT(3)
+ }
+ else {
+ WRITE4(ESC, '$', ')',
+ ESCMARK(dsg->mark))
+ STATE_SETG1(dsg->mark)
+ NEXT_OUT(4)
+ }
+ }
+ if (!STATE_GETFLAG(F_SHIFTED)) {
+ WRITE1(SO)
+ STATE_SETFLAG(F_SHIFTED)
+ NEXT_OUT(1)
+ }
+ break;
+ default: /* G2 and G3 is not supported: no encoding in
+ * CJKCodecs are using them yet */
+ return MBERR_INTERNAL;
+ }
+
+ if (dsg->width == 1) {
+ WRITE1((unsigned char)encoded)
+ NEXT_OUT(1)
+ }
+ else {
+ WRITE2(encoded >> 8, encoded & 0xff)
+ NEXT_OUT(2)
+ }
+ NEXT_IN(insize)
+ }
+
+ return 0;
}
DECODER_INIT(iso2022)
{
- STATE_CLEARFLAGS()
- STATE_SETG0(CHARSET_ASCII)
- STATE_SETG1(CHARSET_ASCII)
- STATE_SETG2(CHARSET_ASCII)
- return 0;
+ STATE_CLEARFLAGS()
+ STATE_SETG0(CHARSET_ASCII)
+ STATE_SETG1(CHARSET_ASCII)
+ STATE_SETG2(CHARSET_ASCII)
+ return 0;
}
DECODER_RESET(iso2022)
{
- STATE_SETG0(CHARSET_ASCII)
- STATE_CLEARFLAG(F_SHIFTED)
- return 0;
+ STATE_SETG0(CHARSET_ASCII)
+ STATE_CLEARFLAG(F_SHIFTED)
+ return 0;
}
static Py_ssize_t
iso2022processesc(const void *config, MultibyteCodec_State *state,
- const unsigned char **inbuf, Py_ssize_t *inleft)
+ const unsigned char **inbuf, Py_ssize_t *inleft)
{
- unsigned char charset, designation;
- Py_ssize_t i, esclen;
-
- for (i = 1;i < MAX_ESCSEQLEN;i++) {
- if (i >= *inleft)
- return MBERR_TOOFEW;
- if (IS_ESCEND((*inbuf)[i])) {
- esclen = i + 1;
- break;
- }
- else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
- (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
- i += 2;
- }
-
- if (i >= MAX_ESCSEQLEN)
- return 1; /* unterminated escape sequence */
-
- switch (esclen) {
- case 3:
- if (IN2 == '$') {
- charset = IN3 | CHARSET_DBCS;
- designation = 0;
- }
- else {
- charset = IN3;
- if (IN2 == '(') designation = 0;
- else if (IN2 == ')') designation = 1;
- else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
- designation = 2;
- else return 3;
- }
- break;
- case 4:
- if (IN2 != '$')
- return 4;
-
- charset = IN4 | CHARSET_DBCS;
- if (IN3 == '(') designation = 0;
- else if (IN3 == ')') designation = 1;
- else return 4;
- break;
- case 6: /* designation with prefix */
- if (CONFIG_ISSET(USE_JISX0208_EXT) &&
- (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
- (*inbuf)[5] == 'B') {
- charset = 'B' | CHARSET_DBCS;
- designation = 0;
- }
- else
- return 6;
- break;
- default:
- return esclen;
- }
-
- /* raise error when the charset is not designated for this encoding */
- if (charset != CHARSET_ASCII) {
- const struct iso2022_designation *dsg;
-
- for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
- if (dsg->mark == charset)
- break;
- if (!dsg->mark)
- return esclen;
- }
-
- STATE_SETG(designation, charset)
- *inleft -= esclen;
- (*inbuf) += esclen;
- return 0;
+ unsigned char charset, designation;
+ Py_ssize_t i, esclen;
+
+ for (i = 1;i < MAX_ESCSEQLEN;i++) {
+ if (i >= *inleft)
+ return MBERR_TOOFEW;
+ if (IS_ESCEND((*inbuf)[i])) {
+ esclen = i + 1;
+ break;
+ }
+ else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
+ (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
+ i += 2;
+ }
+
+ if (i >= MAX_ESCSEQLEN)
+ return 1; /* unterminated escape sequence */
+
+ switch (esclen) {
+ case 3:
+ if (IN2 == '$') {
+ charset = IN3 | CHARSET_DBCS;
+ designation = 0;
+ }
+ else {
+ charset = IN3;
+ if (IN2 == '(') designation = 0;
+ else if (IN2 == ')') designation = 1;
+ else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
+ designation = 2;
+ else return 3;
+ }
+ break;
+ case 4:
+ if (IN2 != '$')
+ return 4;
+
+ charset = IN4 | CHARSET_DBCS;
+ if (IN3 == '(') designation = 0;
+ else if (IN3 == ')') designation = 1;
+ else return 4;
+ break;
+ case 6: /* designation with prefix */
+ if (CONFIG_ISSET(USE_JISX0208_EXT) &&
+ (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
+ (*inbuf)[5] == 'B') {
+ charset = 'B' | CHARSET_DBCS;
+ designation = 0;
+ }
+ else
+ return 6;
+ break;
+ default:
+ return esclen;
+ }
+
+ /* raise error when the charset is not designated for this encoding */
+ if (charset != CHARSET_ASCII) {
+ const struct iso2022_designation *dsg;
+
+ for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
+ if (dsg->mark == charset)
+ break;
+ if (!dsg->mark)
+ return esclen;
+ }
+
+ STATE_SETG(designation, charset)
+ *inleft -= esclen;
+ (*inbuf) += esclen;
+ return 0;
}
-#define ISO8859_7_DECODE(c, assi) \
- if ((c) < 0xa0) (assi) = (c); \
- else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
- (assi) = (c); \
- else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
- (0xbffffd77L & (1L << ((c)-0xb4))))) \
- (assi) = 0x02d0 + (c); \
- else if ((c) == 0xa1) (assi) = 0x2018; \
- else if ((c) == 0xa2) (assi) = 0x2019; \
- else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, assi) \
+ if ((c) < 0xa0) (assi) = (c); \
+ else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
+ (assi) = (c); \
+ else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
+ (0xbffffd77L & (1L << ((c)-0xb4))))) \
+ (assi) = 0x02d0 + (c); \
+ else if ((c) == 0xa1) (assi) = 0x2018; \
+ else if ((c) == 0xa2) (assi) = 0x2019; \
+ else if ((c) == 0xaf) (assi) = 0x2015;
static Py_ssize_t
iso2022processg2(const void *config, MultibyteCodec_State *state,
- const unsigned char **inbuf, Py_ssize_t *inleft,
- Py_UNICODE **outbuf, Py_ssize_t *outleft)
+ const unsigned char **inbuf, Py_ssize_t *inleft,
+ Py_UNICODE **outbuf, Py_ssize_t *outleft)
{
- /* not written to use encoder, decoder functions because only few
- * encodings use G2 designations in CJKCodecs */
- if (STATE_G2 == CHARSET_ISO8859_1) {
- if (IN3 < 0x80)
- OUT1(IN3 + 0x80)
- else
- return 3;
- }
- else if (STATE_G2 == CHARSET_ISO8859_7) {
- ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
- else return 3;
- }
- else if (STATE_G2 == CHARSET_ASCII) {
- if (IN3 & 0x80) return 3;
- else **outbuf = IN3;
- }
- else
- return MBERR_INTERNAL;
-
- (*inbuf) += 3;
- *inleft -= 3;
- (*outbuf) += 1;
- *outleft -= 1;
- return 0;
+ /* not written to use encoder, decoder functions because only few
+ * encodings use G2 designations in CJKCodecs */
+ if (STATE_G2 == CHARSET_ISO8859_1) {
+ if (IN3 < 0x80)
+ OUT1(IN3 + 0x80)
+ else
+ return 3;
+ }
+ else if (STATE_G2 == CHARSET_ISO8859_7) {
+ ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
+ else return 3;
+ }
+ else if (STATE_G2 == CHARSET_ASCII) {
+ if (IN3 & 0x80) return 3;
+ else **outbuf = IN3;
+ }
+ else
+ return MBERR_INTERNAL;
+
+ (*inbuf) += 3;
+ *inleft -= 3;
+ (*outbuf) += 1;
+ *outleft -= 1;
+ return 0;
}
DECODER(iso2022)
{
- const struct iso2022_designation *dsgcache = NULL;
-
- while (inleft > 0) {
- unsigned char c = IN1;
- Py_ssize_t err;
-
- if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
- /* ESC throughout mode:
- * for non-iso2022 escape sequences */
- WRITE1(c) /* assume as ISO-8859-1 */
- NEXT(1, 1)
- if (IS_ESCEND(c)) {
- STATE_CLEARFLAG(F_ESCTHROUGHOUT)
- }
- continue;
- }
-
- switch (c) {
- case ESC:
- REQUIRE_INBUF(2)
- if (IS_ISO2022ESC(IN2)) {
- err = iso2022processesc(config, state,
- inbuf, &inleft);
- if (err != 0)
- return err;
- }
- else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
- REQUIRE_INBUF(3)
- err = iso2022processg2(config, state,
- inbuf, &inleft, outbuf, &outleft);
- if (err != 0)
- return err;
- }
- else {
- WRITE1(ESC)
- STATE_SETFLAG(F_ESCTHROUGHOUT)
- NEXT(1, 1)
- }
- break;
- case SI:
- if (CONFIG_ISSET(NO_SHIFT))
- goto bypass;
- STATE_CLEARFLAG(F_SHIFTED)
- NEXT_IN(1)
- break;
- case SO:
- if (CONFIG_ISSET(NO_SHIFT))
- goto bypass;
- STATE_SETFLAG(F_SHIFTED)
- NEXT_IN(1)
- break;
- case LF:
- STATE_CLEARFLAG(F_SHIFTED)
- WRITE1(LF)
- NEXT(1, 1)
- break;
- default:
- if (c < 0x20) /* C0 */
- goto bypass;
- else if (c >= 0x80)
- return 1;
- else {
- const struct iso2022_designation *dsg;
- unsigned char charset;
- ucs4_t decoded;
-
- if (STATE_GETFLAG(F_SHIFTED))
- charset = STATE_G1;
- else
- charset = STATE_G0;
-
- if (charset == CHARSET_ASCII) {
-bypass: WRITE1(c)
- NEXT(1, 1)
- break;
- }
-
- if (dsgcache != NULL &&
- dsgcache->mark == charset)
- dsg = dsgcache;
- else {
- for (dsg = CONFIG_DESIGNATIONS;
- dsg->mark != charset
+ const struct iso2022_designation *dsgcache = NULL;
+
+ while (inleft > 0) {
+ unsigned char c = IN1;
+ Py_ssize_t err;
+
+ if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
+ /* ESC throughout mode:
+ * for non-iso2022 escape sequences */
+ WRITE1(c) /* assume as ISO-8859-1 */
+ NEXT(1, 1)
+ if (IS_ESCEND(c)) {
+ STATE_CLEARFLAG(F_ESCTHROUGHOUT)
+ }
+ continue;
+ }
+
+ switch (c) {
+ case ESC:
+ REQUIRE_INBUF(2)
+ if (IS_ISO2022ESC(IN2)) {
+ err = iso2022processesc(config, state,
+ inbuf, &inleft);
+ if (err != 0)
+ return err;
+ }
+ else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
+ REQUIRE_INBUF(3)
+ err = iso2022processg2(config, state,
+ inbuf, &inleft, outbuf, &outleft);
+ if (err != 0)
+ return err;
+ }
+ else {
+ WRITE1(ESC)
+ STATE_SETFLAG(F_ESCTHROUGHOUT)
+ NEXT(1, 1)
+ }
+ break;
+ case SI:
+ if (CONFIG_ISSET(NO_SHIFT))
+ goto bypass;
+ STATE_CLEARFLAG(F_SHIFTED)
+ NEXT_IN(1)
+ break;
+ case SO:
+ if (CONFIG_ISSET(NO_SHIFT))
+ goto bypass;
+ STATE_SETFLAG(F_SHIFTED)
+ NEXT_IN(1)
+ break;
+ case LF:
+ STATE_CLEARFLAG(F_SHIFTED)
+ WRITE1(LF)
+ NEXT(1, 1)
+ break;
+ default:
+ if (c < 0x20) /* C0 */
+ goto bypass;
+ else if (c >= 0x80)
+ return 1;
+ else {
+ const struct iso2022_designation *dsg;
+ unsigned char charset;
+ ucs4_t decoded;
+
+ if (STATE_GETFLAG(F_SHIFTED))
+ charset = STATE_G1;
+ else
+ charset = STATE_G0;
+
+ if (charset == CHARSET_ASCII) {
+bypass: WRITE1(c)
+ NEXT(1, 1)
+ break;
+ }
+
+ if (dsgcache != NULL &&
+ dsgcache->mark == charset)
+ dsg = dsgcache;
+ else {
+ for (dsg = CONFIG_DESIGNATIONS;
+ dsg->mark != charset
#ifdef Py_DEBUG
- && dsg->mark != '\0'
+ && dsg->mark != '\0'
#endif
- ;dsg++)
- /* noop */;
- assert(dsg->mark != '\0');
- dsgcache = dsg;
- }
-
- REQUIRE_INBUF(dsg->width)
- decoded = dsg->decoder(*inbuf);
- if (decoded == MAP_UNMAPPABLE)
- return dsg->width;
-
- if (decoded < 0x10000) {
- WRITE1(decoded)
- NEXT_OUT(1)
- }
- else if (decoded < 0x30000) {
- WRITEUCS4(decoded)
- }
- else { /* JIS X 0213 pairs */
- WRITE2(decoded >> 16, decoded & 0xffff)
- NEXT_OUT(2)
- }
- NEXT_IN(dsg->width)
- }
- break;
- }
- }
- return 0;
+ ;dsg++)
+ /* noop */;
+ assert(dsg->mark != '\0');
+ dsgcache = dsg;
+ }
+
+ REQUIRE_INBUF(dsg->width)
+ decoded = dsg->decoder(*inbuf);
+ if (decoded == MAP_UNMAPPABLE)
+ return dsg->width;
+
+ if (decoded < 0x10000) {
+ WRITE1(decoded)
+ NEXT_OUT(1)
+ }
+ else if (decoded < 0x30000) {
+ WRITEUCS4(decoded)
+ }
+ else { /* JIS X 0213 pairs */
+ WRITE2(decoded >> 16, decoded & 0xffff)
+ NEXT_OUT(2)
+ }
+ NEXT_IN(dsg->width)
+ }
+ break;
+ }
+ }
+ return 0;
}
/*-*- mapping table holders -*-*/
@@ -567,542 +567,542 @@ DECMAP(gb2312)
static int
ksx1001_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||
- IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||
+ IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
ksx1001_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(ksx1001, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ TRYMAP_DEC(ksx1001, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- TRYMAP_ENC(cp949, coded, *data)
- if (!(coded & 0x8000))
- return coded;
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ TRYMAP_ENC(cp949, coded, *data)
+ if (!(coded & 0x8000))
+ return coded;
+ }
+ return MAP_UNMAPPABLE;
}
static int
jisx0208_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
jisx0208_decoder(const unsigned char *data)
{
- ucs4_t u;
- if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
- return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+ return 0xff3c;
+ else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
- return 0x2140;
- else TRYMAP_ENC(jisxcommon, coded, *data) {
- if (!(coded & 0x8000))
- return coded;
- }
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
+ return 0x2140;
+ else TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (!(coded & 0x8000))
+ return coded;
+ }
+ }
+ return MAP_UNMAPPABLE;
}
static int
jisx0212_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
jisx0212_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0212, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ TRYMAP_DEC(jisx0212, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- TRYMAP_ENC(jisxcommon, coded, *data) {
- if (coded & 0x8000)
- return coded & 0x7fff;
- }
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (coded & 0x8000)
+ return coded & 0x7fff;
+ }
+ }
+ return MAP_UNMAPPABLE;
}
static int
jisx0213_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- jisx0208_init() ||
- IMPORT_MAP(jp, jisx0213_bmp,
- &jisx0213_bmp_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0213_1_bmp,
- NULL, &jisx0213_1_bmp_decmap) ||
- IMPORT_MAP(jp, jisx0213_2_bmp,
- NULL, &jisx0213_2_bmp_decmap) ||
- IMPORT_MAP(jp, jisx0213_emp,
- &jisx0213_emp_encmap, NULL) ||
- IMPORT_MAP(jp, jisx0213_1_emp,
- NULL, &jisx0213_1_emp_decmap) ||
- IMPORT_MAP(jp, jisx0213_2_emp,
- NULL, &jisx0213_2_emp_decmap) ||
- IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,
- &jisx0213_pair_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ jisx0208_init() ||
+ IMPORT_MAP(jp, jisx0213_bmp,
+ &jisx0213_bmp_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0213_1_bmp,
+ NULL, &jisx0213_1_bmp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_2_bmp,
+ NULL, &jisx0213_2_bmp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_emp,
+ &jisx0213_emp_encmap, NULL) ||
+ IMPORT_MAP(jp, jisx0213_1_emp,
+ NULL, &jisx0213_1_emp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_2_emp,
+ NULL, &jisx0213_2_emp_decmap) ||
+ IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,
+ &jisx0213_pair_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
#define config ((void *)2000)
static ucs4_t
jisx0213_2000_1_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
- else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
- return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
- u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
+ else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+ return 0xff3c;
+ else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
static ucs4_t
jisx0213_2000_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
- u |= 0x20000;
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
+ TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
#undef config
static ucs4_t
jisx0213_2004_1_decoder(const unsigned char *data)
{
- ucs4_t u;
- if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
- return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
- u |= 0x20000;
- else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+ return 0xff3c;
+ else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
static ucs4_t
jisx0213_2004_2_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
- u |= 0x20000;
- else
- return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
+ else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+ u |= 0x20000;
+ else
+ return MAP_UNMAPPABLE;
+ return u;
}
static DBCHAR
jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
{
- DBCHAR coded;
-
- switch (*length) {
- case 1: /* first character */
- if (*data >= 0x10000) {
- if ((*data) >> 16 == 0x20000 >> 16) {
- EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
- else TRYMAP_ENC(jisx0213_emp, coded,
- (*data) & 0xffff)
- return coded;
- }
- return MAP_UNMAPPABLE;
- }
-
- EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
- else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
- if (coded == MULTIC)
- return MAP_MULTIPLE_AVAIL;
- }
- else TRYMAP_ENC(jisxcommon, coded, *data) {
- if (coded & 0x8000)
- return MAP_UNMAPPABLE;
- }
- else
- return MAP_UNMAPPABLE;
- return coded;
- case 2: /* second character of unicode pair */
- coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded == DBCINV) {
- *length = 1;
- coded = find_pairencmap((ucs2_t)data[0], 0,
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded == DBCINV)
- return MAP_UNMAPPABLE;
- }
- else
- return coded;
- case -1: /* flush unterminated */
- *length = 1;
- coded = find_pairencmap((ucs2_t)data[0], 0,
- jisx0213_pair_encmap, JISX0213_ENCPAIRS);
- if (coded == DBCINV)
- return MAP_UNMAPPABLE;
- else
- return coded;
- default:
- return MAP_UNMAPPABLE;
- }
+ DBCHAR coded;
+
+ switch (*length) {
+ case 1: /* first character */
+ if (*data >= 0x10000) {
+ if ((*data) >> 16 == 0x20000 >> 16) {
+ EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
+ else TRYMAP_ENC(jisx0213_emp, coded,
+ (*data) & 0xffff)
+ return coded;
+ }
+ return MAP_UNMAPPABLE;
+ }
+
+ EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
+ else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
+ if (coded == MULTIC)
+ return MAP_MULTIPLE_AVAIL;
+ }
+ else TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (coded & 0x8000)
+ return MAP_UNMAPPABLE;
+ }
+ else
+ return MAP_UNMAPPABLE;
+ return coded;
+ case 2: /* second character of unicode pair */
+ coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded == DBCINV) {
+ *length = 1;
+ coded = find_pairencmap((ucs2_t)data[0], 0,
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded == DBCINV)
+ return MAP_UNMAPPABLE;
+ }
+ else
+ return coded;
+ case -1: /* flush unterminated */
+ *length = 1;
+ coded = find_pairencmap((ucs2_t)data[0], 0,
+ jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+ if (coded == DBCINV)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
+ default:
+ return MAP_UNMAPPABLE;
+ }
}
static DBCHAR
jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return MAP_UNMAPPABLE;
- else
- return coded;
+ DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
}
static DBCHAR
jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- Py_ssize_t ilength = *length;
-
- coded = jisx0213_encoder(data, length, (void *)2000);
- switch (ilength) {
- case 1:
- if (coded == MAP_MULTIPLE_AVAIL)
- return MAP_MULTIPLE_AVAIL;
- else
- return MAP_UNMAPPABLE;
- case 2:
- if (*length != 2)
- return MAP_UNMAPPABLE;
- else
- return coded;
- default:
- return MAP_UNMAPPABLE;
- }
+ DBCHAR coded;
+ Py_ssize_t ilength = *length;
+
+ coded = jisx0213_encoder(data, length, (void *)2000);
+ switch (ilength) {
+ case 1:
+ if (coded == MAP_MULTIPLE_AVAIL)
+ return MAP_MULTIPLE_AVAIL;
+ else
+ return MAP_UNMAPPABLE;
+ case 2:
+ if (*length != 2)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
+ default:
+ return MAP_UNMAPPABLE;
+ }
}
static DBCHAR
jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return coded & 0x7fff;
- else
- return MAP_UNMAPPABLE;
+ DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return coded & 0x7fff;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, NULL);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return MAP_UNMAPPABLE;
- else
- return coded;
+ DBCHAR coded = jisx0213_encoder(data, length, NULL);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
}
static DBCHAR
jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- Py_ssize_t ilength = *length;
-
- coded = jisx0213_encoder(data, length, NULL);
- switch (ilength) {
- case 1:
- if (coded == MAP_MULTIPLE_AVAIL)
- return MAP_MULTIPLE_AVAIL;
- else
- return MAP_UNMAPPABLE;
- case 2:
- if (*length != 2)
- return MAP_UNMAPPABLE;
- else
- return coded;
- default:
- return MAP_UNMAPPABLE;
- }
+ DBCHAR coded;
+ Py_ssize_t ilength = *length;
+
+ coded = jisx0213_encoder(data, length, NULL);
+ switch (ilength) {
+ case 1:
+ if (coded == MAP_MULTIPLE_AVAIL)
+ return MAP_MULTIPLE_AVAIL;
+ else
+ return MAP_UNMAPPABLE;
+ case 2:
+ if (*length != 2)
+ return MAP_UNMAPPABLE;
+ else
+ return coded;
+ default:
+ return MAP_UNMAPPABLE;
+ }
}
static DBCHAR
jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded = jisx0213_encoder(data, length, NULL);
- if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
- return coded;
- else if (coded & 0x8000)
- return coded & 0x7fff;
- else
- return MAP_UNMAPPABLE;
+ DBCHAR coded = jisx0213_encoder(data, length, NULL);
+ if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+ return coded;
+ else if (coded & 0x8000)
+ return coded & 0x7fff;
+ else
+ return MAP_UNMAPPABLE;
}
static ucs4_t
jisx0201_r_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_R_DECODE(*data, u)
- else return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ JISX0201_R_DECODE(*data, u)
+ else return MAP_UNMAPPABLE;
+ return u;
}
static DBCHAR
jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- JISX0201_R_ENCODE(*data, coded)
- else return MAP_UNMAPPABLE;
- return coded;
+ DBCHAR coded;
+ JISX0201_R_ENCODE(*data, coded)
+ else return MAP_UNMAPPABLE;
+ return coded;
}
static ucs4_t
jisx0201_k_decoder(const unsigned char *data)
{
- ucs4_t u;
- JISX0201_K_DECODE(*data ^ 0x80, u)
- else return MAP_UNMAPPABLE;
- return u;
+ ucs4_t u;
+ JISX0201_K_DECODE(*data ^ 0x80, u)
+ else return MAP_UNMAPPABLE;
+ return u;
}
static DBCHAR
jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- JISX0201_K_ENCODE(*data, coded)
- else return MAP_UNMAPPABLE;
- return coded - 0x80;
+ DBCHAR coded;
+ JISX0201_K_ENCODE(*data, coded)
+ else return MAP_UNMAPPABLE;
+ return coded - 0x80;
}
static int
gb2312_init(void)
{
- static int initialized = 0;
-
- if (!initialized && (
- IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||
- IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))
- return -1;
- initialized = 1;
- return 0;
+ static int initialized = 0;
+
+ if (!initialized && (
+ IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||
+ IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))
+ return -1;
+ initialized = 1;
+ return 0;
}
static ucs4_t
gb2312_decoder(const unsigned char *data)
{
- ucs4_t u;
- TRYMAP_DEC(gb2312, u, data[0], data[1])
- return u;
- else
- return MAP_UNMAPPABLE;
+ ucs4_t u;
+ TRYMAP_DEC(gb2312, u, data[0], data[1])
+ return u;
+ else
+ return MAP_UNMAPPABLE;
}
static DBCHAR
gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- DBCHAR coded;
- assert(*length == 1);
- if (*data < 0x10000) {
- TRYMAP_ENC(gbcommon, coded, *data) {
- if (!(coded & 0x8000))
- return coded;
- }
- }
- return MAP_UNMAPPABLE;
+ DBCHAR coded;
+ assert(*length == 1);
+ if (*data < 0x10000) {
+ TRYMAP_ENC(gbcommon, coded, *data) {
+ if (!(coded & 0x8000))
+ return coded;
+ }
+ }
+ return MAP_UNMAPPABLE;
}
static ucs4_t
dummy_decoder(const unsigned char *data)
{
- return MAP_UNMAPPABLE;
+ return MAP_UNMAPPABLE;
}
static DBCHAR
dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
{
- return MAP_UNMAPPABLE;
+ return MAP_UNMAPPABLE;
}
/*-*- registry tables -*-*/
-#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \
- ksx1001_init, \
- ksx1001_decoder, ksx1001_encoder }
-#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \
- ksx1001_init, \
- ksx1001_decoder, ksx1001_encoder }
-#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \
- NULL, \
- jisx0201_r_decoder, jisx0201_r_encoder }
-#define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \
- NULL, \
- jisx0201_k_decoder, jisx0201_k_encoder }
-#define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \
- jisx0208_init, \
- jisx0208_decoder, jisx0208_encoder }
-#define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \
- jisx0208_init, \
- jisx0208_decoder, jisx0208_encoder }
-#define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \
- jisx0212_init, \
- jisx0212_decoder, jisx0212_encoder }
-#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2000_1_decoder, \
- jisx0213_2000_1_encoder }
+#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \
+ ksx1001_init, \
+ ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \
+ ksx1001_init, \
+ ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \
+ NULL, \
+ jisx0201_r_decoder, jisx0201_r_encoder }
+#define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \
+ NULL, \
+ jisx0201_k_decoder, jisx0201_k_encoder }
+#define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \
+ jisx0208_init, \
+ jisx0208_decoder, jisx0208_encoder }
+#define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \
+ jisx0208_init, \
+ jisx0208_decoder, jisx0208_encoder }
+#define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \
+ jisx0212_init, \
+ jisx0212_decoder, jisx0212_encoder }
+#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2000_1_decoder, \
+ jisx0213_2000_1_encoder }
#define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2000_1_decoder, \
- jisx0213_2000_1_encoder_paironly }
-#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \
- jisx0213_init, \
- jisx0213_2000_2_decoder, \
- jisx0213_2000_2_encoder }
-#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2004_1_decoder, \
- jisx0213_2004_1_encoder }
+ jisx0213_init, \
+ jisx0213_2000_1_decoder, \
+ jisx0213_2000_1_encoder_paironly }
+#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2000_2_decoder, \
+ jisx0213_2000_2_encoder }
+#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2004_1_decoder, \
+ jisx0213_2004_1_encoder }
#define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \
- jisx0213_init, \
- jisx0213_2004_1_decoder, \
- jisx0213_2004_1_encoder_paironly }
-#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \
- jisx0213_init, \
- jisx0213_2004_2_decoder, \
- jisx0213_2004_2_encoder }
-#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \
- gb2312_init, \
- gb2312_decoder, gb2312_encoder }
-#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \
- cns11643_init, \
- cns11643_1_decoder, cns11643_1_encoder }
-#define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \
- cns11643_init, \
- cns11643_2_decoder, cns11643_2_encoder }
-#define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \
- NULL, dummy_decoder, dummy_encoder }
-#define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \
- NULL, dummy_decoder, dummy_encoder }
-#define REGISTRY_SENTINEL { 0, }
-#define CONFIGDEF(var, attrs) \
- static const struct iso2022_config iso2022_##var##_config = { \
- attrs, iso2022_##var##_designations \
- };
+ jisx0213_init, \
+ jisx0213_2004_1_decoder, \
+ jisx0213_2004_1_encoder_paironly }
+#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \
+ jisx0213_init, \
+ jisx0213_2004_2_decoder, \
+ jisx0213_2004_2_encoder }
+#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \
+ gb2312_init, \
+ gb2312_decoder, gb2312_encoder }
+#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \
+ cns11643_init, \
+ cns11643_1_decoder, cns11643_1_encoder }
+#define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \
+ cns11643_init, \
+ cns11643_2_decoder, cns11643_2_encoder }
+#define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \
+ NULL, dummy_decoder, dummy_encoder }
+#define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \
+ NULL, dummy_decoder, dummy_encoder }
+#define REGISTRY_SENTINEL { 0, }
+#define CONFIGDEF(var, attrs) \
+ static const struct iso2022_config iso2022_##var##_config = { \
+ attrs, iso2022_##var##_designations \
+ };
static const struct iso2022_designation iso2022_kr_designations[] = {
- REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
+ REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
};
CONFIGDEF(kr, 0)
static const struct iso2022_designation iso2022_jp_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
- REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
+ REGISTRY_SENTINEL
};
CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_1_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
- REGISTRY_JISX0208_O, REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
+ REGISTRY_JISX0208_O, REGISTRY_SENTINEL
};
CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_2_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
- REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
- REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
+ REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
+ REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
};
CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_2004_designations[] = {
- REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
- REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
+ REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
+ REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
};
CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_3_designations[] = {
- REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
- REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
+ REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
+ REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
};
CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_ext_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
- REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
+ REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
};
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
@@ -1111,11 +1111,11 @@ BEGIN_MAPPINGS_LIST
/* no mapping table here */
END_MAPPINGS_LIST
-#define ISO2022_CODEC(variation) { \
- "iso2022_" #variation, \
- &iso2022_##variation##_config, \
- iso2022_codec_init, \
- _STATEFUL_METHODS(iso2022) \
+#define ISO2022_CODEC(variation) { \
+ "iso2022_" #variation, \
+ &iso2022_##variation##_config, \
+ iso2022_codec_init, \
+ _STATEFUL_METHODS(iso2022) \
},
BEGIN_CODECS_LIST
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index f49a10b..901d3be 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -19,124 +19,124 @@
ENCODER(cp932)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
- unsigned char c1, c2;
-
- if (c <= 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- else if (c >= 0xff61 && c <= 0xff9f) {
- WRITE1(c - 0xfec0)
- NEXT(1, 1)
- continue;
- }
- else if (c >= 0xf8f0 && c <= 0xf8f3) {
- /* Windows compatibility */
- REQUIRE_OUTBUF(1)
- if (c == 0xf8f0)
- OUT1(0xa0)
- else
- OUT1(c - 0xfef1 + 0xfd)
- NEXT(1, 1)
- continue;
- }
-
- UCS4INVALID(c)
- REQUIRE_OUTBUF(2)
-
- TRYMAP_ENC(cp932ext, code, c) {
- OUT1(code >> 8)
- OUT2(code & 0xff)
- }
- else TRYMAP_ENC(jisxcommon, code, c) {
- if (code & 0x8000) /* MSB set: JIS X 0212 */
- return 1;
-
- /* JIS X 0208 */
- c1 = code >> 8;
- c2 = code & 0xff;
- c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
- c1 = (c1 - 0x21) >> 1;
- OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
- }
- else if (c >= 0xe000 && c < 0xe758) {
- /* User-defined area */
- c1 = (Py_UNICODE)(c - 0xe000) / 188;
- c2 = (Py_UNICODE)(c - 0xe000) % 188;
- OUT1(c1 + 0xf0)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
- }
- else
- return 1;
-
- NEXT(1, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+ unsigned char c1, c2;
+
+ if (c <= 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ else if (c >= 0xff61 && c <= 0xff9f) {
+ WRITE1(c - 0xfec0)
+ NEXT(1, 1)
+ continue;
+ }
+ else if (c >= 0xf8f0 && c <= 0xf8f3) {
+ /* Windows compatibility */
+ REQUIRE_OUTBUF(1)
+ if (c == 0xf8f0)
+ OUT1(0xa0)
+ else
+ OUT1(c - 0xfef1 + 0xfd)
+ NEXT(1, 1)
+ continue;
+ }
+
+ UCS4INVALID(c)
+ REQUIRE_OUTBUF(2)
+
+ TRYMAP_ENC(cp932ext, code, c) {
+ OUT1(code >> 8)
+ OUT2(code & 0xff)
+ }
+ else TRYMAP_ENC(jisxcommon, code, c) {
+ if (code & 0x8000) /* MSB set: JIS X 0212 */
+ return 1;
+
+ /* JIS X 0208 */
+ c1 = code >> 8;
+ c2 = code & 0xff;
+ c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
+ c1 = (c1 - 0x21) >> 1;
+ OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+ OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ }
+ else if (c >= 0xe000 && c < 0xe758) {
+ /* User-defined area */
+ c1 = (Py_UNICODE)(c - 0xe000) / 188;
+ c2 = (Py_UNICODE)(c - 0xe000) % 188;
+ OUT1(c1 + 0xf0)
+ OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ }
+ else
+ return 1;
+
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(cp932)
{
- while (inleft > 0) {
- unsigned char c = IN1, c2;
-
- REQUIRE_OUTBUF(1)
- if (c <= 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
- else if (c >= 0xa0 && c <= 0xdf) {
- if (c == 0xa0)
- OUT1(0xf8f0) /* half-width katakana */
- else
- OUT1(0xfec0 + c)
- NEXT(1, 1)
- continue;
- }
- else if (c >= 0xfd/* && c <= 0xff*/) {
- /* Windows compatibility */
- OUT1(0xf8f1 - 0xfd + c)
- NEXT(1, 1)
- continue;
- }
-
- REQUIRE_INBUF(2)
- c2 = IN2;
-
- TRYMAP_DEC(cp932ext, **outbuf, c, c2);
- else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
- if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
-
- c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
- c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
- c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
- c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
-
- TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else return 2;
- }
- else if (c >= 0xf0 && c <= 0xf9) {
- if ((c2 >= 0x40 && c2 <= 0x7e) ||
- (c2 >= 0x80 && c2 <= 0xfc))
- OUT1(0xe000 + 188 * (c - 0xf0) +
- (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
- else
- return 2;
- }
- else
- return 2;
-
- NEXT(2, 1)
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1, c2;
+
+ REQUIRE_OUTBUF(1)
+ if (c <= 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+ else if (c >= 0xa0 && c <= 0xdf) {
+ if (c == 0xa0)
+ OUT1(0xf8f0) /* half-width katakana */
+ else
+ OUT1(0xfec0 + c)
+ NEXT(1, 1)
+ continue;
+ }
+ else if (c >= 0xfd/* && c <= 0xff*/) {
+ /* Windows compatibility */
+ OUT1(0xf8f1 - 0xfd + c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+
+ TRYMAP_DEC(cp932ext, **outbuf, c, c2);
+ else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
+ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
+ return 2;
+
+ c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+ c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+ c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
+ c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+
+ TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+ else return 2;
+ }
+ else if (c >= 0xf0 && c <= 0xf9) {
+ if ((c2 >= 0x40 && c2 <= 0x7e) ||
+ (c2 >= 0x80 && c2 <= 0xfc))
+ OUT1(0xe000 + 188 * (c - 0xf0) +
+ (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
+ else
+ return 2;
+ }
+ else
+ return 2;
+
+ NEXT(2, 1)
+ }
+
+ return 0;
}
@@ -146,166 +146,166 @@ DECODER(cp932)
ENCODER(euc_jis_2004)
{
- while (inleft > 0) {
- ucs4_t c = IN1;
- DBCHAR code;
- Py_ssize_t insize;
-
- if (c < 0x80) {
- WRITE1(c)
- NEXT(1, 1)
- continue;
- }
-
- DECODE_SURROGATE(c)
- insize = GET_INSIZE(c);
-
- if (c <= 0xFFFF) {
- EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
- else TRYMAP_ENC(jisx0213_bmp, code, c) {
- if (code == MULTIC) {
- if (inleft < 2) {
- if (flags & MBENC_FLUSH) {
- code = find_pairencmap(
- (ucs2_t)c, 0,
- jisx0213_pair_encmap,
- JISX0213_ENCPAIRS);
- if (code == DBCINV)
- return 1;
- }
- else
- return MBERR_TOOFEW;
- }
- else {
- code = find_pairencmap(
- (ucs2_t)c, (*inbuf)[1],
- jisx0213_pair_encmap,
- JISX0213_ENCPAIRS);
- if (code == DBCINV) {
- code = find_pairencmap(
- (ucs2_t)c, 0,
- jisx0213_pair_encmap,
- JISX0213_ENCPAIRS);
- if (code == DBCINV)
- return 1;
- } else
- insize = 2;
- }
- }
- }
- else TRYMAP_ENC(jisxcommon, code, c);
- else if (c >= 0xff61 && c <= 0xff9f) {
- /* JIS X 0201 half-width katakana */
- WRITE2(0x8e, c - 0xfec0)
- NEXT(1, 2)
- continue;
- }
- else if (c == 0xff3c)
- /* F/W REVERSE SOLIDUS (see NOTES) */
- code = 0x2140;
- else if (c == 0xff5e)
- /* F/W TILDE (see NOTES) */
- code = 0x2232;
- else
- return 1;
- }
- else if (c >> 16 == EMPBASE >> 16) {
- EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
- else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
- else return insize;
- }
- else
- return insize;
-
- if (code & 0x8000) {
- /* Codeset 2 */
- WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
- NEXT(insize, 3)
- } else {
- /* Codeset 1 */
- WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
- NEXT(insize, 2)
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ ucs4_t c = IN1;
+ DBCHAR code;
+ Py_ssize_t insize;
+
+ if (c < 0x80) {
+ WRITE1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ DECODE_SURROGATE(c)
+ insize = GET_INSIZE(c);
+
+ if (c <= 0xFFFF) {
+ EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
+ else TRYMAP_ENC(jisx0213_bmp, code, c) {
+ if (code == MULTIC) {
+ if (inleft < 2) {
+ if (flags & MBENC_FLUSH) {
+ code = find_pairencmap(
+ (ucs2_t)c, 0,
+ jisx0213_pair_encmap,
+ JISX0213_ENCPAIRS);
+ if (code == DBCINV)
+ return 1;
+ }
+ else
+ return MBERR_TOOFEW;
+ }
+ else {
+ code = find_pairencmap(
+ (ucs2_t)c, (*inbuf)[1],
+ jisx0213_pair_encmap,
+ JISX0213_ENCPAIRS);
+ if (code == DBCINV) {
+ code = find_pairencmap(
+ (ucs2_t)c, 0,
+ jisx0213_pair_encmap,
+ JISX0213_ENCPAIRS);
+ if (code == DBCINV)
+ return 1;
+ } else
+ insize = 2;
+ }
+ }
+ }
+ else TRYMAP_ENC(jisxcommon, code, c);
+ else if (c >= 0xff61 && c <= 0xff9f) {
+ /* JIS X 0201 half-width katakana */
+ WRITE2(0x8e, c - 0xfec0)
+ NEXT(1, 2)
+ continue;
+ }
+ else if (c == 0xff3c)
+ /* F/W REVERSE SOLIDUS (see NOTES) */
+ code = 0x2140;
+ else if (c == 0xff5e)
+ /* F/W TILDE (see NOTES) */
+ code = 0x2232;
+ else
+ return 1;
+ }
+ else if (c >> 16 == EMPBASE >> 16) {
+ EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
+ else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
+ else return insize;
+ }
+ else
+ return insize;
+
+ if (code & 0x8000) {
+ /* Codeset 2 */
+ WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+ NEXT(insize, 3)
+ } else {
+ /* Codeset 1 */
+ WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+ NEXT(insize, 2)
+ }
+ }
+
+ return 0;
}
DECODER(euc_jis_2004)
{
- while (inleft > 0) {
- unsigned char c = IN1;
- ucs4_t code;
-
- REQUIRE_OUTBUF(1)
-
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
-
- if (c == 0x8e) {
- /* JIS X 0201 half-width katakana */
- unsigned char c2;
-
- REQUIRE_INBUF(2)
- c2 = IN2;
- if (c2 >= 0xa1 && c2 <= 0xdf) {
- OUT1(0xfec0 + c2)
- NEXT(2, 1)
- }
- else
- return 2;
- }
- else if (c == 0x8f) {
- unsigned char c2, c3;
-
- REQUIRE_INBUF(3)
- c2 = IN2 ^ 0x80;
- c3 = IN3 ^ 0x80;
-
- /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
- EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
- else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
- else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(3)
- continue;
- }
- else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
- else return 3;
- NEXT(3, 1)
- }
- else {
- unsigned char c2;
-
- REQUIRE_INBUF(2)
- c ^= 0x80;
- c2 = IN2 ^ 0x80;
-
- /* JIS X 0213 Plane 1 */
- EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
- else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
- else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
- else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
- else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
- else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(2)
- continue;
- }
- else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
- WRITE2(code >> 16, code & 0xffff)
- NEXT(2, 2)
- continue;
- }
- else return 2;
- NEXT(2, 1)
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1;
+ ucs4_t code;
+
+ REQUIRE_OUTBUF(1)
+
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ if (c == 0x8e) {
+ /* JIS X 0201 half-width katakana */
+ unsigned char c2;
+
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+ if (c2 >= 0xa1 && c2 <= 0xdf) {
+ OUT1(0xfec0 + c2)
+ NEXT(2, 1)
+ }
+ else
+ return 2;
+ }
+ else if (c == 0x8f) {
+ unsigned char c2, c3;
+
+ REQUIRE_INBUF(3)
+ c2 = IN2 ^ 0x80;
+ c3 = IN3 ^ 0x80;
+
+ /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
+ EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
+ else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
+ else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
+ WRITEUCS4(EMPBASE | code)
+ NEXT_IN(3)
+ continue;
+ }
+ else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
+ else return 3;
+ NEXT(3, 1)
+ }
+ else {
+ unsigned char c2;
+
+ REQUIRE_INBUF(2)
+ c ^= 0x80;
+ c2 = IN2 ^ 0x80;
+
+ /* JIS X 0213 Plane 1 */
+ EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
+ else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
+ else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
+ else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+ else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
+ else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
+ WRITEUCS4(EMPBASE | code)
+ NEXT_IN(2)
+ continue;
+ }
+ else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
+ WRITE2(code >> 16, code & 0xffff)
+ NEXT(2, 2)
+ continue;
+ }
+ else return 2;
+ NEXT(2, 1)
+ }
+ }
+
+ return 0;
}
@@ -315,114 +315,114 @@ DECODER(euc_jis_2004)
ENCODER(euc_jp)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
-
- UCS4INVALID(c)
-
- TRYMAP_ENC(jisxcommon, code, c);
- else if (c >= 0xff61 && c <= 0xff9f) {
- /* JIS X 0201 half-width katakana */
- WRITE2(0x8e, c - 0xfec0)
- NEXT(1, 2)
- continue;
- }
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ UCS4INVALID(c)
+
+ TRYMAP_ENC(jisxcommon, code, c);
+ else if (c >= 0xff61 && c <= 0xff9f) {
+ /* JIS X 0201 half-width katakana */
+ WRITE2(0x8e, c - 0xfec0)
+ NEXT(1, 2)
+ continue;
+ }
#ifndef STRICT_BUILD
- else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
- code = 0x2140;
- else if (c == 0xa5) { /* YEN SIGN */
- WRITE1(0x5c);
- NEXT(1, 1)
- continue;
- } else if (c == 0x203e) { /* OVERLINE */
- WRITE1(0x7e);
- NEXT(1, 1)
- continue;
- }
+ else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
+ code = 0x2140;
+ else if (c == 0xa5) { /* YEN SIGN */
+ WRITE1(0x5c);
+ NEXT(1, 1)
+ continue;
+ } else if (c == 0x203e) { /* OVERLINE */
+ WRITE1(0x7e);
+ NEXT(1, 1)
+ continue;
+ }
#endif
- else
- return 1;
-
- if (code & 0x8000) {
- /* JIS X 0212 */
- WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
- NEXT(1, 3)
- } else {
- /* JIS X 0208 */
- WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
- NEXT(1, 2)
- }
- }
-
- return 0;
+ else
+ return 1;
+
+ if (code & 0x8000) {
+ /* JIS X 0212 */
+ WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+ NEXT(1, 3)
+ } else {
+ /* JIS X 0208 */
+ WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+ NEXT(1, 2)
+ }
+ }
+
+ return 0;
}
DECODER(euc_jp)
{
- while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
-
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
-
- if (c == 0x8e) {
- /* JIS X 0201 half-width katakana */
- unsigned char c2;
-
- REQUIRE_INBUF(2)
- c2 = IN2;
- if (c2 >= 0xa1 && c2 <= 0xdf) {
- OUT1(0xfec0 + c2)
- NEXT(2, 1)
- }
- else
- return 2;
- }
- else if (c == 0x8f) {
- unsigned char c2, c3;
-
- REQUIRE_INBUF(3)
- c2 = IN2;
- c3 = IN3;
- /* JIS X 0212 */
- TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
- NEXT(3, 1)
- }
- else
- return 3;
- }
- else {
- unsigned char c2;
-
- REQUIRE_INBUF(2)
- c2 = IN2;
- /* JIS X 0208 */
+ while (inleft > 0) {
+ unsigned char c = IN1;
+
+ REQUIRE_OUTBUF(1)
+
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ if (c == 0x8e) {
+ /* JIS X 0201 half-width katakana */
+ unsigned char c2;
+
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+ if (c2 >= 0xa1 && c2 <= 0xdf) {
+ OUT1(0xfec0 + c2)
+ NEXT(2, 1)
+ }
+ else
+ return 2;
+ }
+ else if (c == 0x8f) {
+ unsigned char c2, c3;
+
+ REQUIRE_INBUF(3)
+ c2 = IN2;
+ c3 = IN3;
+ /* JIS X 0212 */
+ TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
+ NEXT(3, 1)
+ }
+ else
+ return 3;
+ }
+ else {
+ unsigned char c2;
+
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+ /* JIS X 0208 */
#ifndef STRICT_BUILD
- if (c == 0xa1 && c2 == 0xc0)
- /* FULL-WIDTH REVERSE SOLIDUS */
- **outbuf = 0xff3c;
- else
+ if (c == 0xa1 && c2 == 0xc0)
+ /* FULL-WIDTH REVERSE SOLIDUS */
+ **outbuf = 0xff3c;
+ else
#endif
- TRYMAP_DEC(jisx0208, **outbuf,
- c ^ 0x80, c2 ^ 0x80) ;
- else return 2;
- NEXT(2, 1)
- }
- }
-
- return 0;
+ TRYMAP_DEC(jisx0208, **outbuf,
+ c ^ 0x80, c2 ^ 0x80) ;
+ else return 2;
+ NEXT(2, 1)
+ }
+ }
+
+ return 0;
}
@@ -432,105 +432,105 @@ DECODER(euc_jp)
ENCODER(shift_jis)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
- unsigned char c1, c2;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+ unsigned char c1, c2;
#ifdef STRICT_BUILD
- JISX0201_R_ENCODE(c, code)
+ JISX0201_R_ENCODE(c, code)
#else
- if (c < 0x80) code = c;
- else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
- else if (c == 0x203e) code = 0x7e; /* OVERLINE */
+ if (c < 0x80) code = c;
+ else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
+ else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
- else JISX0201_K_ENCODE(c, code)
- else UCS4INVALID(c)
- else code = NOCHAR;
+ else JISX0201_K_ENCODE(c, code)
+ else UCS4INVALID(c)
+ else code = NOCHAR;
- if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
- REQUIRE_OUTBUF(1)
+ if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
+ REQUIRE_OUTBUF(1)
- OUT1((unsigned char)code)
- NEXT(1, 1)
- continue;
- }
+ OUT1((unsigned char)code)
+ NEXT(1, 1)
+ continue;
+ }
- REQUIRE_OUTBUF(2)
+ REQUIRE_OUTBUF(2)
- if (code == NOCHAR) {
- TRYMAP_ENC(jisxcommon, code, c);
+ if (code == NOCHAR) {
+ TRYMAP_ENC(jisxcommon, code, c);
#ifndef STRICT_BUILD
- else if (c == 0xff3c)
- code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
+ else if (c == 0xff3c)
+ code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
- else
- return 1;
-
- if (code & 0x8000) /* MSB set: JIS X 0212 */
- return 1;
- }
-
- c1 = code >> 8;
- c2 = code & 0xff;
- c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
- c1 = (c1 - 0x21) >> 1;
- OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
- OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
- NEXT(1, 2)
- }
-
- return 0;
+ else
+ return 1;
+
+ if (code & 0x8000) /* MSB set: JIS X 0212 */
+ return 1;
+ }
+
+ c1 = code >> 8;
+ c2 = code & 0xff;
+ c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
+ c1 = (c1 - 0x21) >> 1;
+ OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+ OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(shift_jis)
{
- while (inleft > 0) {
- unsigned char c = IN1;
+ while (inleft > 0) {
+ unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
+ REQUIRE_OUTBUF(1)
#ifdef STRICT_BUILD
- JISX0201_R_DECODE(c, **outbuf)
+ JISX0201_R_DECODE(c, **outbuf)
#else
- if (c < 0x80) **outbuf = c;
+ if (c < 0x80) **outbuf = c;
#endif
- else JISX0201_K_DECODE(c, **outbuf)
- else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
- unsigned char c1, c2;
+ else JISX0201_K_DECODE(c, **outbuf)
+ else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
+ unsigned char c1, c2;
- REQUIRE_INBUF(2)
- c2 = IN2;
- if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
+ return 2;
- c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
- c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
- c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
- c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+ c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+ c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+ c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
+ c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
#ifndef STRICT_BUILD
- if (c1 == 0x21 && c2 == 0x40) {
- /* FULL-WIDTH REVERSE SOLIDUS */
- OUT1(0xff3c)
- NEXT(2, 1)
- continue;
- }
+ if (c1 == 0x21 && c2 == 0x40) {
+ /* FULL-WIDTH REVERSE SOLIDUS */
+ OUT1(0xff3c)
+ NEXT(2, 1)
+ continue;
+ }
#endif
- TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
- NEXT(2, 1)
- continue;
- }
- else
- return 2;
- }
- else
- return 2;
-
- NEXT(1, 1) /* JIS X 0201 */
- }
-
- return 0;
+ TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
+ NEXT(2, 1)
+ continue;
+ }
+ else
+ return 2;
+ }
+ else
+ return 2;
+
+ NEXT(1, 1) /* JIS X 0201 */
+ }
+
+ return 0;
}
@@ -540,167 +540,167 @@ DECODER(shift_jis)
ENCODER(shift_jis_2004)
{
- while (inleft > 0) {
- ucs4_t c = IN1;
- DBCHAR code = NOCHAR;
- int c1, c2;
- Py_ssize_t insize;
-
- JISX0201_ENCODE(c, code)
- else DECODE_SURROGATE(c)
-
- if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
- WRITE1((unsigned char)code)
- NEXT(1, 1)
- continue;
- }
-
- REQUIRE_OUTBUF(2)
- insize = GET_INSIZE(c);
-
- if (code == NOCHAR) {
- if (c <= 0xffff) {
- EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
- else TRYMAP_ENC(jisx0213_bmp, code, c) {
- if (code == MULTIC) {
- if (inleft < 2) {
- if (flags & MBENC_FLUSH) {
- code = find_pairencmap
- ((ucs2_t)c, 0,
- jisx0213_pair_encmap,
- JISX0213_ENCPAIRS);
- if (code == DBCINV)
- return 1;
- }
- else
- return MBERR_TOOFEW;
- }
- else {
- code = find_pairencmap(
- (ucs2_t)c, IN2,
- jisx0213_pair_encmap,
- JISX0213_ENCPAIRS);
- if (code == DBCINV) {
- code = find_pairencmap(
- (ucs2_t)c, 0,
- jisx0213_pair_encmap,
- JISX0213_ENCPAIRS);
- if (code == DBCINV)
- return 1;
- }
- else
- insize = 2;
- }
- }
- }
- else TRYMAP_ENC(jisxcommon, code, c) {
- /* abandon JIS X 0212 codes */
- if (code & 0x8000)
- return 1;
- }
- else return 1;
- }
- else if (c >> 16 == EMPBASE >> 16) {
- EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
- else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
- else return insize;
- }
- else
- return insize;
- }
-
- c1 = code >> 8;
- c2 = (code & 0xff) - 0x21;
-
- if (c1 & 0x80) { /* Plane 2 */
- if (c1 >= 0xee) c1 -= 0x87;
- else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
- else c1 -= 0x43;
- }
- else /* Plane 1 */
- c1 -= 0x21;
-
- if (c1 & 1) c2 += 0x5e;
- c1 >>= 1;
- OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
- OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
-
- NEXT(insize, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ ucs4_t c = IN1;
+ DBCHAR code = NOCHAR;
+ int c1, c2;
+ Py_ssize_t insize;
+
+ JISX0201_ENCODE(c, code)
+ else DECODE_SURROGATE(c)
+
+ if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
+ WRITE1((unsigned char)code)
+ NEXT(1, 1)
+ continue;
+ }
+
+ REQUIRE_OUTBUF(2)
+ insize = GET_INSIZE(c);
+
+ if (code == NOCHAR) {
+ if (c <= 0xffff) {
+ EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
+ else TRYMAP_ENC(jisx0213_bmp, code, c) {
+ if (code == MULTIC) {
+ if (inleft < 2) {
+ if (flags & MBENC_FLUSH) {
+ code = find_pairencmap
+ ((ucs2_t)c, 0,
+ jisx0213_pair_encmap,
+ JISX0213_ENCPAIRS);
+ if (code == DBCINV)
+ return 1;
+ }
+ else
+ return MBERR_TOOFEW;
+ }
+ else {
+ code = find_pairencmap(
+ (ucs2_t)c, IN2,
+ jisx0213_pair_encmap,
+ JISX0213_ENCPAIRS);
+ if (code == DBCINV) {
+ code = find_pairencmap(
+ (ucs2_t)c, 0,
+ jisx0213_pair_encmap,
+ JISX0213_ENCPAIRS);
+ if (code == DBCINV)
+ return 1;
+ }
+ else
+ insize = 2;
+ }
+ }
+ }
+ else TRYMAP_ENC(jisxcommon, code, c) {
+ /* abandon JIS X 0212 codes */
+ if (code & 0x8000)
+ return 1;
+ }
+ else return 1;
+ }
+ else if (c >> 16 == EMPBASE >> 16) {
+ EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
+ else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
+ else return insize;
+ }
+ else
+ return insize;
+ }
+
+ c1 = code >> 8;
+ c2 = (code & 0xff) - 0x21;
+
+ if (c1 & 0x80) { /* Plane 2 */
+ if (c1 >= 0xee) c1 -= 0x87;
+ else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
+ else c1 -= 0x43;
+ }
+ else /* Plane 1 */
+ c1 -= 0x21;
+
+ if (c1 & 1) c2 += 0x5e;
+ c1 >>= 1;
+ OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
+ OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
+
+ NEXT(insize, 2)
+ }
+
+ return 0;
}
DECODER(shift_jis_2004)
{
- while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
- JISX0201_DECODE(c, **outbuf)
- else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
- unsigned char c1, c2;
- ucs4_t code;
-
- REQUIRE_INBUF(2)
- c2 = IN2;
- if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
- return 2;
-
- c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
- c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
- c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
- c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
-
- if (c1 < 0x5e) { /* Plane 1 */
- c1 += 0x21;
- EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
- c1, c2)
- else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
- NEXT_OUT(1)
- }
- else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
- c1, c2) {
- NEXT_OUT(1)
- }
- else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
- WRITEUCS4(EMPBASE | code)
- }
- else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
- WRITE2(code >> 16, code & 0xffff)
- NEXT_OUT(2)
- }
- else
- return 2;
- NEXT_IN(2)
- }
- else { /* Plane 2 */
- if (c1 >= 0x67) c1 += 0x07;
- else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
- else c1 -= 0x3d;
-
- EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
- c1, c2)
- else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
- c1, c2) ;
- else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
- WRITEUCS4(EMPBASE | code)
- NEXT_IN(2)
- continue;
- }
- else
- return 2;
- NEXT(2, 1)
- }
- continue;
- }
- else
- return 2;
-
- NEXT(1, 1) /* JIS X 0201 */
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1;
+
+ REQUIRE_OUTBUF(1)
+ JISX0201_DECODE(c, **outbuf)
+ else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
+ unsigned char c1, c2;
+ ucs4_t code;
+
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
+ return 2;
+
+ c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+ c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+ c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
+ c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+
+ if (c1 < 0x5e) { /* Plane 1 */
+ c1 += 0x21;
+ EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
+ c1, c2)
+ else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
+ NEXT_OUT(1)
+ }
+ else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
+ c1, c2) {
+ NEXT_OUT(1)
+ }
+ else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
+ WRITEUCS4(EMPBASE | code)
+ }
+ else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
+ WRITE2(code >> 16, code & 0xffff)
+ NEXT_OUT(2)
+ }
+ else
+ return 2;
+ NEXT_IN(2)
+ }
+ else { /* Plane 2 */
+ if (c1 >= 0x67) c1 += 0x07;
+ else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
+ else c1 -= 0x3d;
+
+ EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
+ c1, c2)
+ else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
+ c1, c2) ;
+ else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
+ WRITEUCS4(EMPBASE | code)
+ NEXT_IN(2)
+ continue;
+ }
+ else
+ return 2;
+ NEXT(2, 1)
+ }
+ continue;
+ }
+ else
+ return 2;
+
+ NEXT(1, 1) /* JIS X 0201 */
+ }
+
+ return 0;
}
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
index 161967e..9272e36 100644
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -11,151 +11,151 @@
* EUC-KR codec
*/
-#define EUCKR_JAMO_FIRSTBYTE 0xA4
-#define EUCKR_JAMO_FILLER 0xD4
+#define EUCKR_JAMO_FIRSTBYTE 0xA4
+#define EUCKR_JAMO_FILLER 0xD4
static const unsigned char u2cgk_choseong[19] = {
- 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
- 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
- 0xbc, 0xbd, 0xbe
+ 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
+ 0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
+ 0xbc, 0xbd, 0xbe
};
static const unsigned char u2cgk_jungseong[21] = {
- 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
- 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
- 0xcf, 0xd0, 0xd1, 0xd2, 0xd3
+ 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
+ 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
+ 0xcf, 0xd0, 0xd1, 0xd2, 0xd3
};
static const unsigned char u2cgk_jongseong[28] = {
- 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
- 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
- 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
- 0xbb, 0xbc, 0xbd, 0xbe
+ 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+ 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
+ 0xbb, 0xbc, 0xbd, 0xbe
};
ENCODER(euc_kr)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
-
- REQUIRE_OUTBUF(2)
- TRYMAP_ENC(cp949, code, c);
- else return 1;
-
- if ((code & 0x8000) == 0) {
- /* KS X 1001 coded character */
- OUT1((code >> 8) | 0x80)
- OUT2((code & 0xFF) | 0x80)
- NEXT(1, 2)
- }
- else { /* Mapping is found in CP949 extension,
- * but we encode it in KS X 1001:1998 Annex 3,
- * make-up sequence for EUC-KR. */
-
- REQUIRE_OUTBUF(8)
-
- /* syllable composition precedence */
- OUT1(EUCKR_JAMO_FIRSTBYTE)
- OUT2(EUCKR_JAMO_FILLER)
-
- /* All codepoints in CP949 extension are in unicode
- * Hangul Syllable area. */
- assert(0xac00 <= c && c <= 0xd7a3);
- c -= 0xac00;
-
- OUT3(EUCKR_JAMO_FIRSTBYTE)
- OUT4(u2cgk_choseong[c / 588])
- NEXT_OUT(4)
-
- OUT1(EUCKR_JAMO_FIRSTBYTE)
- OUT2(u2cgk_jungseong[(c / 28) % 21])
- OUT3(EUCKR_JAMO_FIRSTBYTE)
- OUT4(u2cgk_jongseong[c % 28])
- NEXT(1, 4)
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
+
+ REQUIRE_OUTBUF(2)
+ TRYMAP_ENC(cp949, code, c);
+ else return 1;
+
+ if ((code & 0x8000) == 0) {
+ /* KS X 1001 coded character */
+ OUT1((code >> 8) | 0x80)
+ OUT2((code & 0xFF) | 0x80)
+ NEXT(1, 2)
+ }
+ else { /* Mapping is found in CP949 extension,
+ * but we encode it in KS X 1001:1998 Annex 3,
+ * make-up sequence for EUC-KR. */
+
+ REQUIRE_OUTBUF(8)
+
+ /* syllable composition precedence */
+ OUT1(EUCKR_JAMO_FIRSTBYTE)
+ OUT2(EUCKR_JAMO_FILLER)
+
+ /* All codepoints in CP949 extension are in unicode
+ * Hangul Syllable area. */
+ assert(0xac00 <= c && c <= 0xd7a3);
+ c -= 0xac00;
+
+ OUT3(EUCKR_JAMO_FIRSTBYTE)
+ OUT4(u2cgk_choseong[c / 588])
+ NEXT_OUT(4)
+
+ OUT1(EUCKR_JAMO_FIRSTBYTE)
+ OUT2(u2cgk_jungseong[(c / 28) % 21])
+ OUT3(EUCKR_JAMO_FIRSTBYTE)
+ OUT4(u2cgk_jongseong[c % 28])
+ NEXT(1, 4)
+ }
+ }
+
+ return 0;
}
-#define NONE 127
+#define NONE 127
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
- 0, 1, NONE, 2, NONE, NONE, 3, 4,
- 5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
- 6, 7, 8, NONE, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18
+ 0, 1, NONE, 2, NONE, NONE, 3, 4,
+ 5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
+ 6, 7, 8, NONE, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18
};
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
- 1, 2, 3, 4, 5, 6, 7, NONE,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, NONE, 18, 19, 20, 21, 22,
- NONE, 23, 24, 25, 26, 27
+ 1, 2, 3, 4, 5, 6, 7, NONE,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, NONE, 18, 19, 20, 21, 22,
+ NONE, 23, 24, 25, 26, 27
};
DECODER(euc_kr)
{
- while (inleft > 0) {
- unsigned char c = IN1;
-
- REQUIRE_OUTBUF(1)
-
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
-
- REQUIRE_INBUF(2)
-
- if (c == EUCKR_JAMO_FIRSTBYTE &&
- IN2 == EUCKR_JAMO_FILLER) {
- /* KS X 1001:1998 Annex 3 make-up sequence */
- DBCHAR cho, jung, jong;
-
- REQUIRE_INBUF(8)
- if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
- (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
- (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
- return 8;
-
- c = (*inbuf)[3];
- if (0xa1 <= c && c <= 0xbe)
- cho = cgk2u_choseong[c - 0xa1];
- else
- cho = NONE;
-
- c = (*inbuf)[5];
- jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
-
- c = (*inbuf)[7];
- if (c == EUCKR_JAMO_FILLER)
- jong = 0;
- else if (0xa1 <= c && c <= 0xbe)
- jong = cgk2u_jongseong[c - 0xa1];
- else
- jong = NONE;
-
- if (cho == NONE || jung == NONE || jong == NONE)
- return 8;
-
- OUT1(0xac00 + cho*588 + jung*28 + jong);
- NEXT(8, 1)
- }
- else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
- NEXT(2, 1)
- }
- else
- return 2;
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1;
+
+ REQUIRE_OUTBUF(1)
+
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ REQUIRE_INBUF(2)
+
+ if (c == EUCKR_JAMO_FIRSTBYTE &&
+ IN2 == EUCKR_JAMO_FILLER) {
+ /* KS X 1001:1998 Annex 3 make-up sequence */
+ DBCHAR cho, jung, jong;
+
+ REQUIRE_INBUF(8)
+ if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
+ (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
+ (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
+ return 8;
+
+ c = (*inbuf)[3];
+ if (0xa1 <= c && c <= 0xbe)
+ cho = cgk2u_choseong[c - 0xa1];
+ else
+ cho = NONE;
+
+ c = (*inbuf)[5];
+ jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
+
+ c = (*inbuf)[7];
+ if (c == EUCKR_JAMO_FILLER)
+ jong = 0;
+ else if (0xa1 <= c && c <= 0xbe)
+ jong = cgk2u_jongseong[c - 0xa1];
+ else
+ jong = NONE;
+
+ if (cho == NONE || jung == NONE || jong == NONE)
+ return 8;
+
+ OUT1(0xac00 + cho*588 + jung*28 + jong);
+ NEXT(8, 1)
+ }
+ else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
+ NEXT(2, 1)
+ }
+ else
+ return 2;
+ }
+
+ return 0;
}
#undef NONE
@@ -166,54 +166,54 @@ DECODER(euc_kr)
ENCODER(cp949)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
-
- REQUIRE_OUTBUF(2)
- TRYMAP_ENC(cp949, code, c);
- else return 1;
-
- OUT1((code >> 8) | 0x80)
- if (code & 0x8000)
- OUT2(code & 0xFF) /* MSB set: CP949 */
- else
- OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
- NEXT(1, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
+
+ REQUIRE_OUTBUF(2)
+ TRYMAP_ENC(cp949, code, c);
+ else return 1;
+
+ OUT1((code >> 8) | 0x80)
+ if (code & 0x8000)
+ OUT2(code & 0xFF) /* MSB set: CP949 */
+ else
+ OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(cp949)
{
- while (inleft > 0) {
- unsigned char c = IN1;
+ while (inleft > 0) {
+ unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
+ REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
- REQUIRE_INBUF(2)
- TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
- else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
- else return 2;
+ REQUIRE_INBUF(2)
+ TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
+ else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
+ else return 2;
- NEXT(2, 1)
- }
+ NEXT(2, 1)
+ }
- return 0;
+ return 0;
}
@@ -250,58 +250,58 @@ static const DBCHAR u2johabjamo[] = {
ENCODER(johab)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
-
- REQUIRE_OUTBUF(2)
-
- if (c >= 0xac00 && c <= 0xd7a3) {
- c -= 0xac00;
- code = 0x8000 |
- (u2johabidx_choseong[c / 588] << 10) |
- (u2johabidx_jungseong[(c / 28) % 21] << 5) |
- u2johabidx_jongseong[c % 28];
- }
- else if (c >= 0x3131 && c <= 0x3163)
- code = u2johabjamo[c - 0x3131];
- else TRYMAP_ENC(cp949, code, c) {
- unsigned char c1, c2, t2;
- unsigned short t1;
-
- assert((code & 0x8000) == 0);
- c1 = code >> 8;
- c2 = code & 0xff;
- if (((c1 >= 0x21 && c1 <= 0x2c) ||
- (c1 >= 0x4a && c1 <= 0x7d)) &&
- (c2 >= 0x21 && c2 <= 0x7e)) {
- t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
- (c1 - 0x21 + 0x197));
- t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
- OUT1(t1 >> 1)
- OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
- NEXT(1, 2)
- continue;
- }
- else
- return 1;
- }
- else
- return 1;
-
- OUT1(code >> 8)
- OUT2(code & 0xff)
- NEXT(1, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
+
+ REQUIRE_OUTBUF(2)
+
+ if (c >= 0xac00 && c <= 0xd7a3) {
+ c -= 0xac00;
+ code = 0x8000 |
+ (u2johabidx_choseong[c / 588] << 10) |
+ (u2johabidx_jungseong[(c / 28) % 21] << 5) |
+ u2johabidx_jongseong[c % 28];
+ }
+ else if (c >= 0x3131 && c <= 0x3163)
+ code = u2johabjamo[c - 0x3131];
+ else TRYMAP_ENC(cp949, code, c) {
+ unsigned char c1, c2, t2;
+ unsigned short t1;
+
+ assert((code & 0x8000) == 0);
+ c1 = code >> 8;
+ c2 = code & 0xff;
+ if (((c1 >= 0x21 && c1 <= 0x2c) ||
+ (c1 >= 0x4a && c1 <= 0x7d)) &&
+ (c2 >= 0x21 && c2 <= 0x7e)) {
+ t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
+ (c1 - 0x21 + 0x197));
+ t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
+ OUT1(t1 >> 1)
+ OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
+ NEXT(1, 2)
+ continue;
+ }
+ else
+ return 1;
+ }
+ else
+ return 1;
+
+ OUT1(code >> 8)
+ OUT2(code & 0xff)
+ NEXT(1, 2)
+ }
+
+ return 0;
}
#define FILL 0xfd
@@ -347,91 +347,91 @@ static const unsigned char johabjamo_jongseong[32] = {
DECODER(johab)
{
- while (inleft > 0) {
- unsigned char c = IN1, c2;
-
- REQUIRE_OUTBUF(1)
-
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
-
- REQUIRE_INBUF(2)
- c2 = IN2;
-
- if (c < 0xd8) {
- /* johab hangul */
- unsigned char c_cho, c_jung, c_jong;
- unsigned char i_cho, i_jung, i_jong;
-
- c_cho = (c >> 2) & 0x1f;
- c_jung = ((c << 3) | c2 >> 5) & 0x1f;
- c_jong = c2 & 0x1f;
-
- i_cho = johabidx_choseong[c_cho];
- i_jung = johabidx_jungseong[c_jung];
- i_jong = johabidx_jongseong[c_jong];
-
- if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
- return 2;
-
- /* we don't use U+1100 hangul jamo yet. */
- if (i_cho == FILL) {
- if (i_jung == FILL) {
- if (i_jong == FILL)
- OUT1(0x3000)
- else
- OUT1(0x3100 |
- johabjamo_jongseong[c_jong])
- }
- else {
- if (i_jong == FILL)
- OUT1(0x3100 |
- johabjamo_jungseong[c_jung])
- else
- return 2;
- }
- } else {
- if (i_jung == FILL) {
- if (i_jong == FILL)
- OUT1(0x3100 |
- johabjamo_choseong[c_cho])
- else
- return 2;
- }
- else
- OUT1(0xac00 +
- i_cho * 588 +
- i_jung * 28 +
- (i_jong == FILL ? 0 : i_jong))
- }
- NEXT(2, 1)
- } else {
- /* KS X 1001 except hangul jamos and syllables */
- if (c == 0xdf || c > 0xf9 ||
- c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
- (c2 & 0x7f) == 0x7f ||
- (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
- return 2;
- else {
- unsigned char t1, t2;
-
- t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
- 2 * c - 0x197);
- t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
- t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
- t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
-
- TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
- else return 2;
- NEXT(2, 1)
- }
- }
- }
-
- return 0;
+ while (inleft > 0) {
+ unsigned char c = IN1, c2;
+
+ REQUIRE_OUTBUF(1)
+
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
+
+ REQUIRE_INBUF(2)
+ c2 = IN2;
+
+ if (c < 0xd8) {
+ /* johab hangul */
+ unsigned char c_cho, c_jung, c_jong;
+ unsigned char i_cho, i_jung, i_jong;
+
+ c_cho = (c >> 2) & 0x1f;
+ c_jung = ((c << 3) | c2 >> 5) & 0x1f;
+ c_jong = c2 & 0x1f;
+
+ i_cho = johabidx_choseong[c_cho];
+ i_jung = johabidx_jungseong[c_jung];
+ i_jong = johabidx_jongseong[c_jong];
+
+ if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
+ return 2;
+
+ /* we don't use U+1100 hangul jamo yet. */
+ if (i_cho == FILL) {
+ if (i_jung == FILL) {
+ if (i_jong == FILL)
+ OUT1(0x3000)
+ else
+ OUT1(0x3100 |
+ johabjamo_jongseong[c_jong])
+ }
+ else {
+ if (i_jong == FILL)
+ OUT1(0x3100 |
+ johabjamo_jungseong[c_jung])
+ else
+ return 2;
+ }
+ } else {
+ if (i_jung == FILL) {
+ if (i_jong == FILL)
+ OUT1(0x3100 |
+ johabjamo_choseong[c_cho])
+ else
+ return 2;
+ }
+ else
+ OUT1(0xac00 +
+ i_cho * 588 +
+ i_jung * 28 +
+ (i_jong == FILL ? 0 : i_jong))
+ }
+ NEXT(2, 1)
+ } else {
+ /* KS X 1001 except hangul jamos and syllables */
+ if (c == 0xdf || c > 0xf9 ||
+ c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
+ (c2 & 0x7f) == 0x7f ||
+ (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
+ return 2;
+ else {
+ unsigned char t1, t2;
+
+ t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
+ 2 * c - 0x197);
+ t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
+ t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
+ t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
+
+ TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
+ else return 2;
+ NEXT(2, 1)
+ }
+ }
+ }
+
+ return 0;
}
#undef NONE
#undef FILL
diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c
index 8ccbca1..38cf723 100644
--- a/Modules/cjkcodecs/_codecs_tw.c
+++ b/Modules/cjkcodecs/_codecs_tw.c
@@ -13,52 +13,52 @@
ENCODER(big5)
{
- while (inleft > 0) {
- Py_UNICODE c = **inbuf;
- DBCHAR code;
+ while (inleft > 0) {
+ Py_UNICODE c = **inbuf;
+ DBCHAR code;
- if (c < 0x80) {
- REQUIRE_OUTBUF(1)
- **outbuf = (unsigned char)c;
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
+ if (c < 0x80) {
+ REQUIRE_OUTBUF(1)
+ **outbuf = (unsigned char)c;
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
- REQUIRE_OUTBUF(2)
+ REQUIRE_OUTBUF(2)
- TRYMAP_ENC(big5, code, c);
- else return 1;
+ TRYMAP_ENC(big5, code, c);
+ else return 1;
- OUT1(code >> 8)
- OUT2(code & 0xFF)
- NEXT(1, 2)
- }
+ OUT1(code >> 8)
+ OUT2(code & 0xFF)
+ NEXT(1, 2)
+ }
- return 0;
+ return 0;
}
DECODER(big5)
{
- while (inleft > 0) {
- unsigned char c = IN1;
+ while (inleft > 0) {
+ unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
+ REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
- REQUIRE_INBUF(2)
- TRYMAP_DEC(big5, **outbuf, c, IN2) {
- NEXT(2, 1)
- }
- else return 2;
- }
+ REQUIRE_INBUF(2)
+ TRYMAP_DEC(big5, **outbuf, c, IN2) {
+ NEXT(2, 1)
+ }
+ else return 2;
+ }
- return 0;
+ return 0;
}
@@ -68,53 +68,53 @@ DECODER(big5)
ENCODER(cp950)
{
- while (inleft > 0) {
- Py_UNICODE c = IN1;
- DBCHAR code;
-
- if (c < 0x80) {
- WRITE1((unsigned char)c)
- NEXT(1, 1)
- continue;
- }
- UCS4INVALID(c)
-
- REQUIRE_OUTBUF(2)
- TRYMAP_ENC(cp950ext, code, c);
- else TRYMAP_ENC(big5, code, c);
- else return 1;
-
- OUT1(code >> 8)
- OUT2(code & 0xFF)
- NEXT(1, 2)
- }
-
- return 0;
+ while (inleft > 0) {
+ Py_UNICODE c = IN1;
+ DBCHAR code;
+
+ if (c < 0x80) {
+ WRITE1((unsigned char)c)
+ NEXT(1, 1)
+ continue;
+ }
+ UCS4INVALID(c)
+
+ REQUIRE_OUTBUF(2)
+ TRYMAP_ENC(cp950ext, code, c);
+ else TRYMAP_ENC(big5, code, c);
+ else return 1;
+
+ OUT1(code >> 8)
+ OUT2(code & 0xFF)
+ NEXT(1, 2)
+ }
+
+ return 0;
}
DECODER(cp950)
{
- while (inleft > 0) {
- unsigned char c = IN1;
+ while (inleft > 0) {
+ unsigned char c = IN1;
- REQUIRE_OUTBUF(1)
+ REQUIRE_OUTBUF(1)
- if (c < 0x80) {
- OUT1(c)
- NEXT(1, 1)
- continue;
- }
+ if (c < 0x80) {
+ OUT1(c)
+ NEXT(1, 1)
+ continue;
+ }
- REQUIRE_INBUF(2)
+ REQUIRE_INBUF(2)
- TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
- else TRYMAP_DEC(big5, **outbuf, c, IN2);
- else return 2;
+ TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
+ else TRYMAP_DEC(big5, **outbuf, c, IN2);
+ else return 2;
- NEXT(2, 1)
- }
+ NEXT(2, 1)
+ }
- return 0;
+ return 0;
}
diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h
index 1fca06b..0bc7db5 100644
--- a/Modules/cjkcodecs/alg_jisx0201.h
+++ b/Modules/cjkcodecs/alg_jisx0201.h
@@ -1,24 +1,24 @@
-#define JISX0201_R_ENCODE(c, assi) \
- if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
- (assi) = (c); \
- else if ((c) == 0x00a5) (assi) = 0x5c; \
- else if ((c) == 0x203e) (assi) = 0x7e;
-#define JISX0201_K_ENCODE(c, assi) \
- if ((c) >= 0xff61 && (c) <= 0xff9f) \
- (assi) = (c) - 0xfec0;
-#define JISX0201_ENCODE(c, assi) \
- JISX0201_R_ENCODE(c, assi) \
- else JISX0201_K_ENCODE(c, assi)
+#define JISX0201_R_ENCODE(c, assi) \
+ if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
+ (assi) = (c); \
+ else if ((c) == 0x00a5) (assi) = 0x5c; \
+ else if ((c) == 0x203e) (assi) = 0x7e;
+#define JISX0201_K_ENCODE(c, assi) \
+ if ((c) >= 0xff61 && (c) <= 0xff9f) \
+ (assi) = (c) - 0xfec0;
+#define JISX0201_ENCODE(c, assi) \
+ JISX0201_R_ENCODE(c, assi) \
+ else JISX0201_K_ENCODE(c, assi)
-#define JISX0201_R_DECODE(c, assi) \
- if ((c) < 0x5c) (assi) = (c); \
- else if ((c) == 0x5c) (assi) = 0x00a5; \
- else if ((c) < 0x7e) (assi) = (c); \
- else if ((c) == 0x7e) (assi) = 0x203e; \
- else if ((c) == 0x7f) (assi) = 0x7f;
-#define JISX0201_K_DECODE(c, assi) \
- if ((c) >= 0xa1 && (c) <= 0xdf) \
- (assi) = 0xfec0 + (c);
-#define JISX0201_DECODE(c, assi) \
- JISX0201_R_DECODE(c, assi) \
- else JISX0201_K_DECODE(c, assi)
+#define JISX0201_R_DECODE(c, assi) \
+ if ((c) < 0x5c) (assi) = (c); \
+ else if ((c) == 0x5c) (assi) = 0x00a5; \
+ else if ((c) < 0x7e) (assi) = (c); \
+ else if ((c) == 0x7e) (assi) = 0x203e; \
+ else if ((c) == 0x7f) (assi) = 0x7f;
+#define JISX0201_K_DECODE(c, assi) \
+ if ((c) >= 0xa1 && (c) <= 0xdf) \
+ (assi) = 0xfec0 + (c);
+#define JISX0201_DECODE(c, assi) \
+ JISX0201_R_DECODE(c, assi) \
+ else JISX0201_K_DECODE(c, assi)
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index e630671..ab0682a 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -13,12 +13,12 @@
/* a unicode "undefined" codepoint */
-#define UNIINV 0xFFFE
+#define UNIINV 0xFFFE
/* internal-use DBCS codepoints which aren't used by any charsets */
-#define NOCHAR 0xFFFF
-#define MULTIC 0xFFFE
-#define DBCINV 0xFFFD
+#define NOCHAR 0xFFFF
+#define MULTIC 0xFFFE
+#define DBCINV 0xFFFD
/* shorter macros to save source size of mapping tables */
#define U UNIINV
@@ -27,94 +27,94 @@
#define D DBCINV
struct dbcs_index {
- const ucs2_t *map;
- unsigned char bottom, top;
+ const ucs2_t *map;
+ unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
struct widedbcs_index {
- const ucs4_t *map;
- unsigned char bottom, top;
+ const ucs4_t *map;
+ unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
struct unim_index {
- const DBCHAR *map;
- unsigned char bottom, top;
+ const DBCHAR *map;
+ unsigned char bottom, top;
};
typedef struct unim_index encode_map;
struct unim_index_bytebased {
- const unsigned char *map;
- unsigned char bottom, top;
+ const unsigned char *map;
+ unsigned char bottom, top;
};
struct dbcs_map {
- const char *charset;
- const struct unim_index *encmap;
- const struct dbcs_index *decmap;
+ const char *charset;
+ const struct unim_index *encmap;
+ const struct dbcs_index *decmap;
};
struct pair_encodemap {
- ucs4_t uniseq;
- DBCHAR code;
+ ucs4_t uniseq;
+ DBCHAR code;
};
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;
-#define CODEC_INIT(encoding) \
- static int encoding##_codec_init(const void *config)
-
-#define ENCODER_INIT(encoding) \
- static int encoding##_encode_init( \
- MultibyteCodec_State *state, const void *config)
-#define ENCODER(encoding) \
- static Py_ssize_t encoding##_encode( \
- MultibyteCodec_State *state, const void *config, \
- const Py_UNICODE **inbuf, Py_ssize_t inleft, \
- unsigned char **outbuf, Py_ssize_t outleft, int flags)
-#define ENCODER_RESET(encoding) \
- static Py_ssize_t encoding##_encode_reset( \
- MultibyteCodec_State *state, const void *config, \
- unsigned char **outbuf, Py_ssize_t outleft)
-
-#define DECODER_INIT(encoding) \
- static int encoding##_decode_init( \
- MultibyteCodec_State *state, const void *config)
-#define DECODER(encoding) \
- static Py_ssize_t encoding##_decode( \
- MultibyteCodec_State *state, const void *config, \
- const unsigned char **inbuf, Py_ssize_t inleft, \
- Py_UNICODE **outbuf, Py_ssize_t outleft)
-#define DECODER_RESET(encoding) \
- static Py_ssize_t encoding##_decode_reset( \
- MultibyteCodec_State *state, const void *config)
+#define CODEC_INIT(encoding) \
+ static int encoding##_codec_init(const void *config)
+
+#define ENCODER_INIT(encoding) \
+ static int encoding##_encode_init( \
+ MultibyteCodec_State *state, const void *config)
+#define ENCODER(encoding) \
+ static Py_ssize_t encoding##_encode( \
+ MultibyteCodec_State *state, const void *config, \
+ const Py_UNICODE **inbuf, Py_ssize_t inleft, \
+ unsigned char **outbuf, Py_ssize_t outleft, int flags)
+#define ENCODER_RESET(encoding) \
+ static Py_ssize_t encoding##_encode_reset( \
+ MultibyteCodec_State *state, const void *config, \
+ unsigned char **outbuf, Py_ssize_t outleft)
+
+#define DECODER_INIT(encoding) \
+ static int encoding##_decode_init( \
+ MultibyteCodec_State *state, const void *config)
+#define DECODER(encoding) \
+ static Py_ssize_t encoding##_decode( \
+ MultibyteCodec_State *state, const void *config, \
+ const unsigned char **inbuf, Py_ssize_t inleft, \
+ Py_UNICODE **outbuf, Py_ssize_t outleft)
+#define DECODER_RESET(encoding) \
+ static Py_ssize_t encoding##_decode_reset( \
+ MultibyteCodec_State *state, const void *config)
#if Py_UNICODE_SIZE == 4
-#define UCS4INVALID(code) \
- if ((code) > 0xFFFF) \
- return 1;
+#define UCS4INVALID(code) \
+ if ((code) > 0xFFFF) \
+ return 1;
#else
-#define UCS4INVALID(code) \
- if (0) ;
+#define UCS4INVALID(code) \
+ if (0) ;
#endif
-#define NEXT_IN(i) \
- (*inbuf) += (i); \
- (inleft) -= (i);
-#define NEXT_OUT(o) \
- (*outbuf) += (o); \
- (outleft) -= (o);
-#define NEXT(i, o) \
- NEXT_IN(i) NEXT_OUT(o)
-
-#define REQUIRE_INBUF(n) \
- if (inleft < (n)) \
- return MBERR_TOOFEW;
-#define REQUIRE_OUTBUF(n) \
- if (outleft < (n)) \
- return MBERR_TOOSMALL;
+#define NEXT_IN(i) \
+ (*inbuf) += (i); \
+ (inleft) -= (i);
+#define NEXT_OUT(o) \
+ (*outbuf) += (o); \
+ (outleft) -= (o);
+#define NEXT(i, o) \
+ NEXT_IN(i) NEXT_OUT(o)
+
+#define REQUIRE_INBUF(n) \
+ if (inleft < (n)) \
+ return MBERR_TOOFEW;
+#define REQUIRE_OUTBUF(n) \
+ if (outleft < (n)) \
+ return MBERR_TOOSMALL;
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
@@ -126,289 +126,289 @@ static const struct dbcs_map *mapping_list;
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
-#define WRITE1(c1) \
- REQUIRE_OUTBUF(1) \
- (*outbuf)[0] = (c1);
-#define WRITE2(c1, c2) \
- REQUIRE_OUTBUF(2) \
- (*outbuf)[0] = (c1); \
- (*outbuf)[1] = (c2);
-#define WRITE3(c1, c2, c3) \
- REQUIRE_OUTBUF(3) \
- (*outbuf)[0] = (c1); \
- (*outbuf)[1] = (c2); \
- (*outbuf)[2] = (c3);
-#define WRITE4(c1, c2, c3, c4) \
- REQUIRE_OUTBUF(4) \
- (*outbuf)[0] = (c1); \
- (*outbuf)[1] = (c2); \
- (*outbuf)[2] = (c3); \
- (*outbuf)[3] = (c4);
+#define WRITE1(c1) \
+ REQUIRE_OUTBUF(1) \
+ (*outbuf)[0] = (c1);
+#define WRITE2(c1, c2) \
+ REQUIRE_OUTBUF(2) \
+ (*outbuf)[0] = (c1); \
+ (*outbuf)[1] = (c2);
+#define WRITE3(c1, c2, c3) \
+ REQUIRE_OUTBUF(3) \
+ (*outbuf)[0] = (c1); \
+ (*outbuf)[1] = (c2); \
+ (*outbuf)[2] = (c3);
+#define WRITE4(c1, c2, c3, c4) \
+ REQUIRE_OUTBUF(4) \
+ (*outbuf)[0] = (c1); \
+ (*outbuf)[1] = (c2); \
+ (*outbuf)[2] = (c3); \
+ (*outbuf)[3] = (c4);
#if Py_UNICODE_SIZE == 2
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(2) \
- (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
- (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
- NEXT_OUT(2)
+# define WRITEUCS4(c) \
+ REQUIRE_OUTBUF(2) \
+ (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
+ (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
+ NEXT_OUT(2)
#else
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(1) \
- **outbuf = (Py_UNICODE)(c); \
- NEXT_OUT(1)
+# define WRITEUCS4(c) \
+ REQUIRE_OUTBUF(1) \
+ **outbuf = (Py_UNICODE)(c); \
+ NEXT_OUT(1)
#endif
-#define _TRYMAP_ENC(m, assi, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && ((assi) = (m)->map[(val) - \
- (m)->bottom]) != NOCHAR)
-#define TRYMAP_ENC_COND(charset, assi, uni) \
- _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
-#define TRYMAP_ENC(charset, assi, uni) \
- if TRYMAP_ENC_COND(charset, assi, uni)
-
-#define _TRYMAP_DEC(m, assi, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && ((assi) = (m)->map[(val) - \
- (m)->bottom]) != UNIINV)
-#define TRYMAP_DEC(charset, assi, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
-
-#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && \
- ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
- (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
- (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
-#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
- if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
- assplane, asshi, asslo, (uni) & 0xff)
-#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
+#define _TRYMAP_ENC(m, assi, val) \
+ ((m)->map != NULL && (val) >= (m)->bottom && \
+ (val)<= (m)->top && ((assi) = (m)->map[(val) - \
+ (m)->bottom]) != NOCHAR)
+#define TRYMAP_ENC_COND(charset, assi, uni) \
+ _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
+#define TRYMAP_ENC(charset, assi, uni) \
+ if TRYMAP_ENC_COND(charset, assi, uni)
+
+#define _TRYMAP_DEC(m, assi, val) \
+ ((m)->map != NULL && (val) >= (m)->bottom && \
+ (val)<= (m)->top && ((assi) = (m)->map[(val) - \
+ (m)->bottom]) != UNIINV)
+#define TRYMAP_DEC(charset, assi, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
+
+#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
+ ((m)->map != NULL && (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
+ (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
+ (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
+#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
+ if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
+ assplane, asshi, asslo, (uni) & 0xff)
+#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
#if Py_UNICODE_SIZE == 2
-#define DECODE_SURROGATE(c) \
- if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
- REQUIRE_INBUF(2) \
- if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
- c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
- ((ucs4_t)(IN2) - 0xdc00); \
- } \
- }
-#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
+#define DECODE_SURROGATE(c) \
+ if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
+ REQUIRE_INBUF(2) \
+ if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
+ c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
+ ((ucs4_t)(IN2) - 0xdc00); \
+ } \
+ }
+#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
-#define GET_INSIZE(c) 1
+#define GET_INSIZE(c) 1
#endif
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
-#define END_MAPPINGS_LIST \
- {"", NULL, NULL} }; \
- static const struct dbcs_map *mapping_list = \
- (const struct dbcs_map *)_mapping_list;
+#define END_MAPPINGS_LIST \
+ {"", NULL, NULL} }; \
+ static const struct dbcs_map *mapping_list = \
+ (const struct dbcs_map *)_mapping_list;
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
-#define _STATEFUL_METHODS(enc) \
- enc##_encode, \
- enc##_encode_init, \
- enc##_encode_reset, \
- enc##_decode, \
- enc##_decode_init, \
- enc##_decode_reset,
-#define _STATELESS_METHODS(enc) \
- enc##_encode, NULL, NULL, \
- enc##_decode, NULL, NULL,
-#define CODEC_STATEFUL(enc) { \
- #enc, NULL, NULL, \
- _STATEFUL_METHODS(enc) \
+#define _STATEFUL_METHODS(enc) \
+ enc##_encode, \
+ enc##_encode_init, \
+ enc##_encode_reset, \
+ enc##_decode, \
+ enc##_decode_init, \
+ enc##_decode_reset,
+#define _STATELESS_METHODS(enc) \
+ enc##_encode, NULL, NULL, \
+ enc##_decode, NULL, NULL,
+#define CODEC_STATEFUL(enc) { \
+ #enc, NULL, NULL, \
+ _STATEFUL_METHODS(enc) \
},
-#define CODEC_STATELESS(enc) { \
- #enc, NULL, NULL, \
- _STATELESS_METHODS(enc) \
+#define CODEC_STATELESS(enc) { \
+ #enc, NULL, NULL, \
+ _STATELESS_METHODS(enc) \
},
-#define CODEC_STATELESS_WINIT(enc) { \
- #enc, NULL, \
- enc##_codec_init, \
- _STATELESS_METHODS(enc) \
+#define CODEC_STATELESS_WINIT(enc) { \
+ #enc, NULL, \
+ enc##_codec_init, \
+ _STATELESS_METHODS(enc) \
},
-#define END_CODECS_LIST \
- {"", NULL,} }; \
- static const MultibyteCodec *codec_list = \
- (const MultibyteCodec *)_codec_list;
+#define END_CODECS_LIST \
+ {"", NULL,} }; \
+ static const MultibyteCodec *codec_list = \
+ (const MultibyteCodec *)_codec_list;
static PyObject *
getmultibytecodec(void)
{
- static PyObject *cofunc = NULL;
-
- if (cofunc == NULL) {
- PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
- if (mod == NULL)
- return NULL;
- cofunc = PyObject_GetAttrString(mod, "__create_codec");
- Py_DECREF(mod);
- }
- return cofunc;
+ static PyObject *cofunc = NULL;
+
+ if (cofunc == NULL) {
+ PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
+ if (mod == NULL)
+ return NULL;
+ cofunc = PyObject_GetAttrString(mod, "__create_codec");
+ Py_DECREF(mod);
+ }
+ return cofunc;
}
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
- PyObject *codecobj, *r, *cofunc;
- const MultibyteCodec *codec;
- const char *enc;
-
- if (!PyUnicode_Check(encoding)) {
- PyErr_SetString(PyExc_TypeError,
- "encoding name must be a string.");
- return NULL;
- }
- enc = _PyUnicode_AsString(encoding);
- if (enc == NULL)
- return NULL;
-
- cofunc = getmultibytecodec();
- if (cofunc == NULL)
- return NULL;
-
- for (codec = codec_list; codec->encoding[0]; codec++)
- if (strcmp(codec->encoding, enc) == 0)
- break;
-
- if (codec->encoding[0] == '\0') {
- PyErr_SetString(PyExc_LookupError,
- "no such codec is supported.");
- return NULL;
- }
-
- codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
- if (codecobj == NULL)
- return NULL;
-
- r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL);
- Py_DECREF(codecobj);
-
- return r;
+ PyObject *codecobj, *r, *cofunc;
+ const MultibyteCodec *codec;
+ const char *enc;
+
+ if (!PyUnicode_Check(encoding)) {
+ PyErr_SetString(PyExc_TypeError,
+ "encoding name must be a string.");
+ return NULL;
+ }
+ enc = _PyUnicode_AsString(encoding);
+ if (enc == NULL)
+ return NULL;
+
+ cofunc = getmultibytecodec();
+ if (cofunc == NULL)
+ return NULL;
+
+ for (codec = codec_list; codec->encoding[0]; codec++)
+ if (strcmp(codec->encoding, enc) == 0)
+ break;
+
+ if (codec->encoding[0] == '\0') {
+ PyErr_SetString(PyExc_LookupError,
+ "no such codec is supported.");
+ return NULL;
+ }
+
+ codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
+ if (codecobj == NULL)
+ return NULL;
+
+ r = PyObject_CallFunctionObjArgs(cofunc, codecobj, NULL);
+ Py_DECREF(codecobj);
+
+ return r;
}
static struct PyMethodDef __methods[] = {
- {"getcodec", (PyCFunction)getcodec, METH_O, ""},
- {NULL, NULL},
+ {"getcodec", (PyCFunction)getcodec, METH_O, ""},
+ {NULL, NULL},
};
static int
register_maps(PyObject *module)
{
- const struct dbcs_map *h;
-
- for (h = mapping_list; h->charset[0] != '\0'; h++) {
- char mhname[256] = "__map_";
- int r;
- strcpy(mhname + sizeof("__map_") - 1, h->charset);
- r = PyModule_AddObject(module, mhname,
- PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
- if (r == -1)
- return -1;
- }
- return 0;
+ const struct dbcs_map *h;
+
+ for (h = mapping_list; h->charset[0] != '\0'; h++) {
+ char mhname[256] = "__map_";
+ int r;
+ strcpy(mhname + sizeof("__map_") - 1, h->charset);
+ r = PyModule_AddObject(module, mhname,
+ PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
+ if (r == -1)
+ return -1;
+ }
+ return 0;
}
#ifdef USING_BINARY_PAIR_SEARCH
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
- const struct pair_encodemap *haystack, int haystacksize)
+ const struct pair_encodemap *haystack, int haystacksize)
{
- int pos, min, max;
- ucs4_t value = body << 16 | modifier;
-
- min = 0;
- max = haystacksize;
-
- for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
- if (value < haystack[pos].uniseq) {
- if (max == pos) break;
- else max = pos;
- }
- else if (value > haystack[pos].uniseq) {
- if (min == pos) break;
- else min = pos;
- }
- else
- break;
-
- if (value == haystack[pos].uniseq)
- return haystack[pos].code;
- else
- return DBCINV;
+ int pos, min, max;
+ ucs4_t value = body << 16 | modifier;
+
+ min = 0;
+ max = haystacksize;
+
+ for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
+ if (value < haystack[pos].uniseq) {
+ if (max == pos) break;
+ else max = pos;
+ }
+ else if (value > haystack[pos].uniseq) {
+ if (min == pos) break;
+ else min = pos;
+ }
+ else
+ break;
+
+ if (value == haystack[pos].uniseq)
+ return haystack[pos].code;
+ else
+ return DBCINV;
}
#endif
#ifdef USING_IMPORTED_MAPS
#define IMPORT_MAP(locale, charset, encmap, decmap) \
- importmap("_codecs_" #locale, "__map_" #charset, \
- (const void**)encmap, (const void**)decmap)
+ importmap("_codecs_" #locale, "__map_" #charset, \
+ (const void**)encmap, (const void**)decmap)
static int
importmap(const char *modname, const char *symbol,
- const void **encmap, const void **decmap)
+ const void **encmap, const void **decmap)
{
- PyObject *o, *mod;
-
- mod = PyImport_ImportModule((char *)modname);
- if (mod == NULL)
- return -1;
-
- o = PyObject_GetAttrString(mod, (char*)symbol);
- if (o == NULL)
- goto errorexit;
- else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
- PyErr_SetString(PyExc_ValueError,
- "map data must be a Capsule.");
- goto errorexit;
- }
- else {
- struct dbcs_map *map;
- map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
- if (encmap != NULL)
- *encmap = map->encmap;
- if (decmap != NULL)
- *decmap = map->decmap;
- Py_DECREF(o);
- }
-
- Py_DECREF(mod);
- return 0;
+ PyObject *o, *mod;
+
+ mod = PyImport_ImportModule((char *)modname);
+ if (mod == NULL)
+ return -1;
+
+ o = PyObject_GetAttrString(mod, (char*)symbol);
+ if (o == NULL)
+ goto errorexit;
+ else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
+ PyErr_SetString(PyExc_ValueError,
+ "map data must be a Capsule.");
+ goto errorexit;
+ }
+ else {
+ struct dbcs_map *map;
+ map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
+ if (encmap != NULL)
+ *encmap = map->encmap;
+ if (decmap != NULL)
+ *decmap = map->decmap;
+ Py_DECREF(o);
+ }
+
+ Py_DECREF(mod);
+ return 0;
errorexit:
- Py_DECREF(mod);
- return -1;
+ Py_DECREF(mod);
+ return -1;
}
#endif
-#define I_AM_A_MODULE_FOR(loc) \
- static struct PyModuleDef __module = { \
- PyModuleDef_HEAD_INIT, \
- "_codecs_"#loc, \
- NULL, \
- 0, \
- __methods, \
- NULL, \
- NULL, \
- NULL, \
- NULL \
- }; \
- PyObject* \
- PyInit__codecs_##loc(void) \
- { \
- PyObject *m = PyModule_Create(&__module); \
- if (m != NULL) \
- (void)register_maps(m); \
- return m; \
- }
+#define I_AM_A_MODULE_FOR(loc) \
+ static struct PyModuleDef __module = { \
+ PyModuleDef_HEAD_INIT, \
+ "_codecs_"#loc, \
+ NULL, \
+ 0, \
+ __methods, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL \
+ }; \
+ PyObject* \
+ PyInit__codecs_##loc(void) \
+ { \
+ PyObject *m = PyModule_Create(&__module); \
+ if (m != NULL) \
+ (void)register_maps(m); \
+ return m; \
+ }
#endif
diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h
index 250c673..4227fb2 100644
--- a/Modules/cjkcodecs/emu_jisx0213_2000.h
+++ b/Modules/cjkcodecs/emu_jisx0213_2000.h
@@ -5,39 +5,39 @@
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif
-#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \
- if (config == (void *)2000 && ( \
- (c) == 0x9B1C || (c) == 0x4FF1 || \
- (c) == 0x525D || (c) == 0x541E || \
- (c) == 0x5653 || (c) == 0x59F8 || \
- (c) == 0x5C5B || (c) == 0x5E77 || \
- (c) == 0x7626 || (c) == 0x7E6B)) \
- return EMULATE_JISX0213_2000_ENCODE_INVALID; \
- else if (config == (void *)2000 && (c) == 0x9B1D) \
- (assi) = 0x8000 | 0x7d3b; \
+#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \
+ if (config == (void *)2000 && ( \
+ (c) == 0x9B1C || (c) == 0x4FF1 || \
+ (c) == 0x525D || (c) == 0x541E || \
+ (c) == 0x5653 || (c) == 0x59F8 || \
+ (c) == 0x5C5B || (c) == 0x5E77 || \
+ (c) == 0x7626 || (c) == 0x7E6B)) \
+ return EMULATE_JISX0213_2000_ENCODE_INVALID; \
+ else if (config == (void *)2000 && (c) == 0x9B1D) \
+ (assi) = 0x8000 | 0x7d3b; \
-#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \
- if (config == (void *)2000 && (c) == 0x20B9F) \
- return EMULATE_JISX0213_2000_ENCODE_INVALID;
+#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \
+ if (config == (void *)2000 && (c) == 0x20B9F) \
+ return EMULATE_JISX0213_2000_ENCODE_INVALID;
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif
-#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
- if (config == (void *)2000 && \
- (((c1) == 0x2E && (c2) == 0x21) || \
- ((c1) == 0x2F && (c2) == 0x7E) || \
- ((c1) == 0x4F && (c2) == 0x54) || \
- ((c1) == 0x4F && (c2) == 0x7E) || \
- ((c1) == 0x74 && (c2) == 0x27) || \
- ((c1) == 0x7E && (c2) == 0x7A) || \
- ((c1) == 0x7E && (c2) == 0x7B) || \
- ((c1) == 0x7E && (c2) == 0x7C) || \
- ((c1) == 0x7E && (c2) == 0x7D) || \
- ((c1) == 0x7E && (c2) == 0x7E))) \
- return EMULATE_JISX0213_2000_DECODE_INVALID;
+#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
+ if (config == (void *)2000 && \
+ (((c1) == 0x2E && (c2) == 0x21) || \
+ ((c1) == 0x2F && (c2) == 0x7E) || \
+ ((c1) == 0x4F && (c2) == 0x54) || \
+ ((c1) == 0x4F && (c2) == 0x7E) || \
+ ((c1) == 0x74 && (c2) == 0x27) || \
+ ((c1) == 0x7E && (c2) == 0x7A) || \
+ ((c1) == 0x7E && (c2) == 0x7B) || \
+ ((c1) == 0x7E && (c2) == 0x7C) || \
+ ((c1) == 0x7E && (c2) == 0x7D) || \
+ ((c1) == 0x7E && (c2) == 0x7E))) \
+ return EMULATE_JISX0213_2000_DECODE_INVALID;
-#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
- if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
- (assi) = 0x9B1D;
+#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
+ if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
+ (assi) = 0x9B1D;
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index 1735dfd..af7ea5b 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -45,179 +45,179 @@ static char *incrementalkwarglist[] = {"input", "final", NULL};
static char *streamkwarglist[] = {"stream", "errors", NULL};
static PyObject *multibytecodec_encode(MultibyteCodec *,
- MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
- PyObject *, int);
+ MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
+ PyObject *, int);
-#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
+#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
static PyObject *
make_tuple(PyObject *object, Py_ssize_t len)
{
- PyObject *v, *w;
-
- if (object == NULL)
- return NULL;
-
- v = PyTuple_New(2);
- if (v == NULL) {
- Py_DECREF(object);
- return NULL;
- }
- PyTuple_SET_ITEM(v, 0, object);
-
- w = PyLong_FromSsize_t(len);
- if (w == NULL) {
- Py_DECREF(v);
- return NULL;
- }
- PyTuple_SET_ITEM(v, 1, w);
-
- return v;
+ PyObject *v, *w;
+
+ if (object == NULL)
+ return NULL;
+
+ v = PyTuple_New(2);
+ if (v == NULL) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ PyTuple_SET_ITEM(v, 0, object);
+
+ w = PyLong_FromSsize_t(len);
+ if (w == NULL) {
+ Py_DECREF(v);
+ return NULL;
+ }
+ PyTuple_SET_ITEM(v, 1, w);
+
+ return v;
}
static PyObject *
internal_error_callback(const char *errors)
{
- if (errors == NULL || strcmp(errors, "strict") == 0)
- return ERROR_STRICT;
- else if (strcmp(errors, "ignore") == 0)
- return ERROR_IGNORE;
- else if (strcmp(errors, "replace") == 0)
- return ERROR_REPLACE;
- else
- return PyUnicode_FromString(errors);
+ if (errors == NULL || strcmp(errors, "strict") == 0)
+ return ERROR_STRICT;
+ else if (strcmp(errors, "ignore") == 0)
+ return ERROR_IGNORE;
+ else if (strcmp(errors, "replace") == 0)
+ return ERROR_REPLACE;
+ else
+ return PyUnicode_FromString(errors);
}
static PyObject *
call_error_callback(PyObject *errors, PyObject *exc)
{
- PyObject *args, *cb, *r;
- const char *str;
-
- assert(PyUnicode_Check(errors));
- str = _PyUnicode_AsString(errors);
- if (str == NULL)
- return NULL;
- cb = PyCodec_LookupError(str);
- if (cb == NULL)
- return NULL;
-
- args = PyTuple_New(1);
- if (args == NULL) {
- Py_DECREF(cb);
- return NULL;
- }
-
- PyTuple_SET_ITEM(args, 0, exc);
- Py_INCREF(exc);
-
- r = PyObject_CallObject(cb, args);
- Py_DECREF(args);
- Py_DECREF(cb);
- return r;
+ PyObject *args, *cb, *r;
+ const char *str;
+
+ assert(PyUnicode_Check(errors));
+ str = _PyUnicode_AsString(errors);
+ if (str == NULL)
+ return NULL;
+ cb = PyCodec_LookupError(str);
+ if (cb == NULL)
+ return NULL;
+
+ args = PyTuple_New(1);
+ if (args == NULL) {
+ Py_DECREF(cb);
+ return NULL;
+ }
+
+ PyTuple_SET_ITEM(args, 0, exc);
+ Py_INCREF(exc);
+
+ r = PyObject_CallObject(cb, args);
+ Py_DECREF(args);
+ Py_DECREF(cb);
+ return r;
}
static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext *self)
{
- const char *errors;
-
- if (self->errors == ERROR_STRICT)
- errors = "strict";
- else if (self->errors == ERROR_IGNORE)
- errors = "ignore";
- else if (self->errors == ERROR_REPLACE)
- errors = "replace";
- else {
- Py_INCREF(self->errors);
- return self->errors;
- }
-
- return PyUnicode_FromString(errors);
+ const char *errors;
+
+ if (self->errors == ERROR_STRICT)
+ errors = "strict";
+ else if (self->errors == ERROR_IGNORE)
+ errors = "ignore";
+ else if (self->errors == ERROR_REPLACE)
+ errors = "replace";
+ else {
+ Py_INCREF(self->errors);
+ return self->errors;
+ }
+
+ return PyUnicode_FromString(errors);
}
static int
codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
- void *closure)
+ void *closure)
{
- PyObject *cb;
- const char *str;
+ PyObject *cb;
+ const char *str;
- if (!PyUnicode_Check(value)) {
- PyErr_SetString(PyExc_TypeError, "errors must be a string");
- return -1;
- }
+ if (!PyUnicode_Check(value)) {
+ PyErr_SetString(PyExc_TypeError, "errors must be a string");
+ return -1;
+ }
- str = _PyUnicode_AsString(value);
- if (str == NULL)
- return -1;
+ str = _PyUnicode_AsString(value);
+ if (str == NULL)
+ return -1;
- cb = internal_error_callback(str);
- if (cb == NULL)
- return -1;
+ cb = internal_error_callback(str);
+ if (cb == NULL)
+ return -1;
- ERROR_DECREF(self->errors);
- self->errors = cb;
- return 0;
+ ERROR_DECREF(self->errors);
+ self->errors = cb;
+ return 0;
}
/* This getset handlers list is used by all the stateful codec objects */
static PyGetSetDef codecctx_getsets[] = {
- {"errors", (getter)codecctx_errors_get,
- (setter)codecctx_errors_set,
- PyDoc_STR("how to treat errors")},
- {NULL,}
+ {"errors", (getter)codecctx_errors_get,
+ (setter)codecctx_errors_set,
+ PyDoc_STR("how to treat errors")},
+ {NULL,}
};
static int
expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
{
- Py_ssize_t orgpos, orgsize, incsize;
+ Py_ssize_t orgpos, orgsize, incsize;
- orgpos = (Py_ssize_t)((char *)buf->outbuf -
- PyBytes_AS_STRING(buf->outobj));
- orgsize = PyBytes_GET_SIZE(buf->outobj);
- incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
+ orgpos = (Py_ssize_t)((char *)buf->outbuf -
+ PyBytes_AS_STRING(buf->outobj));
+ orgsize = PyBytes_GET_SIZE(buf->outobj);
+ incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
- if (orgsize > PY_SSIZE_T_MAX - incsize)
- return -1;
+ if (orgsize > PY_SSIZE_T_MAX - incsize)
+ return -1;
- if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
- return -1;
+ if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
+ return -1;
- buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
- buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
- + PyBytes_GET_SIZE(buf->outobj);
+ buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
+ buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
+ + PyBytes_GET_SIZE(buf->outobj);
- return 0;
+ return 0;
}
-#define REQUIRE_ENCODEBUFFER(buf, s) { \
- if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
- if (expand_encodebuffer(buf, s) == -1) \
- goto errorexit; \
+#define REQUIRE_ENCODEBUFFER(buf, s) { \
+ if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
+ if (expand_encodebuffer(buf, s) == -1) \
+ goto errorexit; \
}
static int
expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
{
- Py_ssize_t orgpos, orgsize;
+ Py_ssize_t orgpos, orgsize;
- orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
- orgsize = PyUnicode_GET_SIZE(buf->outobj);
- if (PyUnicode_Resize(&buf->outobj, orgsize + (
- esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
- return -1;
+ orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
+ orgsize = PyUnicode_GET_SIZE(buf->outobj);
+ if (PyUnicode_Resize(&buf->outobj, orgsize + (
+ esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
+ return -1;
- buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
- buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
- + PyUnicode_GET_SIZE(buf->outobj);
+ buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
+ buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
+ + PyUnicode_GET_SIZE(buf->outobj);
- return 0;
+ return 0;
}
-#define REQUIRE_DECODEBUFFER(buf, s) { \
- if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
- if (expand_decodebuffer(buf, s) == -1) \
- goto errorexit; \
+#define REQUIRE_DECODEBUFFER(buf, s) { \
+ if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
+ if (expand_decodebuffer(buf, s) == -1) \
+ goto errorexit; \
}
@@ -227,504 +227,504 @@ expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
static int
multibytecodec_encerror(MultibyteCodec *codec,
- MultibyteCodec_State *state,
- MultibyteEncodeBuffer *buf,
- PyObject *errors, Py_ssize_t e)
+ MultibyteCodec_State *state,
+ MultibyteEncodeBuffer *buf,
+ PyObject *errors, Py_ssize_t e)
{
- PyObject *retobj = NULL, *retstr = NULL, *tobj;
- Py_ssize_t retstrsize, newpos;
- Py_ssize_t esize, start, end;
- const char *reason;
-
- if (e > 0) {
- reason = "illegal multibyte sequence";
- esize = e;
- }
- else {
- switch (e) {
- case MBERR_TOOSMALL:
- REQUIRE_ENCODEBUFFER(buf, -1);
- return 0; /* retry it */
- case MBERR_TOOFEW:
- reason = "incomplete multibyte sequence";
- esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- break;
- case MBERR_INTERNAL:
- PyErr_SetString(PyExc_RuntimeError,
- "internal codec error");
- return -1;
- default:
- PyErr_SetString(PyExc_RuntimeError,
- "unknown runtime error");
- return -1;
- }
- }
-
- if (errors == ERROR_REPLACE) {
- const Py_UNICODE replchar = '?', *inbuf = &replchar;
- Py_ssize_t r;
-
- for (;;) {
- Py_ssize_t outleft;
-
- outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
- r = codec->encode(state, codec->config, &inbuf, 1,
- &buf->outbuf, outleft, 0);
- if (r == MBERR_TOOSMALL) {
- REQUIRE_ENCODEBUFFER(buf, -1);
- continue;
- }
- else
- break;
- }
-
- if (r != 0) {
- REQUIRE_ENCODEBUFFER(buf, 1);
- *buf->outbuf++ = '?';
- }
- }
- if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
- buf->inbuf += esize;
- return 0;
- }
-
- start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
- end = start + esize;
-
- /* use cached exception object if available */
- if (buf->excobj == NULL) {
- buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
- buf->inbuf_top,
- buf->inbuf_end - buf->inbuf_top,
- start, end, reason);
- if (buf->excobj == NULL)
- goto errorexit;
- }
- else
- if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
- PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
- PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
- goto errorexit;
-
- if (errors == ERROR_STRICT) {
- PyCodec_StrictErrors(buf->excobj);
- goto errorexit;
- }
-
- retobj = call_error_callback(errors, buf->excobj);
- if (retobj == NULL)
- goto errorexit;
-
- if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
- !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
- !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
- PyErr_SetString(PyExc_TypeError,
- "encoding error handler must return "
- "(unicode, int) tuple");
- goto errorexit;
- }
-
- {
- const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
-
- retstr = multibytecodec_encode(codec, state, &uraw,
- PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
- MBENC_FLUSH);
- if (retstr == NULL)
- goto errorexit;
- }
-
- assert(PyBytes_Check(retstr));
- retstrsize = PyBytes_GET_SIZE(retstr);
- REQUIRE_ENCODEBUFFER(buf, retstrsize);
-
- memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
- buf->outbuf += retstrsize;
-
- newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
- if (newpos < 0 && !PyErr_Occurred())
- newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
- if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
- PyErr_Clear();
- PyErr_Format(PyExc_IndexError,
- "position %zd from error handler out of bounds",
- newpos);
- goto errorexit;
- }
- buf->inbuf = buf->inbuf_top + newpos;
-
- Py_DECREF(retobj);
- Py_DECREF(retstr);
- return 0;
+ PyObject *retobj = NULL, *retstr = NULL, *tobj;
+ Py_ssize_t retstrsize, newpos;
+ Py_ssize_t esize, start, end;
+ const char *reason;
+
+ if (e > 0) {
+ reason = "illegal multibyte sequence";
+ esize = e;
+ }
+ else {
+ switch (e) {
+ case MBERR_TOOSMALL:
+ REQUIRE_ENCODEBUFFER(buf, -1);
+ return 0; /* retry it */
+ case MBERR_TOOFEW:
+ reason = "incomplete multibyte sequence";
+ esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+ break;
+ case MBERR_INTERNAL:
+ PyErr_SetString(PyExc_RuntimeError,
+ "internal codec error");
+ return -1;
+ default:
+ PyErr_SetString(PyExc_RuntimeError,
+ "unknown runtime error");
+ return -1;
+ }
+ }
+
+ if (errors == ERROR_REPLACE) {
+ const Py_UNICODE replchar = '?', *inbuf = &replchar;
+ Py_ssize_t r;
+
+ for (;;) {
+ Py_ssize_t outleft;
+
+ outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
+ r = codec->encode(state, codec->config, &inbuf, 1,
+ &buf->outbuf, outleft, 0);
+ if (r == MBERR_TOOSMALL) {
+ REQUIRE_ENCODEBUFFER(buf, -1);
+ continue;
+ }
+ else
+ break;
+ }
+
+ if (r != 0) {
+ REQUIRE_ENCODEBUFFER(buf, 1);
+ *buf->outbuf++ = '?';
+ }
+ }
+ if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
+ buf->inbuf += esize;
+ return 0;
+ }
+
+ start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
+ end = start + esize;
+
+ /* use cached exception object if available */
+ if (buf->excobj == NULL) {
+ buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
+ buf->inbuf_top,
+ buf->inbuf_end - buf->inbuf_top,
+ start, end, reason);
+ if (buf->excobj == NULL)
+ goto errorexit;
+ }
+ else
+ if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
+ PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
+ PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
+ goto errorexit;
+
+ if (errors == ERROR_STRICT) {
+ PyCodec_StrictErrors(buf->excobj);
+ goto errorexit;
+ }
+
+ retobj = call_error_callback(errors, buf->excobj);
+ if (retobj == NULL)
+ goto errorexit;
+
+ if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
+ !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
+ !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
+ PyErr_SetString(PyExc_TypeError,
+ "encoding error handler must return "
+ "(unicode, int) tuple");
+ goto errorexit;
+ }
+
+ {
+ const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
+
+ retstr = multibytecodec_encode(codec, state, &uraw,
+ PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
+ MBENC_FLUSH);
+ if (retstr == NULL)
+ goto errorexit;
+ }
+
+ assert(PyBytes_Check(retstr));
+ retstrsize = PyBytes_GET_SIZE(retstr);
+ REQUIRE_ENCODEBUFFER(buf, retstrsize);
+
+ memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
+ buf->outbuf += retstrsize;
+
+ newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
+ if (newpos < 0 && !PyErr_Occurred())
+ newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
+ if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
+ PyErr_Clear();
+ PyErr_Format(PyExc_IndexError,
+ "position %zd from error handler out of bounds",
+ newpos);
+ goto errorexit;
+ }
+ buf->inbuf = buf->inbuf_top + newpos;
+
+ Py_DECREF(retobj);
+ Py_DECREF(retstr);
+ return 0;
errorexit:
- Py_XDECREF(retobj);
- Py_XDECREF(retstr);
- return -1;
+ Py_XDECREF(retobj);
+ Py_XDECREF(retstr);
+ return -1;
}
static int
multibytecodec_decerror(MultibyteCodec *codec,
- MultibyteCodec_State *state,
- MultibyteDecodeBuffer *buf,
- PyObject *errors, Py_ssize_t e)
+ MultibyteCodec_State *state,
+ MultibyteDecodeBuffer *buf,
+ PyObject *errors, Py_ssize_t e)
{
- PyObject *retobj = NULL, *retuni = NULL;
- Py_ssize_t retunisize, newpos;
- const char *reason;
- Py_ssize_t esize, start, end;
-
- if (e > 0) {
- reason = "illegal multibyte sequence";
- esize = e;
- }
- else {
- switch (e) {
- case MBERR_TOOSMALL:
- REQUIRE_DECODEBUFFER(buf, -1);
- return 0; /* retry it */
- case MBERR_TOOFEW:
- reason = "incomplete multibyte sequence";
- esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- break;
- case MBERR_INTERNAL:
- PyErr_SetString(PyExc_RuntimeError,
- "internal codec error");
- return -1;
- default:
- PyErr_SetString(PyExc_RuntimeError,
- "unknown runtime error");
- return -1;
- }
- }
-
- if (errors == ERROR_REPLACE) {
- REQUIRE_DECODEBUFFER(buf, 1);
- *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
- }
- if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
- buf->inbuf += esize;
- return 0;
- }
-
- start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
- end = start + esize;
-
- /* use cached exception object if available */
- if (buf->excobj == NULL) {
- buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
- (const char *)buf->inbuf_top,
- (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
- start, end, reason);
- if (buf->excobj == NULL)
- goto errorexit;
- }
- else
- if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
- PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
- PyUnicodeDecodeError_SetReason(buf->excobj, reason))
- goto errorexit;
-
- if (errors == ERROR_STRICT) {
- PyCodec_StrictErrors(buf->excobj);
- goto errorexit;
- }
-
- retobj = call_error_callback(errors, buf->excobj);
- if (retobj == NULL)
- goto errorexit;
-
- if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
- !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
- !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
- PyErr_SetString(PyExc_TypeError,
- "decoding error handler must return "
- "(unicode, int) tuple");
- goto errorexit;
- }
-
- retunisize = PyUnicode_GET_SIZE(retuni);
- if (retunisize > 0) {
- REQUIRE_DECODEBUFFER(buf, retunisize);
- memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
- retunisize * Py_UNICODE_SIZE);
- buf->outbuf += retunisize;
- }
-
- newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
- if (newpos < 0 && !PyErr_Occurred())
- newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
- if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
- PyErr_Clear();
- PyErr_Format(PyExc_IndexError,
- "position %zd from error handler out of bounds",
- newpos);
- goto errorexit;
- }
- buf->inbuf = buf->inbuf_top + newpos;
- Py_DECREF(retobj);
- return 0;
+ PyObject *retobj = NULL, *retuni = NULL;
+ Py_ssize_t retunisize, newpos;
+ const char *reason;
+ Py_ssize_t esize, start, end;
+
+ if (e > 0) {
+ reason = "illegal multibyte sequence";
+ esize = e;
+ }
+ else {
+ switch (e) {
+ case MBERR_TOOSMALL:
+ REQUIRE_DECODEBUFFER(buf, -1);
+ return 0; /* retry it */
+ case MBERR_TOOFEW:
+ reason = "incomplete multibyte sequence";
+ esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+ break;
+ case MBERR_INTERNAL:
+ PyErr_SetString(PyExc_RuntimeError,
+ "internal codec error");
+ return -1;
+ default:
+ PyErr_SetString(PyExc_RuntimeError,
+ "unknown runtime error");
+ return -1;
+ }
+ }
+
+ if (errors == ERROR_REPLACE) {
+ REQUIRE_DECODEBUFFER(buf, 1);
+ *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
+ }
+ if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
+ buf->inbuf += esize;
+ return 0;
+ }
+
+ start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
+ end = start + esize;
+
+ /* use cached exception object if available */
+ if (buf->excobj == NULL) {
+ buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
+ (const char *)buf->inbuf_top,
+ (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
+ start, end, reason);
+ if (buf->excobj == NULL)
+ goto errorexit;
+ }
+ else
+ if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
+ PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
+ PyUnicodeDecodeError_SetReason(buf->excobj, reason))
+ goto errorexit;
+
+ if (errors == ERROR_STRICT) {
+ PyCodec_StrictErrors(buf->excobj);
+ goto errorexit;
+ }
+
+ retobj = call_error_callback(errors, buf->excobj);
+ if (retobj == NULL)
+ goto errorexit;
+
+ if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
+ !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
+ !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
+ PyErr_SetString(PyExc_TypeError,
+ "decoding error handler must return "
+ "(unicode, int) tuple");
+ goto errorexit;
+ }
+
+ retunisize = PyUnicode_GET_SIZE(retuni);
+ if (retunisize > 0) {
+ REQUIRE_DECODEBUFFER(buf, retunisize);
+ memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
+ retunisize * Py_UNICODE_SIZE);
+ buf->outbuf += retunisize;
+ }
+
+ newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
+ if (newpos < 0 && !PyErr_Occurred())
+ newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
+ if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
+ PyErr_Clear();
+ PyErr_Format(PyExc_IndexError,
+ "position %zd from error handler out of bounds",
+ newpos);
+ goto errorexit;
+ }
+ buf->inbuf = buf->inbuf_top + newpos;
+ Py_DECREF(retobj);
+ return 0;
errorexit:
- Py_XDECREF(retobj);
- return -1;
+ Py_XDECREF(retobj);
+ return -1;
}
static PyObject *
multibytecodec_encode(MultibyteCodec *codec,
- MultibyteCodec_State *state,
- const Py_UNICODE **data, Py_ssize_t datalen,
- PyObject *errors, int flags)
+ MultibyteCodec_State *state,
+ const Py_UNICODE **data, Py_ssize_t datalen,
+ PyObject *errors, int flags)
{
- MultibyteEncodeBuffer buf;
- Py_ssize_t finalsize, r = 0;
-
- if (datalen == 0)
- return PyBytes_FromStringAndSize(NULL, 0);
-
- buf.excobj = NULL;
- buf.inbuf = buf.inbuf_top = *data;
- buf.inbuf_end = buf.inbuf_top + datalen;
-
- if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
- PyErr_NoMemory();
- goto errorexit;
- }
-
- buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
- if (buf.outobj == NULL)
- goto errorexit;
- buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
- buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
-
- while (buf.inbuf < buf.inbuf_end) {
- Py_ssize_t inleft, outleft;
-
- /* we don't reuse inleft and outleft here.
- * error callbacks can relocate the cursor anywhere on buffer*/
- inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
- outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
- r = codec->encode(state, codec->config, &buf.inbuf, inleft,
- &buf.outbuf, outleft, flags);
- if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
- break;
- else if (multibytecodec_encerror(codec, state, &buf, errors,r))
- goto errorexit;
- else if (r == MBERR_TOOFEW)
- break;
- }
-
- if (codec->encreset != NULL)
- for (;;) {
- Py_ssize_t outleft;
-
- outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
- r = codec->encreset(state, codec->config, &buf.outbuf,
- outleft);
- if (r == 0)
- break;
- else if (multibytecodec_encerror(codec, state,
- &buf, errors, r))
- goto errorexit;
- }
-
- finalsize = (Py_ssize_t)((char *)buf.outbuf -
- PyBytes_AS_STRING(buf.outobj));
-
- if (finalsize != PyBytes_GET_SIZE(buf.outobj))
- if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
-
- *data = buf.inbuf;
- Py_XDECREF(buf.excobj);
- return buf.outobj;
+ MultibyteEncodeBuffer buf;
+ Py_ssize_t finalsize, r = 0;
+
+ if (datalen == 0)
+ return PyBytes_FromStringAndSize(NULL, 0);
+
+ buf.excobj = NULL;
+ buf.inbuf = buf.inbuf_top = *data;
+ buf.inbuf_end = buf.inbuf_top + datalen;
+
+ if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
+ PyErr_NoMemory();
+ goto errorexit;
+ }
+
+ buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
+ if (buf.outobj == NULL)
+ goto errorexit;
+ buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
+ buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
+
+ while (buf.inbuf < buf.inbuf_end) {
+ Py_ssize_t inleft, outleft;
+
+ /* we don't reuse inleft and outleft here.
+ * error callbacks can relocate the cursor anywhere on buffer*/
+ inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
+ outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
+ r = codec->encode(state, codec->config, &buf.inbuf, inleft,
+ &buf.outbuf, outleft, flags);
+ if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
+ break;
+ else if (multibytecodec_encerror(codec, state, &buf, errors,r))
+ goto errorexit;
+ else if (r == MBERR_TOOFEW)
+ break;
+ }
+
+ if (codec->encreset != NULL)
+ for (;;) {
+ Py_ssize_t outleft;
+
+ outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
+ r = codec->encreset(state, codec->config, &buf.outbuf,
+ outleft);
+ if (r == 0)
+ break;
+ else if (multibytecodec_encerror(codec, state,
+ &buf, errors, r))
+ goto errorexit;
+ }
+
+ finalsize = (Py_ssize_t)((char *)buf.outbuf -
+ PyBytes_AS_STRING(buf.outobj));
+
+ if (finalsize != PyBytes_GET_SIZE(buf.outobj))
+ if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
+ goto errorexit;
+
+ *data = buf.inbuf;
+ Py_XDECREF(buf.excobj);
+ return buf.outobj;
errorexit:
- Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
- return NULL;
+ Py_XDECREF(buf.excobj);
+ Py_XDECREF(buf.outobj);
+ return NULL;
}
static PyObject *
MultibyteCodec_Encode(MultibyteCodecObject *self,
- PyObject *args, PyObject *kwargs)
+ PyObject *args, PyObject *kwargs)
{
- MultibyteCodec_State state;
- Py_UNICODE *data;
- PyObject *errorcb, *r, *arg, *ucvt;
- const char *errors = NULL;
- Py_ssize_t datalen;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
- codeckwarglist, &arg, &errors))
- return NULL;
-
- if (PyUnicode_Check(arg))
- ucvt = NULL;
- else {
- arg = ucvt = PyObject_Str(arg);
- if (arg == NULL)
- return NULL;
- else if (!PyUnicode_Check(arg)) {
- PyErr_SetString(PyExc_TypeError,
- "couldn't convert the object to unicode.");
- Py_DECREF(ucvt);
- return NULL;
- }
- }
-
- data = PyUnicode_AS_UNICODE(arg);
- datalen = PyUnicode_GET_SIZE(arg);
-
- errorcb = internal_error_callback(errors);
- if (errorcb == NULL) {
- Py_XDECREF(ucvt);
- return NULL;
- }
-
- if (self->codec->encinit != NULL &&
- self->codec->encinit(&state, self->codec->config) != 0)
- goto errorexit;
- r = multibytecodec_encode(self->codec, &state,
- (const Py_UNICODE **)&data, datalen, errorcb,
- MBENC_FLUSH | MBENC_RESET);
- if (r == NULL)
- goto errorexit;
-
- ERROR_DECREF(errorcb);
- Py_XDECREF(ucvt);
- return make_tuple(r, datalen);
+ MultibyteCodec_State state;
+ Py_UNICODE *data;
+ PyObject *errorcb, *r, *arg, *ucvt;
+ const char *errors = NULL;
+ Py_ssize_t datalen;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
+ codeckwarglist, &arg, &errors))
+ return NULL;
+
+ if (PyUnicode_Check(arg))
+ ucvt = NULL;
+ else {
+ arg = ucvt = PyObject_Str(arg);
+ if (arg == NULL)
+ return NULL;
+ else if (!PyUnicode_Check(arg)) {
+ PyErr_SetString(PyExc_TypeError,
+ "couldn't convert the object to unicode.");
+ Py_DECREF(ucvt);
+ return NULL;
+ }
+ }
+
+ data = PyUnicode_AS_UNICODE(arg);
+ datalen = PyUnicode_GET_SIZE(arg);
+
+ errorcb = internal_error_callback(errors);
+ if (errorcb == NULL) {
+ Py_XDECREF(ucvt);
+ return NULL;
+ }
+
+ if (self->codec->encinit != NULL &&
+ self->codec->encinit(&state, self->codec->config) != 0)
+ goto errorexit;
+ r = multibytecodec_encode(self->codec, &state,
+ (const Py_UNICODE **)&data, datalen, errorcb,
+ MBENC_FLUSH | MBENC_RESET);
+ if (r == NULL)
+ goto errorexit;
+
+ ERROR_DECREF(errorcb);
+ Py_XDECREF(ucvt);
+ return make_tuple(r, datalen);
errorexit:
- ERROR_DECREF(errorcb);
- Py_XDECREF(ucvt);
- return NULL;
+ ERROR_DECREF(errorcb);
+ Py_XDECREF(ucvt);
+ return NULL;
}
static PyObject *
MultibyteCodec_Decode(MultibyteCodecObject *self,
- PyObject *args, PyObject *kwargs)
+ PyObject *args, PyObject *kwargs)
{
- MultibyteCodec_State state;
- MultibyteDecodeBuffer buf;
- PyObject *errorcb;
- Py_buffer pdata;
- const char *data, *errors = NULL;
- Py_ssize_t datalen, finalsize;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode",
- codeckwarglist, &pdata, &errors))
- return NULL;
- data = pdata.buf;
- datalen = pdata.len;
-
- errorcb = internal_error_callback(errors);
- if (errorcb == NULL) {
- PyBuffer_Release(&pdata);
- return NULL;
- }
-
- if (datalen == 0) {
- PyBuffer_Release(&pdata);
- ERROR_DECREF(errorcb);
- return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
- }
-
- buf.excobj = NULL;
- buf.inbuf = buf.inbuf_top = (unsigned char *)data;
- buf.inbuf_end = buf.inbuf_top + datalen;
- buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
- if (buf.outobj == NULL)
- goto errorexit;
- buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
- buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
-
- if (self->codec->decinit != NULL &&
- self->codec->decinit(&state, self->codec->config) != 0)
- goto errorexit;
-
- while (buf.inbuf < buf.inbuf_end) {
- Py_ssize_t inleft, outleft, r;
-
- inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
- outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
-
- r = self->codec->decode(&state, self->codec->config,
- &buf.inbuf, inleft, &buf.outbuf, outleft);
- if (r == 0)
- break;
- else if (multibytecodec_decerror(self->codec, &state,
- &buf, errorcb, r))
- goto errorexit;
- }
-
- finalsize = (Py_ssize_t)(buf.outbuf -
- PyUnicode_AS_UNICODE(buf.outobj));
-
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
-
- PyBuffer_Release(&pdata);
- Py_XDECREF(buf.excobj);
- ERROR_DECREF(errorcb);
- return make_tuple(buf.outobj, datalen);
+ MultibyteCodec_State state;
+ MultibyteDecodeBuffer buf;
+ PyObject *errorcb;
+ Py_buffer pdata;
+ const char *data, *errors = NULL;
+ Py_ssize_t datalen, finalsize;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode",
+ codeckwarglist, &pdata, &errors))
+ return NULL;
+ data = pdata.buf;
+ datalen = pdata.len;
+
+ errorcb = internal_error_callback(errors);
+ if (errorcb == NULL) {
+ PyBuffer_Release(&pdata);
+ return NULL;
+ }
+
+ if (datalen == 0) {
+ PyBuffer_Release(&pdata);
+ ERROR_DECREF(errorcb);
+ return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
+ }
+
+ buf.excobj = NULL;
+ buf.inbuf = buf.inbuf_top = (unsigned char *)data;
+ buf.inbuf_end = buf.inbuf_top + datalen;
+ buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
+ if (buf.outobj == NULL)
+ goto errorexit;
+ buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
+ buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
+
+ if (self->codec->decinit != NULL &&
+ self->codec->decinit(&state, self->codec->config) != 0)
+ goto errorexit;
+
+ while (buf.inbuf < buf.inbuf_end) {
+ Py_ssize_t inleft, outleft, r;
+
+ inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
+ outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
+
+ r = self->codec->decode(&state, self->codec->config,
+ &buf.inbuf, inleft, &buf.outbuf, outleft);
+ if (r == 0)
+ break;
+ else if (multibytecodec_decerror(self->codec, &state,
+ &buf, errorcb, r))
+ goto errorexit;
+ }
+
+ finalsize = (Py_ssize_t)(buf.outbuf -
+ PyUnicode_AS_UNICODE(buf.outobj));
+
+ if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+ if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+ goto errorexit;
+
+ PyBuffer_Release(&pdata);
+ Py_XDECREF(buf.excobj);
+ ERROR_DECREF(errorcb);
+ return make_tuple(buf.outobj, datalen);
errorexit:
- PyBuffer_Release(&pdata);
- ERROR_DECREF(errorcb);
- Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
+ PyBuffer_Release(&pdata);
+ ERROR_DECREF(errorcb);
+ Py_XDECREF(buf.excobj);
+ Py_XDECREF(buf.outobj);
- return NULL;
+ return NULL;
}
static struct PyMethodDef multibytecodec_methods[] = {
- {"encode", (PyCFunction)MultibyteCodec_Encode,
- METH_VARARGS | METH_KEYWORDS,
- MultibyteCodec_Encode__doc__},
- {"decode", (PyCFunction)MultibyteCodec_Decode,
- METH_VARARGS | METH_KEYWORDS,
- MultibyteCodec_Decode__doc__},
- {NULL, NULL},
+ {"encode", (PyCFunction)MultibyteCodec_Encode,
+ METH_VARARGS | METH_KEYWORDS,
+ MultibyteCodec_Encode__doc__},
+ {"decode", (PyCFunction)MultibyteCodec_Decode,
+ METH_VARARGS | METH_KEYWORDS,
+ MultibyteCodec_Decode__doc__},
+ {NULL, NULL},
};
static void
multibytecodec_dealloc(MultibyteCodecObject *self)
{
- PyObject_Del(self);
+ PyObject_Del(self);
}
static PyTypeObject MultibyteCodec_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "MultibyteCodec", /* tp_name */
- sizeof(MultibyteCodecObject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)multibytecodec_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iterext */
- multibytecodec_methods, /* tp_methods */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "MultibyteCodec", /* tp_name */
+ sizeof(MultibyteCodecObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)multibytecodec_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iterext */
+ multibytecodec_methods, /* tp_methods */
};
@@ -732,150 +732,150 @@ static PyTypeObject MultibyteCodec_Type = {
* Utility functions for stateful codec mechanism
*/
-#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o))
-#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o))
+#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o))
+#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o))
static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
- PyObject *unistr, int final)
+ PyObject *unistr, int final)
{
- PyObject *ucvt, *r = NULL;
- Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
- Py_ssize_t datalen, origpending;
-
- if (PyUnicode_Check(unistr))
- ucvt = NULL;
- else {
- unistr = ucvt = PyObject_Str(unistr);
- if (unistr == NULL)
- return NULL;
- else if (!PyUnicode_Check(unistr)) {
- PyErr_SetString(PyExc_TypeError,
- "couldn't convert the object to unicode.");
- Py_DECREF(ucvt);
- return NULL;
- }
- }
-
- datalen = PyUnicode_GET_SIZE(unistr);
- origpending = ctx->pendingsize;
-
- if (origpending > 0) {
- if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) {
- PyErr_NoMemory();
- /* inbuf_tmp == NULL */
- goto errorexit;
- }
- inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
- if (inbuf_tmp == NULL)
- goto errorexit;
- memcpy(inbuf_tmp, ctx->pending,
- Py_UNICODE_SIZE * ctx->pendingsize);
- memcpy(inbuf_tmp + ctx->pendingsize,
- PyUnicode_AS_UNICODE(unistr),
- Py_UNICODE_SIZE * datalen);
- datalen += ctx->pendingsize;
- ctx->pendingsize = 0;
- inbuf = inbuf_tmp;
- }
- else
- inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
-
- inbuf_end = inbuf + datalen;
-
- r = multibytecodec_encode(ctx->codec, &ctx->state,
- (const Py_UNICODE **)&inbuf,
- datalen, ctx->errors, final ? MBENC_FLUSH : 0);
- if (r == NULL) {
- /* recover the original pending buffer */
- if (origpending > 0)
- memcpy(ctx->pending, inbuf_tmp,
- Py_UNICODE_SIZE * origpending);
- ctx->pendingsize = origpending;
- goto errorexit;
- }
-
- if (inbuf < inbuf_end) {
- ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
- if (ctx->pendingsize > MAXENCPENDING) {
- /* normal codecs can't reach here */
- ctx->pendingsize = 0;
- PyErr_SetString(PyExc_UnicodeError,
- "pending buffer overflow");
- goto errorexit;
- }
- memcpy(ctx->pending, inbuf,
- ctx->pendingsize * Py_UNICODE_SIZE);
- }
-
- if (inbuf_tmp != NULL)
- PyMem_Del(inbuf_tmp);
- Py_XDECREF(ucvt);
- return r;
+ PyObject *ucvt, *r = NULL;
+ Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
+ Py_ssize_t datalen, origpending;
+
+ if (PyUnicode_Check(unistr))
+ ucvt = NULL;
+ else {
+ unistr = ucvt = PyObject_Str(unistr);
+ if (unistr == NULL)
+ return NULL;
+ else if (!PyUnicode_Check(unistr)) {
+ PyErr_SetString(PyExc_TypeError,
+ "couldn't convert the object to unicode.");
+ Py_DECREF(ucvt);
+ return NULL;
+ }
+ }
+
+ datalen = PyUnicode_GET_SIZE(unistr);
+ origpending = ctx->pendingsize;
+
+ if (origpending > 0) {
+ if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) {
+ PyErr_NoMemory();
+ /* inbuf_tmp == NULL */
+ goto errorexit;
+ }
+ inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
+ if (inbuf_tmp == NULL)
+ goto errorexit;
+ memcpy(inbuf_tmp, ctx->pending,
+ Py_UNICODE_SIZE * ctx->pendingsize);
+ memcpy(inbuf_tmp + ctx->pendingsize,
+ PyUnicode_AS_UNICODE(unistr),
+ Py_UNICODE_SIZE * datalen);
+ datalen += ctx->pendingsize;
+ ctx->pendingsize = 0;
+ inbuf = inbuf_tmp;
+ }
+ else
+ inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
+
+ inbuf_end = inbuf + datalen;
+
+ r = multibytecodec_encode(ctx->codec, &ctx->state,
+ (const Py_UNICODE **)&inbuf,
+ datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+ if (r == NULL) {
+ /* recover the original pending buffer */
+ if (origpending > 0)
+ memcpy(ctx->pending, inbuf_tmp,
+ Py_UNICODE_SIZE * origpending);
+ ctx->pendingsize = origpending;
+ goto errorexit;
+ }
+
+ if (inbuf < inbuf_end) {
+ ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
+ if (ctx->pendingsize > MAXENCPENDING) {
+ /* normal codecs can't reach here */
+ ctx->pendingsize = 0;
+ PyErr_SetString(PyExc_UnicodeError,
+ "pending buffer overflow");
+ goto errorexit;
+ }
+ memcpy(ctx->pending, inbuf,
+ ctx->pendingsize * Py_UNICODE_SIZE);
+ }
+
+ if (inbuf_tmp != NULL)
+ PyMem_Del(inbuf_tmp);
+ Py_XDECREF(ucvt);
+ return r;
errorexit:
- if (inbuf_tmp != NULL)
- PyMem_Del(inbuf_tmp);
- Py_XDECREF(r);
- Py_XDECREF(ucvt);
- return NULL;
+ if (inbuf_tmp != NULL)
+ PyMem_Del(inbuf_tmp);
+ Py_XDECREF(r);
+ Py_XDECREF(ucvt);
+ return NULL;
}
static int
decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
- MultibyteDecodeBuffer *buf)
+ MultibyteDecodeBuffer *buf)
{
- Py_ssize_t npendings;
-
- npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- if (npendings + ctx->pendingsize > MAXDECPENDING ||
- npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
- PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
- return -1;
- }
- memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
- ctx->pendingsize += npendings;
- return 0;
+ Py_ssize_t npendings;
+
+ npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+ if (npendings + ctx->pendingsize > MAXDECPENDING ||
+ npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
+ PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
+ return -1;
+ }
+ memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
+ ctx->pendingsize += npendings;
+ return 0;
}
static int
decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
- Py_ssize_t size)
+ Py_ssize_t size)
{
- buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
- buf->inbuf_end = buf->inbuf_top + size;
- if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
- buf->outobj = PyUnicode_FromUnicode(NULL, size);
- if (buf->outobj == NULL)
- return -1;
- buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
- buf->outbuf_end = buf->outbuf +
- PyUnicode_GET_SIZE(buf->outobj);
- }
-
- return 0;
+ buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
+ buf->inbuf_end = buf->inbuf_top + size;
+ if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
+ buf->outobj = PyUnicode_FromUnicode(NULL, size);
+ if (buf->outobj == NULL)
+ return -1;
+ buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
+ buf->outbuf_end = buf->outbuf +
+ PyUnicode_GET_SIZE(buf->outobj);
+ }
+
+ return 0;
}
static int
decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
- MultibyteDecodeBuffer *buf)
+ MultibyteDecodeBuffer *buf)
{
- while (buf->inbuf < buf->inbuf_end) {
- Py_ssize_t inleft, outleft;
- Py_ssize_t r;
-
- inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
- outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
-
- r = ctx->codec->decode(&ctx->state, ctx->codec->config,
- &buf->inbuf, inleft, &buf->outbuf, outleft);
- if (r == 0 || r == MBERR_TOOFEW)
- break;
- else if (multibytecodec_decerror(ctx->codec, &ctx->state,
- buf, ctx->errors, r))
- return -1;
- }
- return 0;
+ while (buf->inbuf < buf->inbuf_end) {
+ Py_ssize_t inleft, outleft;
+ Py_ssize_t r;
+
+ inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+ outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
+
+ r = ctx->codec->decode(&ctx->state, ctx->codec->config,
+ &buf->inbuf, inleft, &buf->outbuf, outleft);
+ if (r == 0 || r == MBERR_TOOFEW)
+ break;
+ else if (multibytecodec_decerror(ctx->codec, &ctx->state,
+ buf, ctx->errors, r))
+ return -1;
+ }
+ return 0;
}
@@ -885,142 +885,142 @@ decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
static PyObject *
mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
- PyObject *args, PyObject *kwargs)
+ PyObject *args, PyObject *kwargs)
{
- PyObject *data;
- int final = 0;
+ PyObject *data;
+ int final = 0;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
- incrementalkwarglist, &data, &final))
- return NULL;
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
+ incrementalkwarglist, &data, &final))
+ return NULL;
- return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
+ return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
}
static PyObject *
mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
{
- if (self->codec->decreset != NULL &&
- self->codec->decreset(&self->state, self->codec->config) != 0)
- return NULL;
- self->pendingsize = 0;
+ if (self->codec->decreset != NULL &&
+ self->codec->decreset(&self->state, self->codec->config) != 0)
+ return NULL;
+ self->pendingsize = 0;
- Py_RETURN_NONE;
+ Py_RETURN_NONE;
}
static struct PyMethodDef mbiencoder_methods[] = {
- {"encode", (PyCFunction)mbiencoder_encode,
- METH_VARARGS | METH_KEYWORDS, NULL},
- {"reset", (PyCFunction)mbiencoder_reset,
- METH_NOARGS, NULL},
- {NULL, NULL},
+ {"encode", (PyCFunction)mbiencoder_encode,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {"reset", (PyCFunction)mbiencoder_reset,
+ METH_NOARGS, NULL},
+ {NULL, NULL},
};
static PyObject *
mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- MultibyteIncrementalEncoderObject *self;
- PyObject *codec = NULL;
- char *errors = NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
- incnewkwarglist, &errors))
- return NULL;
-
- self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
- if (self == NULL)
- return NULL;
-
- codec = PyObject_GetAttrString((PyObject *)type, "codec");
- if (codec == NULL)
- goto errorexit;
- if (!MultibyteCodec_Check(codec)) {
- PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
- goto errorexit;
- }
-
- self->codec = ((MultibyteCodecObject *)codec)->codec;
- self->pendingsize = 0;
- self->errors = internal_error_callback(errors);
- if (self->errors == NULL)
- goto errorexit;
- if (self->codec->encinit != NULL &&
- self->codec->encinit(&self->state, self->codec->config) != 0)
- goto errorexit;
-
- Py_DECREF(codec);
- return (PyObject *)self;
+ MultibyteIncrementalEncoderObject *self;
+ PyObject *codec = NULL;
+ char *errors = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
+ incnewkwarglist, &errors))
+ return NULL;
+
+ self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
+ if (self == NULL)
+ return NULL;
+
+ codec = PyObject_GetAttrString((PyObject *)type, "codec");
+ if (codec == NULL)
+ goto errorexit;
+ if (!MultibyteCodec_Check(codec)) {
+ PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+ goto errorexit;
+ }
+
+ self->codec = ((MultibyteCodecObject *)codec)->codec;
+ self->pendingsize = 0;
+ self->errors = internal_error_callback(errors);
+ if (self->errors == NULL)
+ goto errorexit;
+ if (self->codec->encinit != NULL &&
+ self->codec->encinit(&self->state, self->codec->config) != 0)
+ goto errorexit;
+
+ Py_DECREF(codec);
+ return (PyObject *)self;
errorexit:
- Py_XDECREF(self);
- Py_XDECREF(codec);
- return NULL;
+ Py_XDECREF(self);
+ Py_XDECREF(codec);
+ return NULL;
}
static int
mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
- return 0;
+ return 0;
}
static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
- visitproc visit, void *arg)
+ visitproc visit, void *arg)
{
- if (ERROR_ISCUSTOM(self->errors))
- Py_VISIT(self->errors);
- return 0;
+ if (ERROR_ISCUSTOM(self->errors))
+ Py_VISIT(self->errors);
+ return 0;
}
static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
{
- PyObject_GC_UnTrack(self);
- ERROR_DECREF(self->errors);
- Py_TYPE(self)->tp_free(self);
+ PyObject_GC_UnTrack(self);
+ ERROR_DECREF(self->errors);
+ Py_TYPE(self)->tp_free(self);
}
static PyTypeObject MultibyteIncrementalEncoder_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "MultibyteIncrementalEncoder", /* tp_name */
- sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)mbiencoder_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
- | Py_TPFLAGS_BASETYPE, /* tp_flags */
- 0, /* tp_doc */
- (traverseproc)mbiencoder_traverse, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iterext */
- mbiencoder_methods, /* tp_methods */
- 0, /* tp_members */
- codecctx_getsets, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- mbiencoder_init, /* tp_init */
- 0, /* tp_alloc */
- mbiencoder_new, /* tp_new */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "MultibyteIncrementalEncoder", /* tp_name */
+ sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)mbiencoder_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+ | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)mbiencoder_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iterext */
+ mbiencoder_methods, /* tp_methods */
+ 0, /* tp_members */
+ codecctx_getsets, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ mbiencoder_init, /* tp_init */
+ 0, /* tp_alloc */
+ mbiencoder_new, /* tp_new */
};
@@ -1030,206 +1030,206 @@ static PyTypeObject MultibyteIncrementalEncoder_Type = {
static PyObject *
mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
- PyObject *args, PyObject *kwargs)
+ PyObject *args, PyObject *kwargs)
{
- MultibyteDecodeBuffer buf;
- char *data, *wdata = NULL;
- Py_buffer pdata;
- Py_ssize_t wsize, finalsize = 0, size, origpending;
- int final = 0;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode",
- incrementalkwarglist, &pdata, &final))
- return NULL;
- data = pdata.buf;
- size = pdata.len;
-
- buf.outobj = buf.excobj = NULL;
- origpending = self->pendingsize;
-
- if (self->pendingsize == 0) {
- wsize = size;
- wdata = data;
- }
- else {
- if (size > PY_SSIZE_T_MAX - self->pendingsize) {
- PyErr_NoMemory();
- goto errorexit;
- }
- wsize = size + self->pendingsize;
- wdata = PyMem_Malloc(wsize);
- if (wdata == NULL)
- goto errorexit;
- memcpy(wdata, self->pending, self->pendingsize);
- memcpy(wdata + self->pendingsize, data, size);
- self->pendingsize = 0;
- }
-
- if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
- goto errorexit;
-
- if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
- goto errorexit;
-
- if (final && buf.inbuf < buf.inbuf_end) {
- if (multibytecodec_decerror(self->codec, &self->state,
- &buf, self->errors, MBERR_TOOFEW)) {
- /* recover the original pending buffer */
- memcpy(self->pending, wdata, origpending);
- self->pendingsize = origpending;
- goto errorexit;
- }
- }
-
- if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
- if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
- goto errorexit;
- }
-
- finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
-
- PyBuffer_Release(&pdata);
- if (wdata != data)
- PyMem_Del(wdata);
- Py_XDECREF(buf.excobj);
- return buf.outobj;
+ MultibyteDecodeBuffer buf;
+ char *data, *wdata = NULL;
+ Py_buffer pdata;
+ Py_ssize_t wsize, finalsize = 0, size, origpending;
+ int final = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode",
+ incrementalkwarglist, &pdata, &final))
+ return NULL;
+ data = pdata.buf;
+ size = pdata.len;
+
+ buf.outobj = buf.excobj = NULL;
+ origpending = self->pendingsize;
+
+ if (self->pendingsize == 0) {
+ wsize = size;
+ wdata = data;
+ }
+ else {
+ if (size > PY_SSIZE_T_MAX - self->pendingsize) {
+ PyErr_NoMemory();
+ goto errorexit;
+ }
+ wsize = size + self->pendingsize;
+ wdata = PyMem_Malloc(wsize);
+ if (wdata == NULL)
+ goto errorexit;
+ memcpy(wdata, self->pending, self->pendingsize);
+ memcpy(wdata + self->pendingsize, data, size);
+ self->pendingsize = 0;
+ }
+
+ if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
+ goto errorexit;
+
+ if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
+ goto errorexit;
+
+ if (final && buf.inbuf < buf.inbuf_end) {
+ if (multibytecodec_decerror(self->codec, &self->state,
+ &buf, self->errors, MBERR_TOOFEW)) {
+ /* recover the original pending buffer */
+ memcpy(self->pending, wdata, origpending);
+ self->pendingsize = origpending;
+ goto errorexit;
+ }
+ }
+
+ if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
+ if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
+ goto errorexit;
+ }
+
+ finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
+ if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+ if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+ goto errorexit;
+
+ PyBuffer_Release(&pdata);
+ if (wdata != data)
+ PyMem_Del(wdata);
+ Py_XDECREF(buf.excobj);
+ return buf.outobj;
errorexit:
- PyBuffer_Release(&pdata);
- if (wdata != NULL && wdata != data)
- PyMem_Del(wdata);
- Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
- return NULL;
+ PyBuffer_Release(&pdata);
+ if (wdata != NULL && wdata != data)
+ PyMem_Del(wdata);
+ Py_XDECREF(buf.excobj);
+ Py_XDECREF(buf.outobj);
+ return NULL;
}
static PyObject *
mbidecoder_reset(MultibyteIncrementalDecoderObject *self)
{
- if (self->codec->decreset != NULL &&
- self->codec->decreset(&self->state, self->codec->config) != 0)
- return NULL;
- self->pendingsize = 0;
+ if (self->codec->decreset != NULL &&
+ self->codec->decreset(&self->state, self->codec->config) != 0)
+ return NULL;
+ self->pendingsize = 0;
- Py_RETURN_NONE;
+ Py_RETURN_NONE;
}
static struct PyMethodDef mbidecoder_methods[] = {
- {"decode", (PyCFunction)mbidecoder_decode,
- METH_VARARGS | METH_KEYWORDS, NULL},
- {"reset", (PyCFunction)mbidecoder_reset,
- METH_NOARGS, NULL},
- {NULL, NULL},
+ {"decode", (PyCFunction)mbidecoder_decode,
+ METH_VARARGS | METH_KEYWORDS, NULL},
+ {"reset", (PyCFunction)mbidecoder_reset,
+ METH_NOARGS, NULL},
+ {NULL, NULL},
};
static PyObject *
mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- MultibyteIncrementalDecoderObject *self;
- PyObject *codec = NULL;
- char *errors = NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
- incnewkwarglist, &errors))
- return NULL;
-
- self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
- if (self == NULL)
- return NULL;
-
- codec = PyObject_GetAttrString((PyObject *)type, "codec");
- if (codec == NULL)
- goto errorexit;
- if (!MultibyteCodec_Check(codec)) {
- PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
- goto errorexit;
- }
-
- self->codec = ((MultibyteCodecObject *)codec)->codec;
- self->pendingsize = 0;
- self->errors = internal_error_callback(errors);
- if (self->errors == NULL)
- goto errorexit;
- if (self->codec->decinit != NULL &&
- self->codec->decinit(&self->state, self->codec->config) != 0)
- goto errorexit;
-
- Py_DECREF(codec);
- return (PyObject *)self;
+ MultibyteIncrementalDecoderObject *self;
+ PyObject *codec = NULL;
+ char *errors = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
+ incnewkwarglist, &errors))
+ return NULL;
+
+ self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
+ if (self == NULL)
+ return NULL;
+
+ codec = PyObject_GetAttrString((PyObject *)type, "codec");
+ if (codec == NULL)
+ goto errorexit;
+ if (!MultibyteCodec_Check(codec)) {
+ PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+ goto errorexit;
+ }
+
+ self->codec = ((MultibyteCodecObject *)codec)->codec;
+ self->pendingsize = 0;
+ self->errors = internal_error_callback(errors);
+ if (self->errors == NULL)
+ goto errorexit;
+ if (self->codec->decinit != NULL &&
+ self->codec->decinit(&self->state, self->codec->config) != 0)
+ goto errorexit;
+
+ Py_DECREF(codec);
+ return (PyObject *)self;
errorexit:
- Py_XDECREF(self);
- Py_XDECREF(codec);
- return NULL;
+ Py_XDECREF(self);
+ Py_XDECREF(codec);
+ return NULL;
}
static int
mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
- return 0;
+ return 0;
}
static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
- visitproc visit, void *arg)
+ visitproc visit, void *arg)
{
- if (ERROR_ISCUSTOM(self->errors))
- Py_VISIT(self->errors);
- return 0;
+ if (ERROR_ISCUSTOM(self->errors))
+ Py_VISIT(self->errors);
+ return 0;
}
static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
{
- PyObject_GC_UnTrack(self);
- ERROR_DECREF(self->errors);
- Py_TYPE(self)->tp_free(self);
+ PyObject_GC_UnTrack(self);
+ ERROR_DECREF(self->errors);
+ Py_TYPE(self)->tp_free(self);
}
static PyTypeObject MultibyteIncrementalDecoder_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "MultibyteIncrementalDecoder", /* tp_name */
- sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)mbidecoder_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
- | Py_TPFLAGS_BASETYPE, /* tp_flags */
- 0, /* tp_doc */
- (traverseproc)mbidecoder_traverse, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iterext */
- mbidecoder_methods, /* tp_methods */
- 0, /* tp_members */
- codecctx_getsets, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- mbidecoder_init, /* tp_init */
- 0, /* tp_alloc */
- mbidecoder_new, /* tp_new */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "MultibyteIncrementalDecoder", /* tp_name */
+ sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)mbidecoder_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+ | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)mbidecoder_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iterext */
+ mbidecoder_methods, /* tp_methods */
+ 0, /* tp_members */
+ codecctx_getsets, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ mbidecoder_init, /* tp_init */
+ 0, /* tp_alloc */
+ mbidecoder_new, /* tp_new */
};
@@ -1239,327 +1239,327 @@ static PyTypeObject MultibyteIncrementalDecoder_Type = {
static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject *self,
- const char *method, Py_ssize_t sizehint)
+ const char *method, Py_ssize_t sizehint)
{
- MultibyteDecodeBuffer buf;
- PyObject *cres;
- Py_ssize_t rsize, finalsize = 0;
-
- if (sizehint == 0)
- return PyUnicode_FromUnicode(NULL, 0);
-
- buf.outobj = buf.excobj = NULL;
- cres = NULL;
-
- for (;;) {
- int endoffile;
-
- if (sizehint < 0)
- cres = PyObject_CallMethod(self->stream,
- (char *)method, NULL);
- else
- cres = PyObject_CallMethod(self->stream,
- (char *)method, "i", sizehint);
- if (cres == NULL)
- goto errorexit;
-
- if (!PyBytes_Check(cres)) {
- PyErr_Format(PyExc_TypeError,
- "stream function returned a "
- "non-bytes object (%.100s)",
- cres->ob_type->tp_name);
- goto errorexit;
- }
-
- endoffile = (PyBytes_GET_SIZE(cres) == 0);
-
- if (self->pendingsize > 0) {
- PyObject *ctr;
- char *ctrdata;
-
- if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
- PyErr_NoMemory();
- goto errorexit;
- }
- rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
- ctr = PyBytes_FromStringAndSize(NULL, rsize);
- if (ctr == NULL)
- goto errorexit;
- ctrdata = PyBytes_AS_STRING(ctr);
- memcpy(ctrdata, self->pending, self->pendingsize);
- memcpy(ctrdata + self->pendingsize,
- PyBytes_AS_STRING(cres),
- PyBytes_GET_SIZE(cres));
- Py_DECREF(cres);
- cres = ctr;
- self->pendingsize = 0;
- }
-
- rsize = PyBytes_GET_SIZE(cres);
- if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
- rsize) != 0)
- goto errorexit;
-
- if (rsize > 0 && decoder_feed_buffer(
- (MultibyteStatefulDecoderContext *)self, &buf))
- goto errorexit;
-
- if (endoffile || sizehint < 0) {
- if (buf.inbuf < buf.inbuf_end &&
- multibytecodec_decerror(self->codec, &self->state,
- &buf, self->errors, MBERR_TOOFEW))
- goto errorexit;
- }
-
- if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
- if (decoder_append_pending(STATEFUL_DCTX(self),
- &buf) != 0)
- goto errorexit;
- }
-
- finalsize = (Py_ssize_t)(buf.outbuf -
- PyUnicode_AS_UNICODE(buf.outobj));
- Py_DECREF(cres);
- cres = NULL;
-
- if (sizehint < 0 || finalsize != 0 || rsize == 0)
- break;
-
- sizehint = 1; /* read 1 more byte and retry */
- }
-
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
- if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
- goto errorexit;
-
- Py_XDECREF(cres);
- Py_XDECREF(buf.excobj);
- return buf.outobj;
+ MultibyteDecodeBuffer buf;
+ PyObject *cres;
+ Py_ssize_t rsize, finalsize = 0;
+
+ if (sizehint == 0)
+ return PyUnicode_FromUnicode(NULL, 0);
+
+ buf.outobj = buf.excobj = NULL;
+ cres = NULL;
+
+ for (;;) {
+ int endoffile;
+
+ if (sizehint < 0)
+ cres = PyObject_CallMethod(self->stream,
+ (char *)method, NULL);
+ else
+ cres = PyObject_CallMethod(self->stream,
+ (char *)method, "i", sizehint);
+ if (cres == NULL)
+ goto errorexit;
+
+ if (!PyBytes_Check(cres)) {
+ PyErr_Format(PyExc_TypeError,
+ "stream function returned a "
+ "non-bytes object (%.100s)",
+ cres->ob_type->tp_name);
+ goto errorexit;
+ }
+
+ endoffile = (PyBytes_GET_SIZE(cres) == 0);
+
+ if (self->pendingsize > 0) {
+ PyObject *ctr;
+ char *ctrdata;
+
+ if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
+ PyErr_NoMemory();
+ goto errorexit;
+ }
+ rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
+ ctr = PyBytes_FromStringAndSize(NULL, rsize);
+ if (ctr == NULL)
+ goto errorexit;
+ ctrdata = PyBytes_AS_STRING(ctr);
+ memcpy(ctrdata, self->pending, self->pendingsize);
+ memcpy(ctrdata + self->pendingsize,
+ PyBytes_AS_STRING(cres),
+ PyBytes_GET_SIZE(cres));
+ Py_DECREF(cres);
+ cres = ctr;
+ self->pendingsize = 0;
+ }
+
+ rsize = PyBytes_GET_SIZE(cres);
+ if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
+ rsize) != 0)
+ goto errorexit;
+
+ if (rsize > 0 && decoder_feed_buffer(
+ (MultibyteStatefulDecoderContext *)self, &buf))
+ goto errorexit;
+
+ if (endoffile || sizehint < 0) {
+ if (buf.inbuf < buf.inbuf_end &&
+ multibytecodec_decerror(self->codec, &self->state,
+ &buf, self->errors, MBERR_TOOFEW))
+ goto errorexit;
+ }
+
+ if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
+ if (decoder_append_pending(STATEFUL_DCTX(self),
+ &buf) != 0)
+ goto errorexit;
+ }
+
+ finalsize = (Py_ssize_t)(buf.outbuf -
+ PyUnicode_AS_UNICODE(buf.outobj));
+ Py_DECREF(cres);
+ cres = NULL;
+
+ if (sizehint < 0 || finalsize != 0 || rsize == 0)
+ break;
+
+ sizehint = 1; /* read 1 more byte and retry */
+ }
+
+ if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+ if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+ goto errorexit;
+
+ Py_XDECREF(cres);
+ Py_XDECREF(buf.excobj);
+ return buf.outobj;
errorexit:
- Py_XDECREF(cres);
- Py_XDECREF(buf.excobj);
- Py_XDECREF(buf.outobj);
- return NULL;
+ Py_XDECREF(cres);
+ Py_XDECREF(buf.excobj);
+ Py_XDECREF(buf.outobj);
+ return NULL;
}
static PyObject *
mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
{
- PyObject *sizeobj = NULL;
- Py_ssize_t size;
+ PyObject *sizeobj = NULL;
+ Py_ssize_t size;
- if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj))
- return NULL;
+ if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj))
+ return NULL;
- if (sizeobj == Py_None || sizeobj == NULL)
- size = -1;
- else if (PyLong_Check(sizeobj))
- size = PyLong_AsSsize_t(sizeobj);
- else {
- PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
- return NULL;
- }
+ if (sizeobj == Py_None || sizeobj == NULL)
+ size = -1;
+ else if (PyLong_Check(sizeobj))
+ size = PyLong_AsSsize_t(sizeobj);
+ else {
+ PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+ return NULL;
+ }
- if (size == -1 && PyErr_Occurred())
- return NULL;
+ if (size == -1 && PyErr_Occurred())
+ return NULL;
- return mbstreamreader_iread(self, "read", size);
+ return mbstreamreader_iread(self, "read", size);
}
static PyObject *
mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
{
- PyObject *sizeobj = NULL;
- Py_ssize_t size;
+ PyObject *sizeobj = NULL;
+ Py_ssize_t size;
- if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj))
- return NULL;
+ if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj))
+ return NULL;
- if (sizeobj == Py_None || sizeobj == NULL)
- size = -1;
- else if (PyLong_Check(sizeobj))
- size = PyLong_AsSsize_t(sizeobj);
- else {
- PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
- return NULL;
- }
+ if (sizeobj == Py_None || sizeobj == NULL)
+ size = -1;
+ else if (PyLong_Check(sizeobj))
+ size = PyLong_AsSsize_t(sizeobj);
+ else {
+ PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+ return NULL;
+ }
- if (size == -1 && PyErr_Occurred())
- return NULL;
+ if (size == -1 && PyErr_Occurred())
+ return NULL;
- return mbstreamreader_iread(self, "readline", size);
+ return mbstreamreader_iread(self, "readline", size);
}
static PyObject *
mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
{
- PyObject *sizehintobj = NULL, *r, *sr;
- Py_ssize_t sizehint;
-
- if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj))
- return NULL;
-
- if (sizehintobj == Py_None || sizehintobj == NULL)
- sizehint = -1;
- else if (PyLong_Check(sizehintobj))
- sizehint = PyLong_AsSsize_t(sizehintobj);
- else {
- PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
- return NULL;
- }
-
- if (sizehint == -1 && PyErr_Occurred())
- return NULL;
-
- r = mbstreamreader_iread(self, "read", sizehint);
- if (r == NULL)
- return NULL;
-
- sr = PyUnicode_Splitlines(r, 1);
- Py_DECREF(r);
- return sr;
+ PyObject *sizehintobj = NULL, *r, *sr;
+ Py_ssize_t sizehint;
+
+ if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj))
+ return NULL;
+
+ if (sizehintobj == Py_None || sizehintobj == NULL)
+ sizehint = -1;
+ else if (PyLong_Check(sizehintobj))
+ sizehint = PyLong_AsSsize_t(sizehintobj);
+ else {
+ PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+ return NULL;
+ }
+
+ if (sizehint == -1 && PyErr_Occurred())
+ return NULL;
+
+ r = mbstreamreader_iread(self, "read", sizehint);
+ if (r == NULL)
+ return NULL;
+
+ sr = PyUnicode_Splitlines(r, 1);
+ Py_DECREF(r);
+ return sr;
}
static PyObject *
mbstreamreader_reset(MultibyteStreamReaderObject *self)
{
- if (self->codec->decreset != NULL &&
- self->codec->decreset(&self->state, self->codec->config) != 0)
- return NULL;
- self->pendingsize = 0;
+ if (self->codec->decreset != NULL &&
+ self->codec->decreset(&self->state, self->codec->config) != 0)
+ return NULL;
+ self->pendingsize = 0;
- Py_RETURN_NONE;
+ Py_RETURN_NONE;
}
static struct PyMethodDef mbstreamreader_methods[] = {
- {"read", (PyCFunction)mbstreamreader_read,
- METH_VARARGS, NULL},
- {"readline", (PyCFunction)mbstreamreader_readline,
- METH_VARARGS, NULL},
- {"readlines", (PyCFunction)mbstreamreader_readlines,
- METH_VARARGS, NULL},
- {"reset", (PyCFunction)mbstreamreader_reset,
- METH_NOARGS, NULL},
- {NULL, NULL},
+ {"read", (PyCFunction)mbstreamreader_read,
+ METH_VARARGS, NULL},
+ {"readline", (PyCFunction)mbstreamreader_readline,
+ METH_VARARGS, NULL},
+ {"readlines", (PyCFunction)mbstreamreader_readlines,
+ METH_VARARGS, NULL},
+ {"reset", (PyCFunction)mbstreamreader_reset,
+ METH_NOARGS, NULL},
+ {NULL, NULL},
};
static PyMemberDef mbstreamreader_members[] = {
- {"stream", T_OBJECT,
- offsetof(MultibyteStreamReaderObject, stream),
- READONLY, NULL},
- {NULL,}
+ {"stream", T_OBJECT,
+ offsetof(MultibyteStreamReaderObject, stream),
+ READONLY, NULL},
+ {NULL,}
};
static PyObject *
mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- MultibyteStreamReaderObject *self;
- PyObject *stream, *codec = NULL;
- char *errors = NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
- streamkwarglist, &stream, &errors))
- return NULL;
-
- self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
- if (self == NULL)
- return NULL;
-
- codec = PyObject_GetAttrString((PyObject *)type, "codec");
- if (codec == NULL)
- goto errorexit;
- if (!MultibyteCodec_Check(codec)) {
- PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
- goto errorexit;
- }
-
- self->codec = ((MultibyteCodecObject *)codec)->codec;
- self->stream = stream;
- Py_INCREF(stream);
- self->pendingsize = 0;
- self->errors = internal_error_callback(errors);
- if (self->errors == NULL)
- goto errorexit;
- if (self->codec->decinit != NULL &&
- self->codec->decinit(&self->state, self->codec->config) != 0)
- goto errorexit;
-
- Py_DECREF(codec);
- return (PyObject *)self;
+ MultibyteStreamReaderObject *self;
+ PyObject *stream, *codec = NULL;
+ char *errors = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
+ streamkwarglist, &stream, &errors))
+ return NULL;
+
+ self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
+ if (self == NULL)
+ return NULL;
+
+ codec = PyObject_GetAttrString((PyObject *)type, "codec");
+ if (codec == NULL)
+ goto errorexit;
+ if (!MultibyteCodec_Check(codec)) {
+ PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+ goto errorexit;
+ }
+
+ self->codec = ((MultibyteCodecObject *)codec)->codec;
+ self->stream = stream;
+ Py_INCREF(stream);
+ self->pendingsize = 0;
+ self->errors = internal_error_callback(errors);
+ if (self->errors == NULL)
+ goto errorexit;
+ if (self->codec->decinit != NULL &&
+ self->codec->decinit(&self->state, self->codec->config) != 0)
+ goto errorexit;
+
+ Py_DECREF(codec);
+ return (PyObject *)self;
errorexit:
- Py_XDECREF(self);
- Py_XDECREF(codec);
- return NULL;
+ Py_XDECREF(self);
+ Py_XDECREF(codec);
+ return NULL;
}
static int
mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
{
- return 0;
+ return 0;
}
static int
mbstreamreader_traverse(MultibyteStreamReaderObject *self,
- visitproc visit, void *arg)
+ visitproc visit, void *arg)
{
- if (ERROR_ISCUSTOM(self->errors))
- Py_VISIT(self->errors);
- Py_VISIT(self->stream);
- return 0;
+ if (ERROR_ISCUSTOM(self->errors))
+ Py_VISIT(self->errors);
+ Py_VISIT(self->stream);
+ return 0;
}
static void
mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
{
- PyObject_GC_UnTrack(self);
- ERROR_DECREF(self->errors);
- Py_XDECREF(self->stream);
- Py_TYPE(self)->tp_free(self);
+ PyObject_GC_UnTrack(self);
+ ERROR_DECREF(self->errors);
+ Py_XDECREF(self->stream);
+ Py_TYPE(self)->tp_free(self);
}
static PyTypeObject MultibyteStreamReader_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "MultibyteStreamReader", /* tp_name */
- sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)mbstreamreader_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
- | Py_TPFLAGS_BASETYPE, /* tp_flags */
- 0, /* tp_doc */
- (traverseproc)mbstreamreader_traverse, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iterext */
- mbstreamreader_methods, /* tp_methods */
- mbstreamreader_members, /* tp_members */
- codecctx_getsets, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- mbstreamreader_init, /* tp_init */
- 0, /* tp_alloc */
- mbstreamreader_new, /* tp_new */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "MultibyteStreamReader", /* tp_name */
+ sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)mbstreamreader_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+ | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)mbstreamreader_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iterext */
+ mbstreamreader_methods, /* tp_methods */
+ mbstreamreader_members, /* tp_members */
+ codecctx_getsets, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ mbstreamreader_init, /* tp_init */
+ 0, /* tp_alloc */
+ mbstreamreader_new, /* tp_new */
};
@@ -1569,217 +1569,217 @@ static PyTypeObject MultibyteStreamReader_Type = {
static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
- PyObject *unistr)
+ PyObject *unistr)
{
- PyObject *str, *wr;
+ PyObject *str, *wr;
- str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
- if (str == NULL)
- return -1;
+ str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
+ if (str == NULL)
+ return -1;
- wr = PyObject_CallMethod(self->stream, "write", "O", str);
- Py_DECREF(str);
- if (wr == NULL)
- return -1;
+ wr = PyObject_CallMethod(self->stream, "write", "O", str);
+ Py_DECREF(str);
+ if (wr == NULL)
+ return -1;
- Py_DECREF(wr);
- return 0;
+ Py_DECREF(wr);
+ return 0;
}
static PyObject *
mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj)
{
- if (mbstreamwriter_iwrite(self, strobj))
- return NULL;
- else
- Py_RETURN_NONE;
+ if (mbstreamwriter_iwrite(self, strobj))
+ return NULL;
+ else
+ Py_RETURN_NONE;
}
static PyObject *
mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines)
{
- PyObject *strobj;
- int i, r;
-
- if (!PySequence_Check(lines)) {
- PyErr_SetString(PyExc_TypeError,
- "arg must be a sequence object");
- return NULL;
- }
-
- for (i = 0; i < PySequence_Length(lines); i++) {
- /* length can be changed even within this loop */
- strobj = PySequence_GetItem(lines, i);
- if (strobj == NULL)
- return NULL;
-
- r = mbstreamwriter_iwrite(self, strobj);
- Py_DECREF(strobj);
- if (r == -1)
- return NULL;
- }
-
- Py_RETURN_NONE;
+ PyObject *strobj;
+ int i, r;
+
+ if (!PySequence_Check(lines)) {
+ PyErr_SetString(PyExc_TypeError,
+ "arg must be a sequence object");
+ return NULL;
+ }
+
+ for (i = 0; i < PySequence_Length(lines); i++) {
+ /* length can be changed even within this loop */
+ strobj = PySequence_GetItem(lines, i);
+ if (strobj == NULL)
+ return NULL;
+
+ r = mbstreamwriter_iwrite(self, strobj);
+ Py_DECREF(strobj);
+ if (r == -1)
+ return NULL;
+ }
+
+ Py_RETURN_NONE;
}
static PyObject *
mbstreamwriter_reset(MultibyteStreamWriterObject *self)
{
- const Py_UNICODE *pending;
- PyObject *pwrt;
-
- pending = self->pending;
- pwrt = multibytecodec_encode(self->codec, &self->state,
- &pending, self->pendingsize, self->errors,
- MBENC_FLUSH | MBENC_RESET);
- /* some pending buffer can be truncated when UnicodeEncodeError is
- * raised on 'strict' mode. but, 'reset' method is designed to
- * reset the pending buffer or states so failed string sequence
- * ought to be missed */
- self->pendingsize = 0;
- if (pwrt == NULL)
- return NULL;
-
- assert(PyBytes_Check(pwrt));
- if (PyBytes_Size(pwrt) > 0) {
- PyObject *wr;
- wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
- if (wr == NULL) {
- Py_DECREF(pwrt);
- return NULL;
- }
- }
- Py_DECREF(pwrt);
-
- Py_RETURN_NONE;
+ const Py_UNICODE *pending;
+ PyObject *pwrt;
+
+ pending = self->pending;
+ pwrt = multibytecodec_encode(self->codec, &self->state,
+ &pending, self->pendingsize, self->errors,
+ MBENC_FLUSH | MBENC_RESET);
+ /* some pending buffer can be truncated when UnicodeEncodeError is
+ * raised on 'strict' mode. but, 'reset' method is designed to
+ * reset the pending buffer or states so failed string sequence
+ * ought to be missed */
+ self->pendingsize = 0;
+ if (pwrt == NULL)
+ return NULL;
+
+ assert(PyBytes_Check(pwrt));
+ if (PyBytes_Size(pwrt) > 0) {
+ PyObject *wr;
+ wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
+ if (wr == NULL) {
+ Py_DECREF(pwrt);
+ return NULL;
+ }
+ }
+ Py_DECREF(pwrt);
+
+ Py_RETURN_NONE;
}
static PyObject *
mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- MultibyteStreamWriterObject *self;
- PyObject *stream, *codec = NULL;
- char *errors = NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
- streamkwarglist, &stream, &errors))
- return NULL;
-
- self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
- if (self == NULL)
- return NULL;
-
- codec = PyObject_GetAttrString((PyObject *)type, "codec");
- if (codec == NULL)
- goto errorexit;
- if (!MultibyteCodec_Check(codec)) {
- PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
- goto errorexit;
- }
-
- self->codec = ((MultibyteCodecObject *)codec)->codec;
- self->stream = stream;
- Py_INCREF(stream);
- self->pendingsize = 0;
- self->errors = internal_error_callback(errors);
- if (self->errors == NULL)
- goto errorexit;
- if (self->codec->encinit != NULL &&
- self->codec->encinit(&self->state, self->codec->config) != 0)
- goto errorexit;
-
- Py_DECREF(codec);
- return (PyObject *)self;
+ MultibyteStreamWriterObject *self;
+ PyObject *stream, *codec = NULL;
+ char *errors = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
+ streamkwarglist, &stream, &errors))
+ return NULL;
+
+ self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
+ if (self == NULL)
+ return NULL;
+
+ codec = PyObject_GetAttrString((PyObject *)type, "codec");
+ if (codec == NULL)
+ goto errorexit;
+ if (!MultibyteCodec_Check(codec)) {
+ PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+ goto errorexit;
+ }
+
+ self->codec = ((MultibyteCodecObject *)codec)->codec;
+ self->stream = stream;
+ Py_INCREF(stream);
+ self->pendingsize = 0;
+ self->errors = internal_error_callback(errors);
+ if (self->errors == NULL)
+ goto errorexit;
+ if (self->codec->encinit != NULL &&
+ self->codec->encinit(&self->state, self->codec->config) != 0)
+ goto errorexit;
+
+ Py_DECREF(codec);
+ return (PyObject *)self;
errorexit:
- Py_XDECREF(self);
- Py_XDECREF(codec);
- return NULL;
+ Py_XDECREF(self);
+ Py_XDECREF(codec);
+ return NULL;
}
static int
mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
{
- return 0;
+ return 0;
}
static int
mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
- visitproc visit, void *arg)
+ visitproc visit, void *arg)
{
- if (ERROR_ISCUSTOM(self->errors))
- Py_VISIT(self->errors);
- Py_VISIT(self->stream);
- return 0;
+ if (ERROR_ISCUSTOM(self->errors))
+ Py_VISIT(self->errors);
+ Py_VISIT(self->stream);
+ return 0;
}
static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
{
- PyObject_GC_UnTrack(self);
- ERROR_DECREF(self->errors);
- Py_XDECREF(self->stream);
- Py_TYPE(self)->tp_free(self);
+ PyObject_GC_UnTrack(self);
+ ERROR_DECREF(self->errors);
+ Py_XDECREF(self->stream);
+ Py_TYPE(self)->tp_free(self);
}
static struct PyMethodDef mbstreamwriter_methods[] = {
- {"write", (PyCFunction)mbstreamwriter_write,
- METH_O, NULL},
- {"writelines", (PyCFunction)mbstreamwriter_writelines,
- METH_O, NULL},
- {"reset", (PyCFunction)mbstreamwriter_reset,
- METH_NOARGS, NULL},
- {NULL, NULL},
+ {"write", (PyCFunction)mbstreamwriter_write,
+ METH_O, NULL},
+ {"writelines", (PyCFunction)mbstreamwriter_writelines,
+ METH_O, NULL},
+ {"reset", (PyCFunction)mbstreamwriter_reset,
+ METH_NOARGS, NULL},
+ {NULL, NULL},
};
static PyMemberDef mbstreamwriter_members[] = {
- {"stream", T_OBJECT,
- offsetof(MultibyteStreamWriterObject, stream),
- READONLY, NULL},
- {NULL,}
+ {"stream", T_OBJECT,
+ offsetof(MultibyteStreamWriterObject, stream),
+ READONLY, NULL},
+ {NULL,}
};
static PyTypeObject MultibyteStreamWriter_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "MultibyteStreamWriter", /* tp_name */
- sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
- | Py_TPFLAGS_BASETYPE, /* tp_flags */
- 0, /* tp_doc */
- (traverseproc)mbstreamwriter_traverse, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iterext */
- mbstreamwriter_methods, /* tp_methods */
- mbstreamwriter_members, /* tp_members */
- codecctx_getsets, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- mbstreamwriter_init, /* tp_init */
- 0, /* tp_alloc */
- mbstreamwriter_new, /* tp_new */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "MultibyteStreamWriter", /* tp_name */
+ sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+ | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)mbstreamwriter_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iterext */
+ mbstreamwriter_methods, /* tp_methods */
+ mbstreamwriter_members, /* tp_members */
+ codecctx_getsets, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ mbstreamwriter_init, /* tp_init */
+ 0, /* tp_alloc */
+ mbstreamwriter_new, /* tp_new */
};
@@ -1790,76 +1790,76 @@ static PyTypeObject MultibyteStreamWriter_Type = {
static PyObject *
__create_codec(PyObject *ignore, PyObject *arg)
{
- MultibyteCodecObject *self;
- MultibyteCodec *codec;
+ MultibyteCodecObject *self;
+ MultibyteCodec *codec;
- if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
- PyErr_SetString(PyExc_ValueError, "argument type invalid");
- return NULL;
- }
+ if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
+ PyErr_SetString(PyExc_ValueError, "argument type invalid");
+ return NULL;
+ }
- codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
- if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
- return NULL;
+ codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
+ if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
+ return NULL;
- self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
- if (self == NULL)
- return NULL;
- self->codec = codec;
+ self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
+ if (self == NULL)
+ return NULL;
+ self->codec = codec;
- return (PyObject *)self;
+ return (PyObject *)self;
}
static struct PyMethodDef __methods[] = {
- {"__create_codec", (PyCFunction)__create_codec, METH_O},
- {NULL, NULL},
+ {"__create_codec", (PyCFunction)__create_codec, METH_O},
+ {NULL, NULL},
};
static struct PyModuleDef _multibytecodecmodule = {
- PyModuleDef_HEAD_INIT,
- "_multibytecodec",
- NULL,
- -1,
- __methods,
- NULL,
- NULL,
- NULL,
- NULL
+ PyModuleDef_HEAD_INIT,
+ "_multibytecodec",
+ NULL,
+ -1,
+ __methods,
+ NULL,
+ NULL,
+ NULL,
+ NULL
};
PyMODINIT_FUNC
PyInit__multibytecodec(void)
{
- int i;
- PyObject *m;
- PyTypeObject *typelist[] = {
- &MultibyteIncrementalEncoder_Type,
- &MultibyteIncrementalDecoder_Type,
- &MultibyteStreamReader_Type,
- &MultibyteStreamWriter_Type,
- NULL
- };
-
- if (PyType_Ready(&MultibyteCodec_Type) < 0)
- return NULL;
-
- m = PyModule_Create(&_multibytecodecmodule);
- if (m == NULL)
- return NULL;
-
- for (i = 0; typelist[i] != NULL; i++) {
- if (PyType_Ready(typelist[i]) < 0)
- return NULL;
- Py_INCREF(typelist[i]);
- PyModule_AddObject(m, typelist[i]->tp_name,
- (PyObject *)typelist[i]);
- }
-
- if (PyErr_Occurred()) {
- Py_FatalError("can't initialize the _multibytecodec module");
- Py_DECREF(m);
- m = NULL;
- }
- return m;
+ int i;
+ PyObject *m;
+ PyTypeObject *typelist[] = {
+ &MultibyteIncrementalEncoder_Type,
+ &MultibyteIncrementalDecoder_Type,
+ &MultibyteStreamReader_Type,
+ &MultibyteStreamWriter_Type,
+ NULL
+ };
+
+ if (PyType_Ready(&MultibyteCodec_Type) < 0)
+ return NULL;
+
+ m = PyModule_Create(&_multibytecodecmodule);
+ if (m == NULL)
+ return NULL;
+
+ for (i = 0; typelist[i] != NULL; i++) {
+ if (PyType_Ready(typelist[i]) < 0)
+ return NULL;
+ Py_INCREF(typelist[i]);
+ PyModule_AddObject(m, typelist[i]->tp_name,
+ (PyObject *)typelist[i]);
+ }
+
+ if (PyErr_Occurred()) {
+ Py_FatalError("can't initialize the _multibytecodec module");
+ Py_DECREF(m);
+ m = NULL;
+ }
+ return m;
}
diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h
index 71c02cc..1b6ef55 100644
--- a/Modules/cjkcodecs/multibytecodec.h
+++ b/Modules/cjkcodecs/multibytecodec.h
@@ -23,114 +23,114 @@ typedef unsigned short ucs2_t, DBCHAR;
#endif
typedef union {
- void *p;
- int i;
- unsigned char c[8];
- ucs2_t u2[4];
- ucs4_t u4[2];
+ void *p;
+ int i;
+ unsigned char c[8];
+ ucs2_t u2[4];
+ ucs4_t u4[2];
} MultibyteCodec_State;
typedef int (*mbcodec_init)(const void *config);
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
- const void *config,
- const Py_UNICODE **inbuf, Py_ssize_t inleft,
- unsigned char **outbuf, Py_ssize_t outleft,
- int flags);
+ const void *config,
+ const Py_UNICODE **inbuf, Py_ssize_t inleft,
+ unsigned char **outbuf, Py_ssize_t outleft,
+ int flags);
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
- const void *config);
+ const void *config);
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
- const void *config,
- unsigned char **outbuf, Py_ssize_t outleft);
+ const void *config,
+ unsigned char **outbuf, Py_ssize_t outleft);
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
- const void *config,
- const unsigned char **inbuf, Py_ssize_t inleft,
- Py_UNICODE **outbuf, Py_ssize_t outleft);
+ const void *config,
+ const unsigned char **inbuf, Py_ssize_t inleft,
+ Py_UNICODE **outbuf, Py_ssize_t outleft);
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
- const void *config);
+ const void *config);
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
- const void *config);
+ const void *config);
typedef struct {
- const char *encoding;
- const void *config;
- mbcodec_init codecinit;
- mbencode_func encode;
- mbencodeinit_func encinit;
- mbencodereset_func encreset;
- mbdecode_func decode;
- mbdecodeinit_func decinit;
- mbdecodereset_func decreset;
+ const char *encoding;
+ const void *config;
+ mbcodec_init codecinit;
+ mbencode_func encode;
+ mbencodeinit_func encinit;
+ mbencodereset_func encreset;
+ mbdecode_func decode;
+ mbdecodeinit_func decinit;
+ mbdecodereset_func decreset;
} MultibyteCodec;
typedef struct {
- PyObject_HEAD
- MultibyteCodec *codec;
+ PyObject_HEAD
+ MultibyteCodec *codec;
} MultibyteCodecObject;
#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
-#define _MultibyteStatefulCodec_HEAD \
- PyObject_HEAD \
- MultibyteCodec *codec; \
- MultibyteCodec_State state; \
- PyObject *errors;
+#define _MultibyteStatefulCodec_HEAD \
+ PyObject_HEAD \
+ MultibyteCodec *codec; \
+ MultibyteCodec_State state; \
+ PyObject *errors;
typedef struct {
- _MultibyteStatefulCodec_HEAD
+ _MultibyteStatefulCodec_HEAD
} MultibyteStatefulCodecContext;
-#define MAXENCPENDING 2
-#define _MultibyteStatefulEncoder_HEAD \
- _MultibyteStatefulCodec_HEAD \
- Py_UNICODE pending[MAXENCPENDING]; \
- Py_ssize_t pendingsize;
+#define MAXENCPENDING 2
+#define _MultibyteStatefulEncoder_HEAD \
+ _MultibyteStatefulCodec_HEAD \
+ Py_UNICODE pending[MAXENCPENDING]; \
+ Py_ssize_t pendingsize;
typedef struct {
- _MultibyteStatefulEncoder_HEAD
+ _MultibyteStatefulEncoder_HEAD
} MultibyteStatefulEncoderContext;
-#define MAXDECPENDING 8
-#define _MultibyteStatefulDecoder_HEAD \
- _MultibyteStatefulCodec_HEAD \
- unsigned char pending[MAXDECPENDING]; \
- Py_ssize_t pendingsize;
+#define MAXDECPENDING 8
+#define _MultibyteStatefulDecoder_HEAD \
+ _MultibyteStatefulCodec_HEAD \
+ unsigned char pending[MAXDECPENDING]; \
+ Py_ssize_t pendingsize;
typedef struct {
- _MultibyteStatefulDecoder_HEAD
+ _MultibyteStatefulDecoder_HEAD
} MultibyteStatefulDecoderContext;
typedef struct {
- _MultibyteStatefulEncoder_HEAD
+ _MultibyteStatefulEncoder_HEAD
} MultibyteIncrementalEncoderObject;
typedef struct {
- _MultibyteStatefulDecoder_HEAD
+ _MultibyteStatefulDecoder_HEAD
} MultibyteIncrementalDecoderObject;
typedef struct {
- _MultibyteStatefulDecoder_HEAD
- PyObject *stream;
+ _MultibyteStatefulDecoder_HEAD
+ PyObject *stream;
} MultibyteStreamReaderObject;
typedef struct {
- _MultibyteStatefulEncoder_HEAD
- PyObject *stream;
+ _MultibyteStatefulEncoder_HEAD
+ PyObject *stream;
} MultibyteStreamWriterObject;
/* positive values for illegal sequences */
-#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */
-#define MBERR_TOOFEW (-2) /* incomplete input buffer */
-#define MBERR_INTERNAL (-3) /* internal runtime error */
-
-#define ERROR_STRICT (PyObject *)(1)
-#define ERROR_IGNORE (PyObject *)(2)
-#define ERROR_REPLACE (PyObject *)(3)
-#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p))
-#define ERROR_DECREF(p) do { \
- if (p != NULL && ERROR_ISCUSTOM(p)) { \
- Py_DECREF(p); \
- } \
+#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */
+#define MBERR_TOOFEW (-2) /* incomplete input buffer */
+#define MBERR_INTERNAL (-3) /* internal runtime error */
+
+#define ERROR_STRICT (PyObject *)(1)
+#define ERROR_IGNORE (PyObject *)(2)
+#define ERROR_REPLACE (PyObject *)(3)
+#define ERROR_ISCUSTOM(p) ((p) < ERROR_STRICT || ERROR_REPLACE < (p))
+#define ERROR_DECREF(p) do { \
+ if (p != NULL && ERROR_ISCUSTOM(p)) { \
+ Py_DECREF(p); \
+ } \
} while (0);
-#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
-#define MBENC_MAX MBENC_FLUSH
+#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
+#define MBENC_MAX MBENC_FLUSH
#define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*"