summaryrefslogtreecommitdiffstats
path: root/Modules/cjkcodecs/_codecs_iso2022.c
diff options
context:
space:
mode:
authorHye-Shik Chang <hyeshik@gmail.com>2006-09-05 12:07:09 (GMT)
committerHye-Shik Chang <hyeshik@gmail.com>2006-09-05 12:07:09 (GMT)
commit199f1db1fa75e1f1b7cfe73259d6a811aa494a36 (patch)
treedafe1e12cea693de74824077b7871a3fdc4df5a2 /Modules/cjkcodecs/_codecs_iso2022.c
parentd042132268d75461e157bd87a3f4358ce603d311 (diff)
downloadcpython-199f1db1fa75e1f1b7cfe73259d6a811aa494a36.zip
cpython-199f1db1fa75e1f1b7cfe73259d6a811aa494a36.tar.gz
cpython-199f1db1fa75e1f1b7cfe73259d6a811aa494a36.tar.bz2
Fix a few bugs in cjkcodecs found by Oren Tirosh:
- gbk and gb18030 codecs now handle U+30FB KATAKANA MIDDLE DOT correctly.
- iso2022_jp_2 codec now encodes KS X 1001 and GB2312 codepoints into G0 to conform to the standard.
- iso2022_jp_3 and iso2022_jp_2004 codecs can now encode JIS X 0213:2 codepoints.
Diffstat (limited to 'Modules/cjkcodecs/_codecs_iso2022.c')
-rw-r--r--Modules/cjkcodecs/_codecs_iso2022.c15
1 files changed, 9 insertions, 6 deletions
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 8a2ab7e..2a11e9a 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -854,7 +854,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
return coded;
else if (coded & 0x8000)
- return coded;
+ return coded & 0x7fff;
else
return MAP_UNMAPPABLE;
}
@@ -901,7 +901,7 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
return coded;
else if (coded & 0x8000)
- return coded;
+ return coded & 0x7fff;
else
return MAP_UNMAPPABLE;
}
@@ -992,7 +992,10 @@ dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
/*-*- registry tables -*-*/
-#define REGISTRY_KSX1001 { CHARSET_KSX1001, 1, 2, \
+#define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \
+ ksx1001_init, \
+ ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \
ksx1001_init, \
ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \
@@ -1034,7 +1037,7 @@ dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
jisx0213_init, \
jisx0213_2004_2_decoder, \
jisx0213_2004_2_encoder }
-#define REGISTRY_GB2312 { CHARSET_GB2312, 1, 2, \
+#define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \
gb2312_init, \
gb2312_decoder, gb2312_encoder }
#define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \
@@ -1054,7 +1057,7 @@ dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
};
static const struct iso2022_designation iso2022_kr_designations[] = {
- REGISTRY_KSX1001, REGISTRY_SENTINEL
+ REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
};
CONFIGDEF(kr, 0)
@@ -1071,7 +1074,7 @@ static const struct iso2022_designation iso2022_jp_1_designations[] = {
CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)
static const struct iso2022_designation iso2022_jp_2_designations[] = {
- REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001,
+ REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
};