summaryrefslogtreecommitdiffstats
path: root/Modules/cjkcodecs/cjkcodecs.h
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2013-04-11 20:09:04 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2013-04-11 20:09:04 (GMT)
commita0dd0213cc457bdf2b04206548f5a269db256d4d (patch)
treed282d6b615e9feef64d68fa48cac54e22fd97150 /Modules/cjkcodecs/cjkcodecs.h
parentd8a5cc91e6559e11ca28e6a915017433b14b12d1 (diff)
downloadcpython-a0dd0213cc457bdf2b04206548f5a269db256d4d.zip
cpython-a0dd0213cc457bdf2b04206548f5a269db256d4d.tar.gz
cpython-a0dd0213cc457bdf2b04206548f5a269db256d4d.tar.bz2
Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead of
the legacy Py_UNICODE API. Add also a new _PyUnicodeWriter_WriteChar() function.
Diffstat (limited to 'Modules/cjkcodecs/cjkcodecs.h')
-rw-r--r--Modules/cjkcodecs/cjkcodecs.h87
1 files changed, 55 insertions, 32 deletions
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index 3a00d41..65b5c07 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -33,7 +33,7 @@ struct dbcs_index {
typedef struct dbcs_index decode_map;
struct widedbcs_index {
- const ucs4_t *map;
+ const Py_UCS4 *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
@@ -56,7 +56,7 @@ struct dbcs_map {
};
struct pair_encodemap {
- ucs4_t uniseq;
+ Py_UCS4 uniseq;
DBCHAR code;
};
@@ -86,7 +86,7 @@ static const struct dbcs_map *mapping_list;
static Py_ssize_t encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, Py_ssize_t inleft, \
- Py_UNICODE **outbuf, Py_ssize_t outleft)
+ _PyUnicodeWriter *writer)
#define DECODER_RESET(encoding) \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
@@ -101,13 +101,15 @@ static const struct dbcs_map *mapping_list;
#endif
#define NEXT_IN(i) \
- (*inbuf) += (i); \
- (inleft) -= (i);
+ do { \
+ (*inbuf) += (i); \
+ (inleft) -= (i); \
+ } while (0)
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
#define NEXT(i, o) \
- NEXT_IN(i) NEXT_OUT(o)
+ NEXT_IN(i); NEXT_OUT(o)
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
@@ -121,6 +123,23 @@ static const struct dbcs_map *mapping_list;
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
+#define OUTCHAR(c) \
+ do { \
+ if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \
+ return MBERR_TOOSMALL; \
+ } while (0)
+
+#define OUTCHAR2(c1, c2) \
+ do { \
+ Py_UCS4 _c1 = (c1); \
+ Py_UCS4 _c2 = (c2); \
+ if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0) \
+ return MBERR_TOOSMALL; \
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
+ writer->pos += 2; \
+ } while (0)
+
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
@@ -145,19 +164,6 @@ static const struct dbcs_map *mapping_list;
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
-#if Py_UNICODE_SIZE == 2
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(2) \
- (*outbuf)[0] = Py_UNICODE_HIGH_SURROGATE(c); \
- (*outbuf)[1] = Py_UNICODE_LOW_SURROGATE(c); \
- NEXT_OUT(2)
-#else
-# define WRITEUCS4(c) \
- REQUIRE_OUTBUF(1) \
- **outbuf = (Py_UNICODE)(c); \
- NEXT_OUT(1)
-#endif
-
#define _TRYMAP_ENC(m, assi, val) \
((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
@@ -167,24 +173,41 @@ static const struct dbcs_map *mapping_list;
#define TRYMAP_ENC(charset, assi, uni) \
if TRYMAP_ENC_COND(charset, assi, uni)
-#define _TRYMAP_DEC(m, assi, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && ((assi) = (m)->map[(val) - \
- (m)->bottom]) != UNIINV)
-#define TRYMAP_DEC(charset, assi, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
+Py_LOCAL_INLINE(int)
+_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
+{
+ if (c == UNIINV || _PyUnicodeWriter_WriteChar(writer, c) < 0)
+ return UNIINV;
+ else
+ return c;
+}
-#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
- ((m)->map != NULL && (val) >= (m)->bottom && \
- (val)<= (m)->top && \
- ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
+#define _TRYMAP_DEC(m, writer, val) \
+ ((m)->map != NULL && \
+ (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ _TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define _TRYMAP_DEC_CHAR(m, assi, val) \
+ ((m)->map != NULL && \
+ (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define TRYMAP_DEC(charset, writer, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2)
+#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) \
+ if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2)
+
+#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
+ ((m)->map != NULL && (val) >= (m)->bottom && \
+ (val)<= (m)->top && \
+ ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
-#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
- if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
+#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
+ if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
@@ -323,7 +346,7 @@ find_pairencmap(ucs2_t body, ucs2_t modifier,
const struct pair_encodemap *haystack, int haystacksize)
{
int pos, min, max;
- ucs4_t value = body << 16 | modifier;
+ Py_UCS4 value = body << 16 | modifier;
min = 0;
max = haystacksize;