diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/abstract.c | 3 | ||||
-rw-r--r-- | Objects/classobject.c | 17 | ||||
-rw-r--r-- | Objects/descrobject.c | 26 | ||||
-rw-r--r-- | Objects/exceptions.c | 1 | ||||
-rw-r--r-- | Objects/fileobject.c | 19 | ||||
-rw-r--r-- | Objects/obmalloc.c | 242 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 337 |
7 files changed, 466 insertions, 179 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c index 13a9473..6b76ba6 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -216,7 +216,8 @@ PyObject_DelItemString(PyObject *o, char *key) return ret; } -int PyObject_AsCharBuffer(PyObject *obj, +int +PyObject_AsCharBuffer(PyObject *obj, const char **buffer, Py_ssize_t *buffer_len) { diff --git a/Objects/classobject.c b/Objects/classobject.c index 79656f5..aab35c7 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -2217,9 +2217,17 @@ instancemethod_dealloc(register PyMethodObject *im) static int instancemethod_compare(PyMethodObject *a, PyMethodObject *b) { - if (a->im_self != b->im_self) + int cmp; + cmp = PyObject_Compare(a->im_func, b->im_func); + if (cmp) + return cmp; + + if (a->im_self == b->im_self) + return 0; + if (a->im_self == NULL || b->im_self == NULL) return (a->im_self < b->im_self) ? -1 : 1; - return PyObject_Compare(a->im_func, b->im_func); + else + return PyObject_Compare(a->im_self, b->im_self); } static PyObject * @@ -2295,7 +2303,10 @@ instancemethod_hash(PyMethodObject *a) y = PyObject_Hash(a->im_func); if (y == -1) return -1; - return x ^ y; + x = x ^ y; + if (x == -1) + x = -2; + return x; } static int diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 561ba4a5..606ef05 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -901,16 +901,28 @@ wrapper_dealloc(wrapperobject *wp) static int wrapper_compare(wrapperobject *a, wrapperobject *b) { - if (a->descr == b->descr) { - if (a->self == b->self) - return 0; - else - return (a->self < b->self) ? -1 : 1; - } + if (a->descr == b->descr) + return PyObject_Compare(a->self, b->self); else return (a->descr < b->descr) ? -1 : 1; } +static long +wrapper_hash(wrapperobject *wp) +{ + int x, y; + x = _Py_HashPointer(wp->descr); + if (x == -1) + return -1; + y = PyObject_Hash(wp->self); + if (y == -1) + return -1; + x = x ^ y; + if (x == -1) + x = -2; + return x; +} + static PyObject * wrapper_repr(wrapperobject *wp) { @@ -1008,7 +1020,7 @@ static PyTypeObject wrappertype = { 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ - 0, /* tp_hash */ + (hashfunc)wrapper_hash, /* tp_hash */ (ternaryfunc)wrapper_call, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 3b79307..369365b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -851,7 +851,6 @@ WindowsError_str(PyWindowsErrorObject *self) PyTuple_SET_ITEM(tuple, 1, Py_None); } - Py_INCREF(repr); PyTuple_SET_ITEM(tuple, 2, repr); rtnval = PyString_Format(fmt, tuple); diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 997792a..0f7572f 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -1640,6 +1640,20 @@ file_self(PyFileObject *f) return (PyObject *)f; } +static PyObject * +file_exit(PyFileObject *f, PyObject *args) +{ + PyObject *ret = file_close(f); + if (!ret) + /* If error occurred, pass through */ + return NULL; + Py_DECREF(ret); + /* We cannot return the result of close since a true + * value will be interpreted as "yes, swallow the + * exception if one was raised inside the with block". */ + Py_RETURN_NONE; +} + PyDoc_STRVAR(readline_doc, "readline([size]) -> next line from the file, as a string.\n" "\n" @@ -1721,6 +1735,9 @@ PyDoc_STRVAR(isatty_doc, PyDoc_STRVAR(enter_doc, "__enter__() -> self."); +PyDoc_STRVAR(exit_doc, + "__exit__(*excinfo) -> None. Closes the file."); + static PyMethodDef file_methods[] = { {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc}, {"read", (PyCFunction)file_read, METH_VARARGS, read_doc}, @@ -1738,7 +1755,7 @@ static PyMethodDef file_methods[] = { {"close", (PyCFunction)file_close, METH_NOARGS, close_doc}, {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc}, {"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc}, - {"__exit__", (PyCFunction)file_close, METH_VARARGS, close_doc}, + {"__exit__", (PyCFunction)file_exit, METH_VARARGS, exit_doc}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index a393cbc..0ca8f2b 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -491,13 +491,13 @@ static struct arena_object* usable_arenas = NULL; #define INITIAL_ARENA_OBJECTS 16 /* Number of arenas allocated that haven't been free()'d. */ -static ulong narenas_currently_allocated = 0; +static size_t narenas_currently_allocated = 0; #ifdef PYMALLOC_DEBUG /* Total number of times malloc() called to allocate an arena. */ -static ulong ntimes_arena_allocated = 0; +static size_t ntimes_arena_allocated = 0; /* High water mark (max value ever seen) for narenas_currently_allocated. */ -static ulong narenas_highwater = 0; +static size_t narenas_highwater = 0; #endif /* Allocate a new arena. If we run out of memory, return NULL. Else @@ -1220,39 +1220,45 @@ PyObject_Free(void *p) #define DEADBYTE 0xDB /* dead (newly freed) memory */ #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ -static ulong serialno = 0; /* incremented on each debug {m,re}alloc */ +static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ /* serialno is always incremented via calling this routine. The point is - to supply a single place to set a breakpoint. -*/ + * to supply a single place to set a breakpoint. + */ static void bumpserialno(void) { ++serialno; } +#define SST SIZEOF_SIZE_T -/* Read 4 bytes at p as a big-endian ulong. */ -static ulong -read4(const void *p) +/* Read sizeof(size_t) bytes at p as a big-endian size_t. */ +static size_t +read_size_t(const void *p) { const uchar *q = (const uchar *)p; - return ((ulong)q[0] << 24) | - ((ulong)q[1] << 16) | - ((ulong)q[2] << 8) | - (ulong)q[3]; + size_t result = *q++; + int i; + + for (i = SST; --i > 0; ++q) + result = (result << 8) | *q; + return result; } -/* Write the 4 least-significant bytes of n as a big-endian unsigned int, - MSB at address p, LSB at p+3. */ +/* Write n as a big-endian size_t, MSB at address p, LSB at + * p + sizeof(size_t) - 1. + */ static void -write4(void *p, ulong n) +write_size_t(void *p, size_t n) { - uchar *q = (uchar *)p; - q[0] = (uchar)((n >> 24) & 0xff); - q[1] = (uchar)((n >> 16) & 0xff); - q[2] = (uchar)((n >> 8) & 0xff); - q[3] = (uchar)( n & 0xff); + uchar *q = (uchar *)p + SST - 1; + int i; + + for (i = SST; --i >= 0; --q) { + *q = (uchar)(n & 0xff); + n >>= 8; + } } #ifdef Py_DEBUG @@ -1280,25 +1286,25 @@ pool_is_in_list(const poolp target, poolp list) #endif /* Py_DEBUG */ -/* The debug malloc asks for 16 extra bytes and fills them with useful stuff, - here calling the underlying malloc's result p: +/* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and + fills them with useful stuff, here calling the underlying malloc's result p: -p[0:4] - Number of bytes originally asked for. 4-byte unsigned integer, - big-endian (easier to read in a memory dump). -p[4:8] +p[0: S] + Number of bytes originally asked for. This is a size_t, big-endian (easier + to read in a memory dump). +p[S: 2*S] Copies of FORBIDDENBYTE. Used to catch under- writes and reads. -p[8:8+n] +p[2*S: 2*S+n] The requested memory, filled with copies of CLEANBYTE. Used to catch reference to uninitialized memory. - &p[8] is returned. Note that this is 8-byte aligned if pymalloc + &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc handled the request itself. -p[8+n:8+n+4] +p[2*S+n: 2*S+n+S] Copies of FORBIDDENBYTE. Used to catch over- writes and reads. -p[8+n+4:8+n+8] +p[2*S+n+S: 2*S+n+2*S] A serial number, incremented by 1 on each call to _PyObject_DebugMalloc and _PyObject_DebugRealloc. - 4-byte unsigned integer, big-endian. + This is a big-endian size_t. If "bad memory" is detected later, the serial number gives an excellent way to set a breakpoint on the next run, to capture the instant at which this block was passed out. @@ -1308,41 +1314,33 @@ void * _PyObject_DebugMalloc(size_t nbytes) { uchar *p; /* base address of malloc'ed block */ - uchar *tail; /* p + 8 + nbytes == pointer to tail pad bytes */ - size_t total; /* nbytes + 16 */ + uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */ + size_t total; /* nbytes + 4*SST */ bumpserialno(); - total = nbytes + 16; -#if SIZEOF_SIZE_T < 8 - /* XXX do this check only on 32-bit machines */ - if (total < nbytes || (total >> 31) > 1) { - /* overflow, or we can't represent it in 4 bytes */ - /* Obscure: can't do (total >> 32) != 0 instead, because - C doesn't define what happens for a right-shift of 32 - when size_t is a 32-bit type. At least C guarantees - size_t is an unsigned type. */ + total = nbytes + 4*SST; + if (total < nbytes) + /* overflow: can't represent total as a size_t */ return NULL; - } -#endif p = (uchar *)PyObject_Malloc(total); if (p == NULL) return NULL; - write4(p, (ulong)nbytes); - p[4] = p[5] = p[6] = p[7] = FORBIDDENBYTE; + write_size_t(p, nbytes); + memset(p + SST, FORBIDDENBYTE, SST); if (nbytes > 0) - memset(p+8, CLEANBYTE, nbytes); + memset(p + 2*SST, CLEANBYTE, nbytes); - tail = p + 8 + nbytes; - tail[0] = tail[1] = tail[2] = tail[3] = FORBIDDENBYTE; - write4(tail + 4, serialno); + tail = p + 2*SST + nbytes; + memset(tail, FORBIDDENBYTE, SST); + write_size_t(tail + SST, serialno); - return p+8; + return p + 2*SST; } -/* The debug free first checks the 8 bytes on each end for sanity (in +/* The debug free first checks the 2*SST bytes on each end for sanity (in particular, that the FORBIDDENBYTEs are still intact). Then fills the original bytes with DEADBYTE. Then calls the underlying free. @@ -1350,16 +1348,16 @@ _PyObject_DebugMalloc(size_t nbytes) void _PyObject_DebugFree(void *p) { - uchar *q = (uchar *)p; + uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */ size_t nbytes; if (p == NULL) return; _PyObject_DebugCheckAddress(p); - nbytes = read4(q-8); + nbytes = read_size_t(q); if (nbytes > 0) memset(q, DEADBYTE, nbytes); - PyObject_Free(q-8); + PyObject_Free(q); } void * @@ -1367,20 +1365,20 @@ _PyObject_DebugRealloc(void *p, size_t nbytes) { uchar *q = (uchar *)p; uchar *tail; - size_t total; /* nbytes + 16 */ + size_t total; /* nbytes + 4*SST */ size_t original_nbytes; + int i; if (p == NULL) return _PyObject_DebugMalloc(nbytes); _PyObject_DebugCheckAddress(p); bumpserialno(); - original_nbytes = read4(q-8); - total = nbytes + 16; - if (total < nbytes || (total >> 31) > 1) { - /* overflow, or we can't represent it in 4 bytes */ + original_nbytes = read_size_t(q - 2*SST); + total = nbytes + 4*SST; + if (total < nbytes) + /* overflow: can't represent total as a size_t */ return NULL; - } if (nbytes < original_nbytes) { /* shrinking: mark old extra memory dead */ @@ -1388,19 +1386,17 @@ _PyObject_DebugRealloc(void *p, size_t nbytes) } /* Resize and add decorations. */ - q = (uchar *)PyObject_Realloc(q-8, total); + q = (uchar *)PyObject_Realloc(q - 2*SST, total); if (q == NULL) return NULL; - write4(q, (ulong)nbytes); - assert(q[4] == FORBIDDENBYTE && - q[5] == FORBIDDENBYTE && - q[6] == FORBIDDENBYTE && - q[7] == FORBIDDENBYTE); - q += 8; + write_size_t(q, nbytes); + for (i = 0; i < SST; ++i) + assert(q[SST + i] == FORBIDDENBYTE); + q += 2*SST; tail = q + nbytes; - tail[0] = tail[1] = tail[2] = tail[3] = FORBIDDENBYTE; - write4(tail + 4, serialno); + memset(tail, FORBIDDENBYTE, SST); + write_size_t(tail + SST, serialno); if (nbytes > original_nbytes) { /* growing: mark new extra memory clean */ @@ -1420,7 +1416,7 @@ _PyObject_DebugCheckAddress(const void *p) { const uchar *q = (const uchar *)p; char *msg; - ulong nbytes; + size_t nbytes; const uchar *tail; int i; @@ -1433,16 +1429,16 @@ _PyObject_DebugCheckAddress(const void *p) * corruption, the number-of-bytes field may be nuts, and checking * the tail could lead to a segfault then. */ - for (i = 4; i >= 1; --i) { + for (i = SST; i >= 1; --i) { if (*(q-i) != FORBIDDENBYTE) { msg = "bad leading pad byte"; goto error; } } - nbytes = read4(q-8); + nbytes = read_size_t(q - 2*SST); tail = q + nbytes; - for (i = 0; i < 4; ++i) { + for (i = 0; i < SST; ++i) { if (tail[i] != FORBIDDENBYTE) { msg = "bad trailing pad byte"; goto error; @@ -1462,28 +1458,33 @@ _PyObject_DebugDumpAddress(const void *p) { const uchar *q = (const uchar *)p; const uchar *tail; - ulong nbytes, serial; + size_t nbytes, serial; int i; + int ok; fprintf(stderr, "Debug memory block at address p=%p:\n", p); if (p == NULL) return; - nbytes = read4(q-8); - fprintf(stderr, " %lu bytes originally requested\n", nbytes); + nbytes = read_size_t(q - 2*SST); + fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally " + "requested\n", nbytes); /* In case this is nuts, check the leading pad bytes first. */ - fputs(" The 4 pad bytes at p-4 are ", stderr); - if (*(q-4) == FORBIDDENBYTE && - *(q-3) == FORBIDDENBYTE && - *(q-2) == FORBIDDENBYTE && - *(q-1) == FORBIDDENBYTE) { - fputs("FORBIDDENBYTE, as expected.\n", stderr); + fprintf(stderr, " The %d pad bytes at p-%d are ", SST, SST); + ok = 1; + for (i = 1; i <= SST; ++i) { + if (*(q-i) != FORBIDDENBYTE) { + ok = 0; + break; + } } + if (ok) + fputs("FORBIDDENBYTE, as expected.\n", stderr); else { fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", FORBIDDENBYTE); - for (i = 4; i >= 1; --i) { + for (i = SST; i >= 1; --i) { const uchar byte = *(q-i); fprintf(stderr, " at p-%d: 0x%02x", i, byte); if (byte != FORBIDDENBYTE) @@ -1498,17 +1499,20 @@ _PyObject_DebugDumpAddress(const void *p) } tail = q + nbytes; - fprintf(stderr, " The 4 pad bytes at tail=%p are ", tail); - if (tail[0] == FORBIDDENBYTE && - tail[1] == FORBIDDENBYTE && - tail[2] == FORBIDDENBYTE && - tail[3] == FORBIDDENBYTE) { - fputs("FORBIDDENBYTE, as expected.\n", stderr); + fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail); + ok = 1; + for (i = 0; i < SST; ++i) { + if (tail[i] != FORBIDDENBYTE) { + ok = 0; + break; + } } + if (ok) + fputs("FORBIDDENBYTE, as expected.\n", stderr); else { fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n", FORBIDDENBYTE); - for (i = 0; i < 4; ++i) { + for (i = 0; i < SST; ++i) { const uchar byte = tail[i]; fprintf(stderr, " at tail+%d: 0x%02x", i, byte); @@ -1518,12 +1522,12 @@ _PyObject_DebugDumpAddress(const void *p) } } - serial = read4(tail+4); - fprintf(stderr, " The block was made by call #%lu to " - "debug malloc/realloc.\n", serial); + serial = read_size_t(tail + SST); + fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T + "u to debug malloc/realloc.\n", serial); if (nbytes > 0) { - int i = 0; + i = 0; fputs(" Data at p:", stderr); /* print up to 8 bytes at the start */ while (q < tail && i < 8) { @@ -1546,12 +1550,12 @@ _PyObject_DebugDumpAddress(const void *p) } } -static ulong -printone(const char* msg, ulong value) +static size_t +printone(const char* msg, size_t value) { int i, k; char buf[100]; - ulong origvalue = value; + size_t origvalue = value; fputs(msg, stderr); for (i = (int)strlen(msg); i < 35; ++i) @@ -1564,8 +1568,8 @@ printone(const char* msg, ulong value) buf[i--] = '\n'; k = 3; do { - ulong nextvalue = value / 10UL; - uint digit = value - nextvalue * 10UL; + size_t nextvalue = value / 10; + uint digit = (uint)(value - nextvalue * 10); value = nextvalue; buf[i--] = (char)(digit + '0'); --k; @@ -1592,28 +1596,28 @@ _PyObject_DebugMallocStats(void) uint i; const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT; /* # of pools, allocated blocks, and free blocks per class index */ - ulong numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; - ulong numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; - ulong numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; + size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; + size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; + size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT]; /* total # of allocated bytes in used and full pools */ - ulong allocated_bytes = 0; + size_t allocated_bytes = 0; /* total # of available bytes in used pools */ - ulong available_bytes = 0; + size_t available_bytes = 0; /* # of free pools + pools not yet carved out of current arena */ uint numfreepools = 0; /* # of bytes for arena alignment padding */ - ulong arena_alignment = 0; + size_t arena_alignment = 0; /* # of bytes in used and full pools used for pool_headers */ - ulong pool_header_bytes = 0; + size_t pool_header_bytes = 0; /* # of bytes in used and full pools wasted due to quantization, * i.e. the necessarily leftover space at the ends of used and * full pools. */ - ulong quantization = 0; + size_t quantization = 0; /* # of arenas actually allocated. */ - ulong narenas = 0; + size_t narenas = 0; /* running total -- should equal narenas * ARENA_SIZE */ - ulong total; + size_t total; char buf[128]; fprintf(stderr, "Small block threshold = %d, in %u size classes.\n", @@ -1678,15 +1682,18 @@ _PyObject_DebugMallocStats(void) stderr); for (i = 0; i < numclasses; ++i) { - ulong p = numpools[i]; - ulong b = numblocks[i]; - ulong f = numfreeblocks[i]; + size_t p = numpools[i]; + size_t b = numblocks[i]; + size_t f = numfreeblocks[i]; uint size = INDEX2SIZE(i); if (p == 0) { assert(b == 0 && f == 0); continue; } - fprintf(stderr, "%5u %6u %11lu %15lu %13lu\n", + fprintf(stderr, "%5u %6u " + "%11" PY_FORMAT_SIZE_T "u " + "%15" PY_FORMAT_SIZE_T "u " + "%13" PY_FORMAT_SIZE_T "u\n", i, size, p, b, f); allocated_bytes += b * size; available_bytes += f * size; @@ -1702,7 +1709,8 @@ _PyObject_DebugMallocStats(void) (void)printone("# arenas allocated current", narenas); PyOS_snprintf(buf, sizeof(buf), - "%lu arenas * %d bytes/arena", narenas, ARENA_SIZE); + "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena", + narenas, ARENA_SIZE); (void)printone(buf, narenas * ARENA_SIZE); fputc('\n', stderr); @@ -1712,7 +1720,7 @@ _PyObject_DebugMallocStats(void) PyOS_snprintf(buf, sizeof(buf), "%u unused pools * %d bytes", numfreepools, POOL_SIZE); - total += printone(buf, (ulong)numfreepools * POOL_SIZE); + total += printone(buf, (size_t)numfreepools * POOL_SIZE); total += printone("# bytes lost to pool headers", pool_header_bytes); total += printone("# bytes lost to quantization", quantization); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6cdb0fc..dcc9274 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3057,6 +3057,221 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, return NULL; } +/* Charmap encoding: the lookup table */ + +struct encoding_map{ + PyObject_HEAD + unsigned char level1[32]; + int count2, count3; + unsigned char level23[1]; +}; + +static PyObject* +encoding_map_size(PyObject *obj, PyObject* args) +{ + struct encoding_map *map = (struct encoding_map*)obj; + return PyInt_FromLong(sizeof(*map) - 1 + 16*map->count2 + + 128*map->count3); +} + +static PyMethodDef encoding_map_methods[] = { + {"size", encoding_map_size, METH_NOARGS, + PyDoc_STR("Return the size (in bytes) of this object") }, + { 0 } +}; + +static void +encoding_map_dealloc(PyObject* o) +{ + PyObject_FREE(o); +} + +static PyTypeObject EncodingMapType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "EncodingMap", /*tp_name*/ + sizeof(struct encoding_map), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + encoding_map_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + encoding_map_methods, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ +}; + +PyObject* +PyUnicode_BuildEncodingMap(PyObject* string) +{ + Py_UNICODE *decode; + PyObject *result; + struct encoding_map *mresult; + int i; + int need_dict = 0; + unsigned char level1[32]; + unsigned char level2[512]; + unsigned char *mlevel1, *mlevel2, *mlevel3; + int count2 = 0, count3 = 0; + + if (!PyUnicode_Check(string) || PyUnicode_GetSize(string) != 256) { + PyErr_BadArgument(); + return NULL; + } + decode = PyUnicode_AS_UNICODE(string); + memset(level1, 0xFF, sizeof level1); + memset(level2, 0xFF, sizeof level2); + + /* If there isn't a one-to-one mapping of NULL to \0, + or if there are non-BMP characters, we need to use + a mapping dictionary. */ + if (decode[0] != 0) + need_dict = 1; + for (i = 1; i < 256; i++) { + int l1, l2; + if (decode[i] == 0 + #ifdef Py_UNICODE_WIDE + || decode[i] > 0xFFFF + #endif + ) { + need_dict = 1; + break; + } + if (decode[i] == 0xFFFE) + /* unmapped character */ + continue; + l1 = decode[i] >> 11; + l2 = decode[i] >> 7; + if (level1[l1] == 0xFF) + level1[l1] = count2++; + if (level2[l2] == 0xFF) + level2[l2] = count3++; + } + + if (count2 >= 0xFF || count3 >= 0xFF) + need_dict = 1; + + if (need_dict) { + PyObject *result = PyDict_New(); + PyObject *key, *value; + if (!result) + return NULL; + for (i = 0; i < 256; i++) { + key = value = NULL; + key = PyInt_FromLong(decode[i]); + value = PyInt_FromLong(i); + if (!key || !value) + goto failed1; + if (PyDict_SetItem(result, key, value) == -1) + goto failed1; + Py_DECREF(key); + Py_DECREF(value); + } + return result; + failed1: + Py_XDECREF(key); + Py_XDECREF(value); + Py_DECREF(result); + return NULL; + } + + /* Create a three-level trie */ + result = PyObject_MALLOC(sizeof(struct encoding_map) + + 16*count2 + 128*count3 - 1); + if (!result) + return PyErr_NoMemory(); + PyObject_Init(result, &EncodingMapType); + mresult = (struct encoding_map*)result; + mresult->count2 = count2; + mresult->count3 = count3; + mlevel1 = mresult->level1; + mlevel2 = mresult->level23; + mlevel3 = mresult->level23 + 16*count2; + memcpy(mlevel1, level1, 32); + memset(mlevel2, 0xFF, 16*count2); + memset(mlevel3, 0, 128*count3); + count3 = 0; + for (i = 1; i < 256; i++) { + int o1, o2, o3, i2, i3; + if (decode[i] == 0xFFFE) + /* unmapped character */ + continue; + o1 = decode[i]>>11; + o2 = (decode[i]>>7) & 0xF; + i2 = 16*mlevel1[o1] + o2; + if (mlevel2[i2] == 0xFF) + mlevel2[i2] = count3++; + o3 = decode[i] & 0x7F; + i3 = 128*mlevel2[i2] + o3; + mlevel3[i3] = i; + } + return result; +} + +static int +encoding_map_lookup(Py_UNICODE c, PyObject *mapping) +{ + struct encoding_map *map = (struct encoding_map*)mapping; + int l1 = c>>11; + int l2 = (c>>7) & 0xF; + int l3 = c & 0x7F; + int i; + +#ifdef Py_UNICODE_WIDE + if (c > 0xFFFF) { + return -1; + } +#endif + if (c == 0) + return 0; + /* level 1*/ + i = map->level1[l1]; + if (i == 0xFF) { + return -1; + } + /* level 2*/ + i = map->level23[16*i+l2]; + if (i == 0xFF) { + return -1; + } + /* level 3 */ + i = map->level23[16*map->count2 + 128*i + l3]; + if (i == 0) { + return -1; + } + return i; +} + /* Lookup the character ch in the mapping. If the character can't be found, Py_None is returned (or NULL, if another error occurred). */ @@ -3102,6 +3317,22 @@ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping) } } +static int +charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) +{ + Py_ssize_t outsize = PyString_GET_SIZE(*outobj); + /* exponentially overallocate to minimize reallocations */ + if (requiredsize < 2*outsize) + requiredsize = 2*outsize; + if (_PyString_Resize(outobj, requiredsize)) { + return 0; + } + return 1; +} + +typedef enum charmapencode_result { + enc_SUCCESS, enc_FAILED, enc_EXCEPTION +}charmapencode_result; /* lookup the character, put the result in the output string and adjust various state variables. Reallocate the output string if not enough space is available. Return a new reference to the object that @@ -3109,51 +3340,59 @@ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping) (in which case no character was written) or NULL, if a reallocation error occurred. The caller must decref the result */ static -PyObject *charmapencode_output(Py_UNICODE c, PyObject *mapping, +charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, PyObject **outobj, Py_ssize_t *outpos) { - PyObject *rep = charmapencode_lookup(c, mapping); + PyObject *rep; + char *outstart; + Py_ssize_t outsize = PyString_GET_SIZE(*outobj); + if (mapping->ob_type == &EncodingMapType) { + int res = encoding_map_lookup(c, mapping); + Py_ssize_t requiredsize = *outpos+1; + if (res == -1) + return enc_FAILED; + if (outsize<requiredsize) + if (!charmapencode_resize(outobj, outpos, requiredsize)) + return enc_EXCEPTION; + outstart = PyString_AS_STRING(*outobj); + outstart[(*outpos)++] = (char)res; + return enc_SUCCESS; + } + + rep = charmapencode_lookup(c, mapping); if (rep==NULL) - return NULL; - else if (rep==Py_None) - return rep; - else { - char *outstart = PyString_AS_STRING(*outobj); - Py_ssize_t outsize = PyString_GET_SIZE(*outobj); + return enc_EXCEPTION; + else if (rep==Py_None) { + Py_DECREF(rep); + return enc_FAILED; + } else { if (PyInt_Check(rep)) { Py_ssize_t requiredsize = *outpos+1; - if (outsize<requiredsize) { - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (_PyString_Resize(outobj, requiredsize)) { + if (outsize<requiredsize) + if (!charmapencode_resize(outobj, outpos, requiredsize)) { Py_DECREF(rep); - return NULL; + return enc_EXCEPTION; } - outstart = PyString_AS_STRING(*outobj); - } + outstart = PyString_AS_STRING(*outobj); outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep); } else { const char *repchars = PyString_AS_STRING(rep); Py_ssize_t repsize = PyString_GET_SIZE(rep); Py_ssize_t requiredsize = *outpos+repsize; - if (outsize<requiredsize) { - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (_PyString_Resize(outobj, requiredsize)) { + if (outsize<requiredsize) + if (!charmapencode_resize(outobj, outpos, requiredsize)) { Py_DECREF(rep); - return NULL; + return enc_EXCEPTION; } - outstart = PyString_AS_STRING(*outobj); - } + outstart = PyString_AS_STRING(*outobj); memcpy(outstart + *outpos, repchars, repsize); *outpos += repsize; } } - return rep; + Py_DECREF(rep); + return enc_SUCCESS; } /* handle an error in PyUnicode_EncodeCharmap @@ -3175,18 +3414,27 @@ int charmap_encoding_error( Py_ssize_t collpos; char *encoding = "charmap"; char *reason = "character maps to <undefined>"; + charmapencode_result x; - PyObject *x; /* find all unencodable characters */ while (collendpos < size) { - x = charmapencode_lookup(p[collendpos], mapping); - if (x==NULL) + PyObject *rep; + if (mapping->ob_type == &EncodingMapType) { + int res = encoding_map_lookup(p[collendpos], mapping); + if (res != -1) + break; + ++collendpos; + continue; + } + + rep = charmapencode_lookup(p[collendpos], mapping); + if (rep==NULL) return -1; - else if (x!=Py_None) { - Py_DECREF(x); + else if (rep!=Py_None) { + Py_DECREF(rep); break; } - Py_DECREF(x); + Py_DECREF(rep); ++collendpos; } /* cache callback name lookup @@ -3210,15 +3458,13 @@ int charmap_encoding_error( case 2: /* replace */ for (collpos = collstartpos; collpos<collendpos; ++collpos) { x = charmapencode_output('?', mapping, res, respos); - if (x==NULL) { + if (x==enc_EXCEPTION) { return -1; } - else if (x==Py_None) { - Py_DECREF(x); + else if (x==enc_FAILED) { raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); return -1; } - Py_DECREF(x); } /* fall through */ case 3: /* ignore */ @@ -3232,14 +3478,12 @@ int charmap_encoding_error( sprintf(buffer, "&#%d;", (int)p[collpos]); for (cp = buffer; *cp; ++cp) { x = charmapencode_output(*cp, mapping, res, respos); - if (x==NULL) + if (x==enc_EXCEPTION) return -1; - else if (x==Py_None) { - Py_DECREF(x); + else if (x==enc_FAILED) { raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); return -1; } - Py_DECREF(x); } } *inpos = collendpos; @@ -3254,17 +3498,14 @@ int charmap_encoding_error( repsize = PyUnicode_GET_SIZE(repunicode); for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) { x = charmapencode_output(*uni2, mapping, res, respos); - if (x==NULL) { - Py_DECREF(repunicode); + if (x==enc_EXCEPTION) { return -1; } - else if (x==Py_None) { + else if (x==enc_FAILED) { Py_DECREF(repunicode); - Py_DECREF(x); raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason); return -1; } - Py_DECREF(x); } *inpos = newpos; Py_DECREF(repunicode); @@ -3304,22 +3545,20 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, while (inpos<size) { /* try to encode it */ - PyObject *x = charmapencode_output(p[inpos], mapping, &res, &respos); - if (x==NULL) /* error */ + charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos); + if (x==enc_EXCEPTION) /* error */ goto onError; - if (x==Py_None) { /* unencodable character */ + if (x==enc_FAILED) { /* unencodable character */ if (charmap_encoding_error(p, size, &inpos, mapping, &exc, &known_errorHandler, &errorHandler, errors, &res, &respos)) { - Py_DECREF(x); goto onError; } } else /* done with this character => adjust input position */ ++inpos; - Py_DECREF(x); } /* Resize if we allocated to much */ |