summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r--Objects/abstract.c3
-rw-r--r--Objects/classobject.c17
-rw-r--r--Objects/descrobject.c26
-rw-r--r--Objects/exceptions.c1
-rw-r--r--Objects/fileobject.c19
-rw-r--r--Objects/obmalloc.c242
-rw-r--r--Objects/unicodeobject.c337
7 files changed, 466 insertions, 179 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 13a9473..6b76ba6 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -216,7 +216,8 @@ PyObject_DelItemString(PyObject *o, char *key)
return ret;
}
-int PyObject_AsCharBuffer(PyObject *obj,
+int
+PyObject_AsCharBuffer(PyObject *obj,
const char **buffer,
Py_ssize_t *buffer_len)
{
diff --git a/Objects/classobject.c b/Objects/classobject.c
index 79656f5..aab35c7 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -2217,9 +2217,17 @@ instancemethod_dealloc(register PyMethodObject *im)
static int
instancemethod_compare(PyMethodObject *a, PyMethodObject *b)
{
- if (a->im_self != b->im_self)
+ int cmp;
+ cmp = PyObject_Compare(a->im_func, b->im_func);
+ if (cmp)
+ return cmp;
+
+ if (a->im_self == b->im_self)
+ return 0;
+ if (a->im_self == NULL || b->im_self == NULL)
return (a->im_self < b->im_self) ? -1 : 1;
- return PyObject_Compare(a->im_func, b->im_func);
+ else
+ return PyObject_Compare(a->im_self, b->im_self);
}
static PyObject *
@@ -2295,7 +2303,10 @@ instancemethod_hash(PyMethodObject *a)
y = PyObject_Hash(a->im_func);
if (y == -1)
return -1;
- return x ^ y;
+ x = x ^ y;
+ if (x == -1)
+ x = -2;
+ return x;
}
static int
diff --git a/Objects/descrobject.c b/Objects/descrobject.c
index 561ba4a5..606ef05 100644
--- a/Objects/descrobject.c
+++ b/Objects/descrobject.c
@@ -901,16 +901,28 @@ wrapper_dealloc(wrapperobject *wp)
static int
wrapper_compare(wrapperobject *a, wrapperobject *b)
{
- if (a->descr == b->descr) {
- if (a->self == b->self)
- return 0;
- else
- return (a->self < b->self) ? -1 : 1;
- }
+ if (a->descr == b->descr)
+ return PyObject_Compare(a->self, b->self);
else
return (a->descr < b->descr) ? -1 : 1;
}
+/* Hash a method-wrapper object by combining the identity of its descriptor
+ * with the hash of the object it is bound to.
+ *
+ * Returns -1 if either component reports -1 (the error value); otherwise
+ * the result is never -1, because an accidental -1 is remapped to -2.
+ */
+static long
+wrapper_hash(wrapperobject *wp)
+{
+	/* Accumulate in long, the function's own return type.  Using int here
+	 * would discard the high bits on platforms where long is wider than
+	 * int, and could turn a valid hash whose low bits are all ones into a
+	 * spurious -1 "error". */
+	long x, y;
+	x = _Py_HashPointer(wp->descr);
+	if (x == -1)
+		return -1;
+	y = PyObject_Hash(wp->self);
+	if (y == -1)
+		return -1;
+	x = x ^ y;
+	/* -1 is reserved for errors, so never hand it out as a hash value */
+	if (x == -1)
+		x = -2;
+	return x;
+}
+
static PyObject *
wrapper_repr(wrapperobject *wp)
{
@@ -1008,7 +1020,7 @@ static PyTypeObject wrappertype = {
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
- 0, /* tp_hash */
+ (hashfunc)wrapper_hash, /* tp_hash */
(ternaryfunc)wrapper_call, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 3b79307..369365b 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -851,7 +851,6 @@ WindowsError_str(PyWindowsErrorObject *self)
PyTuple_SET_ITEM(tuple, 1, Py_None);
}
- Py_INCREF(repr);
PyTuple_SET_ITEM(tuple, 2, repr);
rtnval = PyString_Format(fmt, tuple);
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 997792a..0f7572f 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -1640,6 +1640,20 @@ file_self(PyFileObject *f)
return (PyObject *)f;
}
+/* __exit__ implementation: close the file and always report None.
+ *
+ * The result of close() is deliberately discarded; a true return value
+ * from __exit__ would be taken as a request to swallow any exception
+ * raised inside the with block.
+ */
+static PyObject *
+file_exit(PyFileObject *f, PyObject *args)
+{
+	PyObject *closed = file_close(f);
+
+	if (closed == NULL) {
+		/* closing failed: propagate the error unchanged */
+		return NULL;
+	}
+	Py_DECREF(closed);
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
@@ -1721,6 +1735,9 @@ PyDoc_STRVAR(isatty_doc,
PyDoc_STRVAR(enter_doc,
"__enter__() -> self.");
+PyDoc_STRVAR(exit_doc,
+ "__exit__(*excinfo) -> None. Closes the file.");
+
static PyMethodDef file_methods[] = {
{"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
{"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
@@ -1738,7 +1755,7 @@ static PyMethodDef file_methods[] = {
{"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
{"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
{"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc},
- {"__exit__", (PyCFunction)file_close, METH_VARARGS, close_doc},
+ {"__exit__", (PyCFunction)file_exit, METH_VARARGS, exit_doc},
{NULL, NULL} /* sentinel */
};
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index a393cbc..0ca8f2b 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -491,13 +491,13 @@ static struct arena_object* usable_arenas = NULL;
#define INITIAL_ARENA_OBJECTS 16
/* Number of arenas allocated that haven't been free()'d. */
-static ulong narenas_currently_allocated = 0;
+static size_t narenas_currently_allocated = 0;
#ifdef PYMALLOC_DEBUG
/* Total number of times malloc() called to allocate an arena. */
-static ulong ntimes_arena_allocated = 0;
+static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
-static ulong narenas_highwater = 0;
+static size_t narenas_highwater = 0;
#endif
/* Allocate a new arena. If we run out of memory, return NULL. Else
@@ -1220,39 +1220,45 @@ PyObject_Free(void *p)
#define DEADBYTE 0xDB /* dead (newly freed) memory */
#define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */
-static ulong serialno = 0; /* incremented on each debug {m,re}alloc */
+static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
/* serialno is always incremented via calling this routine. The point is
- to supply a single place to set a breakpoint.
-*/
+ * to supply a single place to set a breakpoint.
+ */
static void
bumpserialno(void)
{
++serialno;
}
+#define SST SIZEOF_SIZE_T
-/* Read 4 bytes at p as a big-endian ulong. */
-static ulong
-read4(const void *p)
+/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
+static size_t
+read_size_t(const void *p)
{
const uchar *q = (const uchar *)p;
- return ((ulong)q[0] << 24) |
- ((ulong)q[1] << 16) |
- ((ulong)q[2] << 8) |
- (ulong)q[3];
+ size_t result = *q++;
+ int i;
+
+ for (i = SST; --i > 0; ++q)
+ result = (result << 8) | *q;
+ return result;
}
-/* Write the 4 least-significant bytes of n as a big-endian unsigned int,
- MSB at address p, LSB at p+3. */
+/* Write n as a big-endian size_t, MSB at address p, LSB at
+ * p + sizeof(size_t) - 1.
+ */
static void
-write4(void *p, ulong n)
+write_size_t(void *p, size_t n)
{
- uchar *q = (uchar *)p;
- q[0] = (uchar)((n >> 24) & 0xff);
- q[1] = (uchar)((n >> 16) & 0xff);
- q[2] = (uchar)((n >> 8) & 0xff);
- q[3] = (uchar)( n & 0xff);
+ uchar *q = (uchar *)p + SST - 1;
+ int i;
+
+ for (i = SST; --i >= 0; --q) {
+ *q = (uchar)(n & 0xff);
+ n >>= 8;
+ }
}
#ifdef Py_DEBUG
@@ -1280,25 +1286,25 @@ pool_is_in_list(const poolp target, poolp list)
#endif /* Py_DEBUG */
-/* The debug malloc asks for 16 extra bytes and fills them with useful stuff,
- here calling the underlying malloc's result p:
+/* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and
+ fills them with useful stuff, here calling the underlying malloc's result p:
-p[0:4]
- Number of bytes originally asked for. 4-byte unsigned integer,
- big-endian (easier to read in a memory dump).
-p[4:8]
+p[0: S]
+ Number of bytes originally asked for. This is a size_t, big-endian (easier
+ to read in a memory dump).
+p[S: 2*S]
Copies of FORBIDDENBYTE. Used to catch under- writes and reads.
-p[8:8+n]
+p[2*S: 2*S+n]
The requested memory, filled with copies of CLEANBYTE.
Used to catch reference to uninitialized memory.
- &p[8] is returned. Note that this is 8-byte aligned if pymalloc
+ &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
handled the request itself.
-p[8+n:8+n+4]
+p[2*S+n: 2*S+n+S]
Copies of FORBIDDENBYTE. Used to catch over- writes and reads.
-p[8+n+4:8+n+8]
+p[2*S+n+S: 2*S+n+2*S]
A serial number, incremented by 1 on each call to _PyObject_DebugMalloc
and _PyObject_DebugRealloc.
- 4-byte unsigned integer, big-endian.
+ This is a big-endian size_t.
If "bad memory" is detected later, the serial number gives an
excellent way to set a breakpoint on the next run, to capture the
instant at which this block was passed out.
@@ -1308,41 +1314,33 @@ void *
_PyObject_DebugMalloc(size_t nbytes)
{
uchar *p; /* base address of malloc'ed block */
- uchar *tail; /* p + 8 + nbytes == pointer to tail pad bytes */
- size_t total; /* nbytes + 16 */
+ uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */
+ size_t total; /* nbytes + 4*SST */
bumpserialno();
- total = nbytes + 16;
-#if SIZEOF_SIZE_T < 8
- /* XXX do this check only on 32-bit machines */
- if (total < nbytes || (total >> 31) > 1) {
- /* overflow, or we can't represent it in 4 bytes */
- /* Obscure: can't do (total >> 32) != 0 instead, because
- C doesn't define what happens for a right-shift of 32
- when size_t is a 32-bit type. At least C guarantees
- size_t is an unsigned type. */
+ total = nbytes + 4*SST;
+ if (total < nbytes)
+ /* overflow: can't represent total as a size_t */
return NULL;
- }
-#endif
p = (uchar *)PyObject_Malloc(total);
if (p == NULL)
return NULL;
- write4(p, (ulong)nbytes);
- p[4] = p[5] = p[6] = p[7] = FORBIDDENBYTE;
+ write_size_t(p, nbytes);
+ memset(p + SST, FORBIDDENBYTE, SST);
if (nbytes > 0)
- memset(p+8, CLEANBYTE, nbytes);
+ memset(p + 2*SST, CLEANBYTE, nbytes);
- tail = p + 8 + nbytes;
- tail[0] = tail[1] = tail[2] = tail[3] = FORBIDDENBYTE;
- write4(tail + 4, serialno);
+ tail = p + 2*SST + nbytes;
+ memset(tail, FORBIDDENBYTE, SST);
+ write_size_t(tail + SST, serialno);
- return p+8;
+ return p + 2*SST;
}
-/* The debug free first checks the 8 bytes on each end for sanity (in
+/* The debug free first checks the 2*SST bytes on each end for sanity (in
particular, that the FORBIDDENBYTEs are still intact).
Then fills the original bytes with DEADBYTE.
Then calls the underlying free.
@@ -1350,16 +1348,16 @@ _PyObject_DebugMalloc(size_t nbytes)
void
_PyObject_DebugFree(void *p)
{
- uchar *q = (uchar *)p;
+ uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */
size_t nbytes;
if (p == NULL)
return;
_PyObject_DebugCheckAddress(p);
- nbytes = read4(q-8);
+ nbytes = read_size_t(q);
if (nbytes > 0)
memset(q, DEADBYTE, nbytes);
- PyObject_Free(q-8);
+ PyObject_Free(q);
}
void *
@@ -1367,20 +1365,20 @@ _PyObject_DebugRealloc(void *p, size_t nbytes)
{
uchar *q = (uchar *)p;
uchar *tail;
- size_t total; /* nbytes + 16 */
+ size_t total; /* nbytes + 4*SST */
size_t original_nbytes;
+ int i;
if (p == NULL)
return _PyObject_DebugMalloc(nbytes);
_PyObject_DebugCheckAddress(p);
bumpserialno();
- original_nbytes = read4(q-8);
- total = nbytes + 16;
- if (total < nbytes || (total >> 31) > 1) {
- /* overflow, or we can't represent it in 4 bytes */
+ original_nbytes = read_size_t(q - 2*SST);
+ total = nbytes + 4*SST;
+ if (total < nbytes)
+ /* overflow: can't represent total as a size_t */
return NULL;
- }
if (nbytes < original_nbytes) {
/* shrinking: mark old extra memory dead */
@@ -1388,19 +1386,17 @@ _PyObject_DebugRealloc(void *p, size_t nbytes)
}
/* Resize and add decorations. */
- q = (uchar *)PyObject_Realloc(q-8, total);
+ q = (uchar *)PyObject_Realloc(q - 2*SST, total);
if (q == NULL)
return NULL;
- write4(q, (ulong)nbytes);
- assert(q[4] == FORBIDDENBYTE &&
- q[5] == FORBIDDENBYTE &&
- q[6] == FORBIDDENBYTE &&
- q[7] == FORBIDDENBYTE);
- q += 8;
+ write_size_t(q, nbytes);
+ for (i = 0; i < SST; ++i)
+ assert(q[SST + i] == FORBIDDENBYTE);
+ q += 2*SST;
tail = q + nbytes;
- tail[0] = tail[1] = tail[2] = tail[3] = FORBIDDENBYTE;
- write4(tail + 4, serialno);
+ memset(tail, FORBIDDENBYTE, SST);
+ write_size_t(tail + SST, serialno);
if (nbytes > original_nbytes) {
/* growing: mark new extra memory clean */
@@ -1420,7 +1416,7 @@ _PyObject_DebugCheckAddress(const void *p)
{
const uchar *q = (const uchar *)p;
char *msg;
- ulong nbytes;
+ size_t nbytes;
const uchar *tail;
int i;
@@ -1433,16 +1429,16 @@ _PyObject_DebugCheckAddress(const void *p)
* corruption, the number-of-bytes field may be nuts, and checking
* the tail could lead to a segfault then.
*/
- for (i = 4; i >= 1; --i) {
+ for (i = SST; i >= 1; --i) {
if (*(q-i) != FORBIDDENBYTE) {
msg = "bad leading pad byte";
goto error;
}
}
- nbytes = read4(q-8);
+ nbytes = read_size_t(q - 2*SST);
tail = q + nbytes;
- for (i = 0; i < 4; ++i) {
+ for (i = 0; i < SST; ++i) {
if (tail[i] != FORBIDDENBYTE) {
msg = "bad trailing pad byte";
goto error;
@@ -1462,28 +1458,33 @@ _PyObject_DebugDumpAddress(const void *p)
{
const uchar *q = (const uchar *)p;
const uchar *tail;
- ulong nbytes, serial;
+ size_t nbytes, serial;
int i;
+ int ok;
fprintf(stderr, "Debug memory block at address p=%p:\n", p);
if (p == NULL)
return;
- nbytes = read4(q-8);
- fprintf(stderr, " %lu bytes originally requested\n", nbytes);
+ nbytes = read_size_t(q - 2*SST);
+ fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally "
+ "requested\n", nbytes);
/* In case this is nuts, check the leading pad bytes first. */
- fputs(" The 4 pad bytes at p-4 are ", stderr);
- if (*(q-4) == FORBIDDENBYTE &&
- *(q-3) == FORBIDDENBYTE &&
- *(q-2) == FORBIDDENBYTE &&
- *(q-1) == FORBIDDENBYTE) {
- fputs("FORBIDDENBYTE, as expected.\n", stderr);
+ fprintf(stderr, " The %d pad bytes at p-%d are ", SST, SST);
+ ok = 1;
+ for (i = 1; i <= SST; ++i) {
+ if (*(q-i) != FORBIDDENBYTE) {
+ ok = 0;
+ break;
+ }
}
+ if (ok)
+ fputs("FORBIDDENBYTE, as expected.\n", stderr);
else {
fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
FORBIDDENBYTE);
- for (i = 4; i >= 1; --i) {
+ for (i = SST; i >= 1; --i) {
const uchar byte = *(q-i);
fprintf(stderr, " at p-%d: 0x%02x", i, byte);
if (byte != FORBIDDENBYTE)
@@ -1498,17 +1499,20 @@ _PyObject_DebugDumpAddress(const void *p)
}
tail = q + nbytes;
- fprintf(stderr, " The 4 pad bytes at tail=%p are ", tail);
- if (tail[0] == FORBIDDENBYTE &&
- tail[1] == FORBIDDENBYTE &&
- tail[2] == FORBIDDENBYTE &&
- tail[3] == FORBIDDENBYTE) {
- fputs("FORBIDDENBYTE, as expected.\n", stderr);
+ fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail);
+ ok = 1;
+ for (i = 0; i < SST; ++i) {
+ if (tail[i] != FORBIDDENBYTE) {
+ ok = 0;
+ break;
+ }
}
+ if (ok)
+ fputs("FORBIDDENBYTE, as expected.\n", stderr);
else {
fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
FORBIDDENBYTE);
- for (i = 0; i < 4; ++i) {
+ for (i = 0; i < SST; ++i) {
const uchar byte = tail[i];
fprintf(stderr, " at tail+%d: 0x%02x",
i, byte);
@@ -1518,12 +1522,12 @@ _PyObject_DebugDumpAddress(const void *p)
}
}
- serial = read4(tail+4);
- fprintf(stderr, " The block was made by call #%lu to "
- "debug malloc/realloc.\n", serial);
+ serial = read_size_t(tail + SST);
+ fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T
+ "u to debug malloc/realloc.\n", serial);
if (nbytes > 0) {
- int i = 0;
+ i = 0;
fputs(" Data at p:", stderr);
/* print up to 8 bytes at the start */
while (q < tail && i < 8) {
@@ -1546,12 +1550,12 @@ _PyObject_DebugDumpAddress(const void *p)
}
}
-static ulong
-printone(const char* msg, ulong value)
+static size_t
+printone(const char* msg, size_t value)
{
int i, k;
char buf[100];
- ulong origvalue = value;
+ size_t origvalue = value;
fputs(msg, stderr);
for (i = (int)strlen(msg); i < 35; ++i)
@@ -1564,8 +1568,8 @@ printone(const char* msg, ulong value)
buf[i--] = '\n';
k = 3;
do {
- ulong nextvalue = value / 10UL;
- uint digit = value - nextvalue * 10UL;
+ size_t nextvalue = value / 10;
+ uint digit = (uint)(value - nextvalue * 10);
value = nextvalue;
buf[i--] = (char)(digit + '0');
--k;
@@ -1592,28 +1596,28 @@ _PyObject_DebugMallocStats(void)
uint i;
const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
/* # of pools, allocated blocks, and free blocks per class index */
- ulong numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
- ulong numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
- ulong numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
+ size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
+ size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
+ size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
/* total # of allocated bytes in used and full pools */
- ulong allocated_bytes = 0;
+ size_t allocated_bytes = 0;
/* total # of available bytes in used pools */
- ulong available_bytes = 0;
+ size_t available_bytes = 0;
/* # of free pools + pools not yet carved out of current arena */
uint numfreepools = 0;
/* # of bytes for arena alignment padding */
- ulong arena_alignment = 0;
+ size_t arena_alignment = 0;
/* # of bytes in used and full pools used for pool_headers */
- ulong pool_header_bytes = 0;
+ size_t pool_header_bytes = 0;
/* # of bytes in used and full pools wasted due to quantization,
* i.e. the necessarily leftover space at the ends of used and
* full pools.
*/
- ulong quantization = 0;
+ size_t quantization = 0;
/* # of arenas actually allocated. */
- ulong narenas = 0;
+ size_t narenas = 0;
/* running total -- should equal narenas * ARENA_SIZE */
- ulong total;
+ size_t total;
char buf[128];
fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
@@ -1678,15 +1682,18 @@ _PyObject_DebugMallocStats(void)
stderr);
for (i = 0; i < numclasses; ++i) {
- ulong p = numpools[i];
- ulong b = numblocks[i];
- ulong f = numfreeblocks[i];
+ size_t p = numpools[i];
+ size_t b = numblocks[i];
+ size_t f = numfreeblocks[i];
uint size = INDEX2SIZE(i);
if (p == 0) {
assert(b == 0 && f == 0);
continue;
}
- fprintf(stderr, "%5u %6u %11lu %15lu %13lu\n",
+ fprintf(stderr, "%5u %6u "
+ "%11" PY_FORMAT_SIZE_T "u "
+ "%15" PY_FORMAT_SIZE_T "u "
+ "%13" PY_FORMAT_SIZE_T "u\n",
i, size, p, b, f);
allocated_bytes += b * size;
available_bytes += f * size;
@@ -1702,7 +1709,8 @@ _PyObject_DebugMallocStats(void)
(void)printone("# arenas allocated current", narenas);
PyOS_snprintf(buf, sizeof(buf),
- "%lu arenas * %d bytes/arena", narenas, ARENA_SIZE);
+ "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
+ narenas, ARENA_SIZE);
(void)printone(buf, narenas * ARENA_SIZE);
fputc('\n', stderr);
@@ -1712,7 +1720,7 @@ _PyObject_DebugMallocStats(void)
PyOS_snprintf(buf, sizeof(buf),
"%u unused pools * %d bytes", numfreepools, POOL_SIZE);
- total += printone(buf, (ulong)numfreepools * POOL_SIZE);
+ total += printone(buf, (size_t)numfreepools * POOL_SIZE);
total += printone("# bytes lost to pool headers", pool_header_bytes);
total += printone("# bytes lost to quantization", quantization);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 6cdb0fc..dcc9274 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3057,6 +3057,221 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
return NULL;
}
+/* Charmap encoding: the lookup table */
+
+/* Three-level lookup trie for charmap encoding, stored in one allocation.
+ * A character c is split as c>>11 (level 1), (c>>7)&0xF (level 2) and
+ * c&0x7F (level 3). */
+struct encoding_map{
+ PyObject_HEAD
+ /* level-2 block number for each c>>11, or 0xFF if none */
+ unsigned char level1[32];
+ /* number of 16-byte level-2 blocks and 128-byte level-3 blocks */
+ int count2, count3;
+ /* variable-length tail: 16*count2 level-2 bytes followed by
+ 128*count3 level-3 bytes; the [1] is subtracted again when sizing */
+ unsigned char level23[1];
+};
+
+/* size() method: return the number of bytes this EncodingMap occupies,
+ * i.e. the fixed struct (minus its one-byte placeholder) plus the level-2
+ * and level-3 tables that follow it. */
+static PyObject*
+encoding_map_size(PyObject *obj, PyObject* args)
+{
+	struct encoding_map *trie = (struct encoding_map*)obj;
+
+	return PyInt_FromLong(sizeof(*trie) - 1 + 16*trie->count2 + 128*trie->count3);
+}
+
+/* Method table for EncodingMap objects. */
+static PyMethodDef encoding_map_methods[] = {
+	{"size", encoding_map_size, METH_NOARGS,
+	 PyDoc_STR("Return the size (in bytes) of this object")},
+	{NULL, NULL, 0, NULL}	/* sentinel */
+};
+
+/* tp_dealloc for EncodingMap: the object owns no references, so releasing
+ * its single memory block is all that is needed. */
+static void
+encoding_map_dealloc(PyObject* self)
+{
+	PyObject_FREE(self);
+}
+
+/* Type object for the charmap encoding trie (struct encoding_map).
+ * Instances are created by PyUnicode_BuildEncodingMap() with
+ * PyObject_MALLOC + PyObject_Init; only the deallocator and the method
+ * table are filled in, every other slot is left empty.
+ * NOTE(review): initialised with PyObject_HEAD_INIT(NULL); no
+ * PyType_Ready() call is visible here -- confirm where the type is readied. */
+static PyTypeObject EncodingMapType = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ "EncodingMap", /*tp_name*/
+ sizeof(struct encoding_map), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ encoding_map_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT, /*tp_flags*/
+ 0, /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ encoding_map_methods, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ 0, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+};
+
+/* Build an encoding lookup object from a 256-character decoding table.
+ *
+ * string must be a unicode object of length exactly 256, where string[i]
+ * is the character that byte i decodes to (0xFFFE marks an unmapped byte
+ * when the trie is built).  Otherwise PyErr_BadArgument() is called and
+ * NULL is returned.
+ *
+ * Returns a new reference to either
+ *  - an EncodingMap trie, when the table fits the compact representation, or
+ *  - a dict mapping each character code to its byte value, as the fallback.
+ * Returns NULL if an allocation or dict insertion fails.
+ */
+PyObject*
+PyUnicode_BuildEncodingMap(PyObject* string)
+{
+ Py_UNICODE *decode;
+ PyObject *result;
+ struct encoding_map *mresult;
+ int i;
+ int need_dict = 0;
+ /* scratch tables for the counting pass, keyed by c>>11 and c>>7 */
+ unsigned char level1[32];
+ unsigned char level2[512];
+ unsigned char *mlevel1, *mlevel2, *mlevel3;
+ int count2 = 0, count3 = 0;
+
+ if (!PyUnicode_Check(string) || PyUnicode_GetSize(string) != 256) {
+ PyErr_BadArgument();
+ return NULL;
+ }
+ decode = PyUnicode_AS_UNICODE(string);
+ /* 0xFF means "no block assigned yet" */
+ memset(level1, 0xFF, sizeof level1);
+ memset(level2, 0xFF, sizeof level2);
+
+ /* If there isn't a one-to-one mapping of NULL to \0,
+ or if there are non-BMP characters, we need to use
+ a mapping dictionary. */
+ if (decode[0] != 0)
+ need_dict = 1;
+ /* First pass: count the level-2 and level-3 blocks the trie needs. */
+ for (i = 1; i < 256; i++) {
+ int l1, l2;
+ if (decode[i] == 0
+ #ifdef Py_UNICODE_WIDE
+ || decode[i] > 0xFFFF
+ #endif
+ ) {
+ need_dict = 1;
+ break;
+ }
+ if (decode[i] == 0xFFFE)
+ /* unmapped character */
+ continue;
+ l1 = decode[i] >> 11;
+ l2 = decode[i] >> 7;
+ if (level1[l1] == 0xFF)
+ level1[l1] = count2++;
+ if (level2[l2] == 0xFF)
+ level2[l2] = count3++;
+ }
+
+ /* Block numbers are stored in one byte and 0xFF is the "empty"
+ marker, so the counts must stay below it. */
+ if (count2 >= 0xFF || count3 >= 0xFF)
+ need_dict = 1;
+
+ if (need_dict) {
+ /* Fallback: plain dict of character code -> byte value.
+ Note that this result shadows the outer variable. */
+ PyObject *result = PyDict_New();
+ PyObject *key, *value;
+ if (!result)
+ return NULL;
+ for (i = 0; i < 256; i++) {
+ key = value = NULL;
+ key = PyInt_FromLong(decode[i]);
+ value = PyInt_FromLong(i);
+ if (!key || !value)
+ goto failed1;
+ if (PyDict_SetItem(result, key, value) == -1)
+ goto failed1;
+ Py_DECREF(key);
+ Py_DECREF(value);
+ }
+ return result;
+ failed1:
+ /* release whatever was created for the failing entry, then the dict */
+ Py_XDECREF(key);
+ Py_XDECREF(value);
+ Py_DECREF(result);
+ return NULL;
+ }
+
+ /* Create a three-level trie */
+ /* sizeof already counts the one-byte level23 placeholder, hence the -1 */
+ result = PyObject_MALLOC(sizeof(struct encoding_map) +
+ 16*count2 + 128*count3 - 1);
+ if (!result)
+ return PyErr_NoMemory();
+ PyObject_Init(result, &EncodingMapType);
+ mresult = (struct encoding_map*)result;
+ mresult->count2 = count2;
+ mresult->count3 = count3;
+ mlevel1 = mresult->level1;
+ mlevel2 = mresult->level23;
+ mlevel3 = mresult->level23 + 16*count2;
+ memcpy(mlevel1, level1, 32);
+ memset(mlevel2, 0xFF, 16*count2);
+ /* a 0 byte at level 3 means "no mapping" (see encoding_map_lookup) */
+ memset(mlevel3, 0, 128*count3);
+ /* Second pass: fill the trie, numbering level-3 blocks again from 0
+ in order of first use.  Byte 0 is skipped; the lookup handles
+ character 0 as a special case. */
+ count3 = 0;
+ for (i = 1; i < 256; i++) {
+ int o1, o2, o3, i2, i3;
+ if (decode[i] == 0xFFFE)
+ /* unmapped character */
+ continue;
+ o1 = decode[i]>>11;
+ o2 = (decode[i]>>7) & 0xF;
+ i2 = 16*mlevel1[o1] + o2;
+ if (mlevel2[i2] == 0xFF)
+ mlevel2[i2] = count3++;
+ o3 = decode[i] & 0x7F;
+ i3 = 128*mlevel2[i2] + o3;
+ mlevel3[i3] = i;
+ }
+ return result;
+}
+
+/* Look up character c in an EncodingMap trie.
+ *
+ * Returns the byte value c encodes to, or -1 if c has no mapping.
+ * Character 0 always maps to byte 0.  mapping must be an EncodingMap
+ * object; this is not checked here.
+ */
+static int
+encoding_map_lookup(Py_UNICODE c, PyObject *mapping)
+{
+	struct encoding_map *trie = (struct encoding_map*)mapping;
+	int idx;
+
+#ifdef Py_UNICODE_WIDE
+	/* the trie only covers the BMP */
+	if (c > 0xFFFF)
+		return -1;
+#endif
+	if (c == 0)
+		return 0;
+
+	/* level 1: top bits select a level-2 block (0xFF = none) */
+	idx = trie->level1[c>>11];
+	if (idx == 0xFF)
+		return -1;
+
+	/* level 2: next four bits select a level-3 block (0xFF = none) */
+	idx = trie->level23[16*idx + ((c>>7) & 0xF)];
+	if (idx == 0xFF)
+		return -1;
+
+	/* level 3: low seven bits select the byte (0 = unmapped) */
+	idx = trie->level23[16*trie->count2 + 128*idx + (c & 0x7F)];
+	return (idx == 0) ? -1 : idx;
+}
+
/* Lookup the character ch in the mapping. If the character
can't be found, Py_None is returned (or NULL, if another
error occurred). */
@@ -3102,6 +3317,22 @@ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
}
}
+/* Grow the output string *outobj to hold at least requiredsize bytes.
+ *
+ * The new size is never less than twice the current size, so repeated
+ * appends need few reallocations.  Returns 1 on success and 0 if
+ * _PyString_Resize() fails.  outpos is not used.
+ */
+static int
+charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
+{
+	Py_ssize_t doubled = 2*PyString_GET_SIZE(*outobj);
+	Py_ssize_t newsize = (requiredsize < doubled) ? doubled : requiredsize;
+
+	return _PyString_Resize(outobj, newsize) ? 0 : 1;
+}
+
+/* Outcome of charmapencode_output():
+ enc_SUCCESS - the character was encoded and written to the output
+ enc_FAILED - the mapping has no entry for the character; nothing written
+ enc_EXCEPTION - the lookup or the output resize failed */
+typedef enum charmapencode_result {
+ enc_SUCCESS, enc_FAILED, enc_EXCEPTION
+}charmapencode_result;
/* lookup the character, put the result in the output string and adjust
various state variables. Reallocate the output string if not enough
space is available. Return a new reference to the object that
@@ -3109,51 +3340,59 @@ static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
(in which case no character was written) or NULL, if a
reallocation error occurred. The caller must decref the result */
static
-PyObject *charmapencode_output(Py_UNICODE c, PyObject *mapping,
+charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
PyObject **outobj, Py_ssize_t *outpos)
{
- PyObject *rep = charmapencode_lookup(c, mapping);
+ PyObject *rep;
+ char *outstart;
+ Py_ssize_t outsize = PyString_GET_SIZE(*outobj);
+ if (mapping->ob_type == &EncodingMapType) {
+ int res = encoding_map_lookup(c, mapping);
+ Py_ssize_t requiredsize = *outpos+1;
+ if (res == -1)
+ return enc_FAILED;
+ if (outsize<requiredsize)
+ if (!charmapencode_resize(outobj, outpos, requiredsize))
+ return enc_EXCEPTION;
+ outstart = PyString_AS_STRING(*outobj);
+ outstart[(*outpos)++] = (char)res;
+ return enc_SUCCESS;
+ }
+
+ rep = charmapencode_lookup(c, mapping);
if (rep==NULL)
- return NULL;
- else if (rep==Py_None)
- return rep;
- else {
- char *outstart = PyString_AS_STRING(*outobj);
- Py_ssize_t outsize = PyString_GET_SIZE(*outobj);
+ return enc_EXCEPTION;
+ else if (rep==Py_None) {
+ Py_DECREF(rep);
+ return enc_FAILED;
+ } else {
if (PyInt_Check(rep)) {
Py_ssize_t requiredsize = *outpos+1;
- if (outsize<requiredsize) {
- /* exponentially overallocate to minimize reallocations */
- if (requiredsize < 2*outsize)
- requiredsize = 2*outsize;
- if (_PyString_Resize(outobj, requiredsize)) {
+ if (outsize<requiredsize)
+ if (!charmapencode_resize(outobj, outpos, requiredsize)) {
Py_DECREF(rep);
- return NULL;
+ return enc_EXCEPTION;
}
- outstart = PyString_AS_STRING(*outobj);
- }
+ outstart = PyString_AS_STRING(*outobj);
outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep);
}
else {
const char *repchars = PyString_AS_STRING(rep);
Py_ssize_t repsize = PyString_GET_SIZE(rep);
Py_ssize_t requiredsize = *outpos+repsize;
- if (outsize<requiredsize) {
- /* exponentially overallocate to minimize reallocations */
- if (requiredsize < 2*outsize)
- requiredsize = 2*outsize;
- if (_PyString_Resize(outobj, requiredsize)) {
+ if (outsize<requiredsize)
+ if (!charmapencode_resize(outobj, outpos, requiredsize)) {
Py_DECREF(rep);
- return NULL;
+ return enc_EXCEPTION;
}
- outstart = PyString_AS_STRING(*outobj);
- }
+ outstart = PyString_AS_STRING(*outobj);
memcpy(outstart + *outpos, repchars, repsize);
*outpos += repsize;
}
}
- return rep;
+ Py_DECREF(rep);
+ return enc_SUCCESS;
}
/* handle an error in PyUnicode_EncodeCharmap
@@ -3175,18 +3414,27 @@ int charmap_encoding_error(
Py_ssize_t collpos;
char *encoding = "charmap";
char *reason = "character maps to <undefined>";
+ charmapencode_result x;
- PyObject *x;
/* find all unencodable characters */
while (collendpos < size) {
- x = charmapencode_lookup(p[collendpos], mapping);
- if (x==NULL)
+ PyObject *rep;
+ if (mapping->ob_type == &EncodingMapType) {
+ int res = encoding_map_lookup(p[collendpos], mapping);
+ if (res != -1)
+ break;
+ ++collendpos;
+ continue;
+ }
+
+ rep = charmapencode_lookup(p[collendpos], mapping);
+ if (rep==NULL)
return -1;
- else if (x!=Py_None) {
- Py_DECREF(x);
+ else if (rep!=Py_None) {
+ Py_DECREF(rep);
break;
}
- Py_DECREF(x);
+ Py_DECREF(rep);
++collendpos;
}
/* cache callback name lookup
@@ -3210,15 +3458,13 @@ int charmap_encoding_error(
case 2: /* replace */
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
x = charmapencode_output('?', mapping, res, respos);
- if (x==NULL) {
+ if (x==enc_EXCEPTION) {
return -1;
}
- else if (x==Py_None) {
- Py_DECREF(x);
+ else if (x==enc_FAILED) {
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
return -1;
}
- Py_DECREF(x);
}
/* fall through */
case 3: /* ignore */
@@ -3232,14 +3478,12 @@ int charmap_encoding_error(
sprintf(buffer, "&#%d;", (int)p[collpos]);
for (cp = buffer; *cp; ++cp) {
x = charmapencode_output(*cp, mapping, res, respos);
- if (x==NULL)
+ if (x==enc_EXCEPTION)
return -1;
- else if (x==Py_None) {
- Py_DECREF(x);
+ else if (x==enc_FAILED) {
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
return -1;
}
- Py_DECREF(x);
}
}
*inpos = collendpos;
@@ -3254,17 +3498,14 @@ int charmap_encoding_error(
repsize = PyUnicode_GET_SIZE(repunicode);
for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
x = charmapencode_output(*uni2, mapping, res, respos);
- if (x==NULL) {
- Py_DECREF(repunicode);
+ if (x==enc_EXCEPTION) {
return -1;
}
- else if (x==Py_None) {
+ else if (x==enc_FAILED) {
Py_DECREF(repunicode);
- Py_DECREF(x);
raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
return -1;
}
- Py_DECREF(x);
}
*inpos = newpos;
Py_DECREF(repunicode);
@@ -3304,22 +3545,20 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
while (inpos<size) {
/* try to encode it */
- PyObject *x = charmapencode_output(p[inpos], mapping, &res, &respos);
- if (x==NULL) /* error */
+ charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
+ if (x==enc_EXCEPTION) /* error */
goto onError;
- if (x==Py_None) { /* unencodable character */
+ if (x==enc_FAILED) { /* unencodable character */
if (charmap_encoding_error(p, size, &inpos, mapping,
&exc,
&known_errorHandler, &errorHandler, errors,
&res, &respos)) {
- Py_DECREF(x);
goto onError;
}
}
else
/* done with this character => adjust input position */
++inpos;
- Py_DECREF(x);
}
/* Resize if we allocated too much */