summary | refs | log | tree | commit | diff | stats
path: root/Modules/_sre.c
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de> 2003-04-19 12:56:08 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2003-04-19 12:56:08 (GMT)
commit: 78e2f06cc66178887ee0d6d243370efa241a675a (patch)
tree: 07910e9afd123f6c5c394d46f6dbdf087ef978ba /Modules/_sre.c
parent: 53d93adc46c73e6fdcd6db2d16f0136eadba9839 (diff)
download: cpython-78e2f06cc66178887ee0d6d243370efa241a675a.zip
cpython-78e2f06cc66178887ee0d6d243370efa241a675a.tar.gz
cpython-78e2f06cc66178887ee0d6d243370efa241a675a.tar.bz2
Fully support 32-bit codes. Enable BIGCHARSET in UCS-4 builds.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- Modules/_sre.c 52
1 files changed, 42 insertions, 10 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index dde365b..8cae095 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -20,6 +20,7 @@
* 2001-10-24 fl added finditer primitive (for 2.2 only)
* 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
* 2002-11-09 fl fixed empty sub/subn return type
+ * 2003-04-18 mvl fully support 4-byte codes
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
@@ -510,10 +511,18 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
break;
case SRE_OP_CHARSET:
- /* <CHARSET> <bitmap> (16 bits per code word) */
- if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
- return ok;
- set += 16;
+ if (sizeof(SRE_CODE) == 2) {
+ /* <CHARSET> <bitmap> (16 bits per code word) */
+ if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
+ return ok;
+ set += 16;
+ }
+ else {
+ /* <CHARSET> <bitmap> (32 bits per code word) */
+ if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
+ return ok;
+ set += 8;
+ }
break;
case SRE_OP_BIGCHARSET:
@@ -521,11 +530,25 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
{
int count, block;
count = *(set++);
- block = ((unsigned char*)set)[ch >> 8];
- set += 128;
- if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
- return ok;
- set += count*16;
+
+ if (sizeof(SRE_CODE) == 2) {
+ block = ((unsigned char*)set)[ch >> 8];
+ set += 128;
+ if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
+ return ok;
+ set += count*16;
+ }
+ else {
+ if (ch < 65536)
+ block = ((unsigned char*)set)[ch >> 8];
+ else
+ block = -1;
+ set += 64;
+ if (block >=0 &&
+ (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
+ return ok;
+ set += count*8;
+ }
break;
}
@@ -1371,7 +1394,10 @@ _compile(PyObject* self_, PyObject* args)
for (i = 0; i < n; i++) {
PyObject *o = PyList_GET_ITEM(code, i);
- self->code[i] = (SRE_CODE) PyInt_AsLong(o);
+ if (PyInt_Check(o))
+ self->code[i] = (SRE_CODE) PyInt_AsLong(o);
+ else
+ self->code[i] = (SRE_CODE) PyLong_AsUnsignedLong(o);
}
if (PyErr_Occurred()) {
@@ -3045,6 +3071,12 @@ PyMODINIT_FUNC init_sre(void)
Py_DECREF(x);
}
+ x = PyInt_FromLong(sizeof(SRE_CODE));
+ if (x) {
+ PyDict_SetItemString(d, "CODESIZE", x);
+ Py_DECREF(x);
+ }
+
x = PyString_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);