summary | refs | log | tree | commit | diff | stats
path: root/Python/codecs.c
diff options
context:
space:
mode:
Diffstat (limited to 'Python/codecs.c')
-rw-r--r-- Python/codecs.c | 272
1 files changed, 30 insertions, 242 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index 45d9929..7334eb3 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -61,22 +61,18 @@ PyObject *normalizestring(const char *string)
return NULL;
}
- p = PyMem_Malloc(len + 1);
- if (p == NULL)
+ v = PyString_FromStringAndSize(NULL, len);
+ if (v == NULL)
return NULL;
+ p = PyString_AS_STRING(v);
for (i = 0; i < len; i++) {
register char ch = string[i];
if (ch == ' ')
ch = '-';
else
- ch = tolower(Py_CHARMASK(ch));
+ ch = Py_TOLOWER(Py_CHARMASK(ch));
p[i] = ch;
}
- p[i] = '\0';
- v = PyUnicode_FromString(p);
- if (v == NULL)
- return NULL;
- PyMem_Free(p);
return v;
}
@@ -116,7 +112,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
v = normalizestring(encoding);
if (v == NULL)
goto onError;
- PyUnicode_InternInPlace(&v);
+ PyString_InternInPlace(&v);
/* First, try to lookup the name in the registry dictionary */
result = PyDict_GetItem(interp->codec_search_cache, v);
@@ -171,10 +167,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
}
/* Cache and return the result */
- if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
- Py_DECREF(result);
- goto onError;
- }
+ PyDict_SetItem(interp->codec_search_cache, v, result);
Py_DECREF(args);
return result;
@@ -183,23 +176,6 @@ PyObject *_PyCodec_Lookup(const char *encoding)
return NULL;
}
-/* Codec registry encoding check API. */
-
-int PyCodec_KnownEncoding(const char *encoding)
-{
- PyObject *codecs;
-
- codecs = _PyCodec_Lookup(encoding);
- if (!codecs) {
- PyErr_Clear();
- return 0;
- }
- else {
- Py_DECREF(codecs);
- return 1;
- }
-}
-
static
PyObject *args_tuple(PyObject *object,
const char *errors)
@@ -214,7 +190,7 @@ PyObject *args_tuple(PyObject *object,
if (errors) {
PyObject *v;
- v = PyUnicode_FromString(errors);
+ v = PyString_FromString(errors);
if (v == NULL) {
Py_DECREF(args);
return NULL;
@@ -341,7 +317,7 @@ PyObject *PyCodec_Encode(PyObject *object,
{
PyObject *encoder = NULL;
PyObject *args = NULL, *result = NULL;
- PyObject *v = NULL;
+ PyObject *v;
encoder = PyCodec_Encoder(encoding);
if (encoder == NULL)
@@ -351,14 +327,14 @@ PyObject *PyCodec_Encode(PyObject *object,
if (args == NULL)
goto onError;
- result = PyEval_CallObject(encoder, args);
+ result = PyEval_CallObject(encoder,args);
if (result == NULL)
goto onError;
if (!PyTuple_Check(result) ||
PyTuple_GET_SIZE(result) != 2) {
PyErr_SetString(PyExc_TypeError,
- "encoder must return a tuple (object, integer)");
+ "encoder must return a tuple (object,integer)");
goto onError;
}
v = PyTuple_GET_ITEM(result,0);
@@ -470,9 +446,14 @@ static void wrong_exception_type(PyObject *exc)
PyObject *name = PyObject_GetAttrString(type, "__name__");
Py_DECREF(type);
if (name != NULL) {
- PyErr_Format(PyExc_TypeError,
- "don't know how to handle %S in error callback", name);
+ PyObject *string = PyObject_Str(name);
Py_DECREF(name);
+ if (string != NULL) {
+ PyErr_Format(PyExc_TypeError,
+ "don't know how to handle %.400s in error callback",
+ PyString_AS_STRING(string));
+ Py_DECREF(string);
+ }
}
}
}
@@ -487,6 +468,7 @@ PyObject *PyCodec_StrictErrors(PyObject *exc)
}
+#ifdef Py_USING_UNICODE
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
Py_ssize_t end;
@@ -678,13 +660,6 @@ static Py_UNICODE hexdigits[] = {
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
-#ifndef Py_UNICODE_WIDE
-#define IS_SURROGATE_PAIR(p, end) \
- (*p >= 0xD800 && *p <= 0xDBFF && (p + 1) < end && \
- *(p + 1) >= 0xDC00 && *(p + 1) <= 0xDFFF)
-#else
-#define IS_SURROGATE_PAIR(p, end) 0
-#endif
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
@@ -709,12 +684,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
else
#endif
if (*p >= 0x100) {
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- ressize += 1+1+8;
- ++p;
- }
- else
- ressize += 1+1+4;
+ ressize += 1+1+4;
}
else
ressize += 1+1+2;
@@ -724,12 +694,9 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
return NULL;
for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
p < startp+end; ++p) {
- Py_UCS4 c = (Py_UCS4) *p;
+ Py_UNICODE c = *p;
*outp++ = '\\';
- if (IS_SURROGATE_PAIR(p, startp+end)) {
- c = ((*p & 0x3FF) << 10) + (*(p + 1) & 0x3FF) + 0x10000;
- ++p;
- }
+#ifdef Py_UNICODE_WIDE
if (c >= 0x00010000) {
*outp++ = 'U';
*outp++ = hexdigits[(c>>28)&0xf];
@@ -739,7 +706,9 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
*outp++ = hexdigits[(c>>12)&0xf];
*outp++ = hexdigits[(c>>8)&0xf];
}
- else if (c >= 0x100) {
+ else
+#endif
+ if (c >= 0x100) {
*outp++ = 'u';
*outp++ = hexdigits[(c>>12)&0xf];
*outp++ = hexdigits[(c>>8)&0xf];
@@ -759,166 +728,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
wrong_exception_type(exc);
return NULL;
}
-#undef IS_SURROGATE_PAIR
-}
-
-/* This handler is declared static until someone demonstrates
- a need to call it directly. */
-static PyObject *
-PyCodec_SurrogatePassErrors(PyObject *exc)
-{
- PyObject *restuple;
- PyObject *object;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
- char *outp;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
- if (!res) {
- Py_DECREF(object);
- return NULL;
- }
- outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
- if (ch < 0xd800 || ch > 0xdfff) {
- /* Not a surrogate, fail with original exception */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- Py_DECREF(res);
- Py_DECREF(object);
- return NULL;
- }
- *outp++ = (char)(0xe0 | (ch >> 12));
- *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f));
- *outp++ = (char)(0x80 | (ch & 0x3f));
- }
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- Py_DECREF(object);
- return restuple;
- }
- else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
- unsigned char *p;
- Py_UNICODE ch = 0;
- if (PyUnicodeDecodeError_GetStart(exc, &start))
- return NULL;
- if (!(object = PyUnicodeDecodeError_GetObject(exc)))
- return NULL;
- if (!(p = (unsigned char*)PyBytes_AsString(object))) {
- Py_DECREF(object);
- return NULL;
- }
- /* Try decoding a single surrogate character. If
- there are more, let the codec call us again. */
- p += start;
- if ((p[0] & 0xf0) == 0xe0 ||
- (p[1] & 0xc0) == 0x80 ||
- (p[2] & 0xc0) == 0x80) {
- /* it's a three-byte code */
- ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
- if (ch < 0xd800 || ch > 0xdfff)
- /* it's not a surrogate - fail */
- ch = 0;
- }
- Py_DECREF(object);
- if (ch == 0) {
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- return NULL;
- }
- return Py_BuildValue("(u#n)", &ch, 1, start+3);
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
-}
-
-static PyObject *
-PyCodec_SurrogateEscapeErrors(PyObject *exc)
-{
- PyObject *restuple;
- PyObject *object;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
- Py_UNICODE *p;
- Py_UNICODE *startp;
- char *outp;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- startp = PyUnicode_AS_UNICODE(object);
- res = PyBytes_FromStringAndSize(NULL, end-start);
- if (!res) {
- Py_DECREF(object);
- return NULL;
- }
- outp = PyBytes_AsString(res);
- for (p = startp+start; p < startp+end; p++) {
- Py_UNICODE ch = *p;
- if (ch < 0xdc80 || ch > 0xdcff) {
- /* Not a UTF-8b surrogate, fail with original exception */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- Py_DECREF(res);
- Py_DECREF(object);
- return NULL;
- }
- *outp++ = ch - 0xdc00;
- }
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- Py_DECREF(object);
- return restuple;
- }
- else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
- unsigned char *p;
- Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
- int consumed = 0;
- if (PyUnicodeDecodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeDecodeError_GetObject(exc)))
- return NULL;
- if (!(p = (unsigned char*)PyBytes_AsString(object))) {
- Py_DECREF(object);
- return NULL;
- }
- while (consumed < 4 && consumed < end-start) {
- /* Refuse to escape ASCII bytes. */
- if (p[start+consumed] < 128)
- break;
- ch[consumed] = 0xdc00 + p[start+consumed];
- consumed++;
- }
- Py_DECREF(object);
- if (!consumed) {
- /* codec complained about ASCII byte. */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- return NULL;
- }
- return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
}
-
+#endif
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
@@ -926,6 +737,7 @@ static PyObject *strict_errors(PyObject *self, PyObject *exc)
}
+#ifdef Py_USING_UNICODE
static PyObject *ignore_errors(PyObject *self, PyObject *exc)
{
return PyCodec_IgnoreErrors(exc);
@@ -948,16 +760,7 @@ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
{
return PyCodec_BackslashReplaceErrors(exc);
}
-
-static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
-{
- return PyCodec_SurrogatePassErrors(exc);
-}
-
-static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
-{
- return PyCodec_SurrogateEscapeErrors(exc);
-}
+#endif
static int _PyCodecRegistry_Init(void)
{
@@ -976,6 +779,7 @@ static int _PyCodecRegistry_Init(void)
"raises a UnicodeError on coding errors.")
}
},
+#ifdef Py_USING_UNICODE
{
"ignore",
{
@@ -1017,23 +821,8 @@ static int _PyCodecRegistry_Init(void)
"which replaces an unencodable character with a "
"backslashed escape sequence.")
}
- },
- {
- "surrogatepass",
- {
- "surrogatepass",
- surrogatepass_errors,
- METH_O
- }
- },
- {
- "surrogateescape",
- {
- "surrogateescape",
- surrogateescape_errors,
- METH_O
- }
}
+#endif
};
PyInterpreterState *interp = PyThreadState_GET()->interp;
@@ -1065,7 +854,7 @@ static int _PyCodecRegistry_Init(void)
interp->codec_error_registry == NULL)
Py_FatalError("can't initialize codec registry");
- mod = PyImport_ImportModuleNoBlock("encodings");
+ mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
if (mod == NULL) {
if (PyErr_ExceptionMatches(PyExc_ImportError)) {
/* Ignore ImportErrors... this is done so that
@@ -1079,6 +868,5 @@ static int _PyCodecRegistry_Init(void)
return -1;
}
Py_DECREF(mod);
- interp->codecs_initialized = 1;
return 0;
}