summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
Diffstat (limited to 'Python')
-rw-r--r--Python/codecs.c89
-rw-r--r--Python/pythonrun.c32
2 files changed, 105 insertions, 16 deletions
diff --git a/Python/codecs.c b/Python/codecs.c
index 633a24c..7e3ff8a 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc)
}
}
+static PyObject *
+PyCodec_UTF8bErrors(PyObject *exc)
+{
+ PyObject *restuple;
+ PyObject *object;
+ Py_ssize_t start;
+ Py_ssize_t end;
+ PyObject *res;
+ if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+ Py_UNICODE *p;
+ Py_UNICODE *startp;
+ char *outp;
+ if (PyUnicodeEncodeError_GetStart(exc, &start))
+ return NULL;
+ if (PyUnicodeEncodeError_GetEnd(exc, &end))
+ return NULL;
+ if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+ return NULL;
+ startp = PyUnicode_AS_UNICODE(object);
+ res = PyBytes_FromStringAndSize(NULL, end-start);
+ if (!res) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ outp = PyBytes_AsString(res);
+ for (p = startp+start; p < startp+end; p++) {
+ Py_UNICODE ch = *p;
+ if (ch < 0xdc80 || ch > 0xdcff) {
+ /* Not a UTF-8b surrogate, fail with original exception */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ Py_DECREF(res);
+ Py_DECREF(object);
+ return NULL;
+ }
+ *outp++ = ch - 0xdc00;
+ }
+ restuple = Py_BuildValue("(On)", res, end);
+ Py_DECREF(res);
+ Py_DECREF(object);
+ return restuple;
+ }
+ else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+ unsigned char *p;
+ Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+ int consumed = 0;
+ if (PyUnicodeDecodeError_GetStart(exc, &start))
+ return NULL;
+ if (PyUnicodeDecodeError_GetEnd(exc, &end))
+ return NULL;
+ if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+ return NULL;
+ if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+ Py_DECREF(object);
+ return NULL;
+ }
+ while (consumed < 4 && consumed < end-start) {
+ /* Refuse to escape ASCII bytes. */
+ if (p[start+consumed] < 128)
+ break;
+ ch[consumed] = 0xdc00 + p[start+consumed];
+ consumed++;
+ }
+ Py_DECREF(object);
+ if (!consumed) {
+ /* codec complained about ASCII byte. */
+ PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+ return NULL;
+ }
+ return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+ }
+ else {
+ wrong_exception_type(exc);
+ return NULL;
+ }
+}
+
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
@@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateErrors(exc);
}
+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{
+ return PyCodec_UTF8bErrors(exc);
+}
+
static int _PyCodecRegistry_Init(void)
{
static struct {
@@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void)
surrogates_errors,
METH_O
}
+ },
+ {
+ "utf8b",
+ {
+ "utf8b",
+ utf8b_errors,
+ METH_O
+ }
}
};
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index f93403b..c75f55f 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -262,6 +262,22 @@ Py_InitializeEx(int install_sigs)
_PyImportHooks_Init();
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+ /* On Unix, set the file system encoding according to the
+ user's preference, if the CODESET names a well-known
+ Python codec, and Py_FileSystemDefaultEncoding isn't
+ initialized by other means. Also set the encoding of
+ stdin and stdout if these are terminals. */
+
+ codeset = get_codeset();
+ if (codeset) {
+ if (!Py_FileSystemDefaultEncoding)
+ Py_FileSystemDefaultEncoding = codeset;
+ else
+ free(codeset);
+ }
+#endif
+
if (install_sigs)
initsigs(); /* Signal handling stuff, including initintr() */
@@ -285,22 +301,6 @@ Py_InitializeEx(int install_sigs)
#ifdef WITH_THREAD
_PyGILState_Init(interp, tstate);
#endif /* WITH_THREAD */
-
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
- /* On Unix, set the file system encoding according to the
- user's preference, if the CODESET names a well-known
- Python codec, and Py_FileSystemDefaultEncoding isn't
- initialized by other means. Also set the encoding of
- stdin and stdout if these are terminals. */
-
- codeset = get_codeset();
- if (codeset) {
- if (!Py_FileSystemDefaultEncoding)
- Py_FileSystemDefaultEncoding = codeset;
- else
- free(codeset);
- }
-#endif
}
void