summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2000-03-10 23:09:23 (GMT)
committerGuido van Rossum <guido@python.org>2000-03-10 23:09:23 (GMT)
commite2d67f98d1aade1059b2ff3278672b2ffbaf180e (patch)
tree9c029f014f4f113b7846bdd61ba6419deb638cf6
parent5bfc2eb6972f35c3f81a08d93885e3c86089e22d (diff)
downloadcpython-e2d67f98d1aade1059b2ff3278672b2ffbaf180e.zip
cpython-e2d67f98d1aade1059b2ff3278672b2ffbaf180e.tar.gz
cpython-e2d67f98d1aade1059b2ff3278672b2ffbaf180e.tar.bz2
Internal module _codecs -- Provides access to the codec registry and
the builtin codecs. Written by Marc-Andre Lemburg.
-rw-r--r--Modules/_codecsmodule.c529
1 files changed, 529 insertions, 0 deletions
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
new file mode 100644
index 0000000..6c8a2d4
--- /dev/null
+++ b/Modules/_codecsmodule.c
@@ -0,0 +1,529 @@
+/* ------------------------------------------------------------------------
+
+ _codecs -- Provides access to the codec registry and the builtin
+ codecs.
+
+ This module should never be imported directly. The standard library
+ module "codecs" wraps this builtin module for use within Python.
+
+ The codec registry is accessible via:
+
+ register(search_function) -> None
+
+ lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
+
+ The builtin Unicode codecs use the following interface:
+
+ <encoding>_encode(Unicode_object[,errors='strict']) ->
+ (string object, Unicode characters consumed)
+
+ <encoding>_decode(char_buffer_obj[,errors='strict']) ->
+ (Unicode object, bytes consumed)
+
+ These <encoding>s are available: utf_8, utf_16, utf_16_le, utf_16_be,
+ unicode_escape, raw_unicode_escape, unicode_internal, latin_1,
+ ascii (7-bit), charmap. Some of them (utf_16_encode, utf_16_ex_decode
+ and the charmap codecs) accept an additional optional argument.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+ ------------------------------------------------------------------------ */
+
+#include "Python.h"
+
+/* --- Registry ----------------------------------------------------------- */
+
+/* register(search_function) -> None
+
+   Hands search_function to PyCodec_Register().  Returns a new reference
+   to None on success, or NULL if argument parsing fails or
+   PyCodec_Register() reports a non-zero result. */
+static
+PyObject *codecregister(PyObject *self, PyObject *args)
+{
+    PyObject *search_function;
+
+    if (!PyArg_ParseTuple(args, "O:register", &search_function))
+        return NULL;
+
+    if (PyCodec_Register(search_function) != 0)
+        return NULL;
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* lookup(encoding) -> result of _PyCodec_Lookup(encoding)
+
+   Parses a single string argument and forwards it to the codec
+   registry.  Returns NULL if the argument could not be parsed. */
+static
+PyObject *codeclookup(PyObject *self, PyObject *args)
+{
+    char *name;
+
+    if (PyArg_ParseTuple(args, "s:lookup", &name))
+        return _PyCodec_Lookup(name);
+
+    return NULL;
+}
+
+/* --- Helpers ------------------------------------------------------------ */
+
+/* Build the 2-tuple (unicode, len) that the codec functions return.
+
+   The caller's reference to `unicode` is taken over: it ends up inside
+   the tuple on success and is released on failure.  A NULL `unicode`
+   is passed straight through as NULL, so callers may wrap a codec call
+   that can fail without checking its result first. */
+static
+PyObject *codec_tuple(PyObject *unicode,
+                      int len)
+{
+    PyObject *pair;
+    PyObject *count;
+
+    if (unicode == NULL)
+        return NULL;
+
+    pair = PyTuple_New(2);
+    if (pair == NULL) {
+        Py_DECREF(unicode);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(pair, 0, unicode);
+
+    count = PyInt_FromLong(len);
+    if (count != NULL) {
+        PyTuple_SET_ITEM(pair, 1, count);
+        return pair;
+    }
+
+    /* Dropping the tuple also drops `unicode`, which it now holds. */
+    Py_DECREF(pair);
+    return NULL;
+}
+
+/* --- Decoder ------------------------------------------------------------ */
+
+/* unicode_internal_decode(buffer[, errors]) -> (unicode, size)
+
+   Reinterprets the raw buffer as an array of Py_UNICODE and copies it
+   into a new Unicode object.  The element count is size divided by
+   sizeof(Py_UNICODE), so trailing bytes that do not fill a whole
+   element are not copied; the reported length is nevertheless the full
+   buffer size in bytes.  `errors` is parsed but not used here. */
+static PyObject *
+unicode_internal_decode(PyObject *self,
+                        PyObject *args)
+{
+    const char *raw;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "s#|z:unicode_internal_decode",
+                          &raw, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_FromUnicode((Py_UNICODE *)raw,
+                                    nbytes / sizeof(Py_UNICODE));
+    /* codec_tuple() returns NULL itself when `decoded` is NULL. */
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_8_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Decodes the buffer with PyUnicode_DecodeUTF8() and pairs the result
+   with the input length in bytes. */
+static PyObject *
+utf_8_decode(PyObject *self,
+             PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF8(buf, nbytes, errors);
+    /* codec_tuple() returns NULL itself when `decoded` is NULL. */
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Decodes with PyUnicode_DecodeUTF16(), starting from byteorder 0.
+   The byte order variable is passed by address, but whatever value it
+   holds afterwards is not reported to the caller. */
+static PyObject *
+utf_16_decode(PyObject *self,
+              PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    int byteorder = 0;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errors, &byteorder);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_le_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Same as utf_16_decode, except that the byte order handed to
+   PyUnicode_DecodeUTF16() starts out as -1. */
+static PyObject *
+utf_16_le_decode(PyObject *self,
+                 PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    int byteorder = -1;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errors, &byteorder);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_be_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Same as utf_16_decode, except that the byte order handed to
+   PyUnicode_DecodeUTF16() starts out as 1. */
+static PyObject *
+utf_16_be_decode(PyObject *self,
+                 PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    int byteorder = 1;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errors, &byteorder);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_16_ex_decode(char_buffer[, errors[, byteorder]])
+       -> (unicode, bytesread, byteorder)
+
+   Non-standard variant that exposes the byteorder parameter of the
+   builtin UTF-16 codec.  The caller may supply the initial byte order
+   (default 0); the third item of the result is the byte order in
+   effect at the end of the data.
+
+*/
+
+static PyObject *
+utf_16_ex_decode(PyObject *self,
+                 PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    int byteorder = 0;
+    PyObject *decoded;
+    PyObject *result;
+
+    if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
+                          &buf, &nbytes, &errors, &byteorder))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, nbytes, errors, &byteorder);
+    if (decoded == NULL)
+        return NULL;
+
+    /* "O" makes the tuple take its own reference, so ours is released
+       afterwards whether or not the tuple could be built. */
+    result = Py_BuildValue("Oii", decoded, nbytes, byteorder);
+    Py_DECREF(decoded);
+    return result;
+}
+
+/* unicode_escape_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Decodes the buffer with PyUnicode_DecodeUnicodeEscape() and pairs
+   the result with the input length in bytes. */
+static PyObject *
+unicode_escape_decode(PyObject *self,
+                      PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUnicodeEscape(buf, nbytes, errors);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* raw_unicode_escape_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Decodes the buffer with PyUnicode_DecodeRawUnicodeEscape() and pairs
+   the result with the input length in bytes. */
+static PyObject *
+raw_unicode_escape_decode(PyObject *self,
+                          PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeRawUnicodeEscape(buf, nbytes, errors);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* latin_1_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Decodes the buffer with PyUnicode_DecodeLatin1() and pairs the
+   result with the input length in bytes. */
+static PyObject *
+latin_1_decode(PyObject *self,
+               PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeLatin1(buf, nbytes, errors);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* ascii_decode(char_buffer[, errors]) -> (unicode, size)
+
+   Decodes the buffer with PyUnicode_DecodeASCII() and pairs the result
+   with the input length in bytes. */
+static PyObject *
+ascii_decode(PyObject *self,
+             PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeASCII(buf, nbytes, errors);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* charmap_decode(char_buffer[, errors[, mapping]]) -> (unicode, size)
+
+   Decodes the buffer with PyUnicode_DecodeCharmap().  An omitted
+   mapping and an explicit None are treated alike: both reach the
+   decoder as a NULL mapping. */
+static PyObject *
+charmap_decode(PyObject *self,
+               PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *mapping = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
+                          &buf, &nbytes, &errors, &mapping))
+        return NULL;
+
+    if (mapping == Py_None)
+        mapping = NULL;
+
+    decoded = PyUnicode_DecodeCharmap(buf, nbytes, mapping, errors);
+    return codec_tuple(decoded, nbytes);
+}
+
+/* --- Encoder ------------------------------------------------------------ */
+
+/* readbuffer_encode(buffer[, errors]) -> (string, size)
+
+   Copies the bytes obtained through the "s#" argument format into a
+   new string object and pairs it with the byte count.  `errors` is
+   parsed but not used. */
+static PyObject *
+readbuffer_encode(PyObject *self,
+                  PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *copy;
+
+    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    copy = PyString_FromStringAndSize(buf, nbytes);
+    return codec_tuple(copy, nbytes);
+}
+
+/* charbuffer_encode(char_buffer[, errors]) -> (string, size)
+
+   Copies the bytes obtained through the "t#" argument format into a
+   new string object and pairs it with the byte count.  `errors` is
+   parsed but not used. */
+static PyObject *
+charbuffer_encode(PyObject *self,
+                  PyObject *args)
+{
+    const char *buf;
+    int nbytes;
+    const char *errors = NULL;
+    PyObject *copy;
+
+    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
+                          &buf, &nbytes, &errors))
+        return NULL;
+
+    copy = PyString_FromStringAndSize(buf, nbytes);
+    return codec_tuple(copy, nbytes);
+}
+
+/* utf_8_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeUTF8().  The second
+   item of the result is the length of the Unicode input as given by
+   PyUnicode_GET_SIZE(), not the length of the encoded string. */
+static PyObject *
+utf_8_encode(PyObject *self,
+             PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:utf_8_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
+                                   PyUnicode_GET_SIZE(str),
+                                   errors);
+    /* codec_tuple() returns NULL itself when `encoded` is NULL. */
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* utf_16_encode(unicode[, errors[, byteorder]]) -> (string, length)
+
+   Exposes the byteorder parameter of the builtin UTF-16 codec as an
+   optional third argument.  It defaults to 0, which means: use the
+   native byte order and prepend a byte order mark (BOM) to the data.
+
+*/
+
+static PyObject *
+utf_16_encode(PyObject *self,
+              PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    int byteorder = 0;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|zi:utf_16_encode",
+                          &str, &errors, &byteorder))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                    PyUnicode_GET_SIZE(str),
+                                    errors,
+                                    byteorder);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* utf_16_le_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeUTF16() using a
+   fixed byteorder of -1.  The second item of the result is the length
+   of the Unicode input as given by PyUnicode_GET_SIZE().
+
+   Returns NULL with an exception set if the arguments cannot be parsed
+   or the encoder fails. */
+static PyObject *
+utf_16_le_encode(PyObject *self,
+                 PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    /* The format must list exactly the arguments for which output
+       pointers are supplied.  The byte order is fixed here, so there
+       is no integer argument: an "i" unit without a matching pointer
+       would make PyArg_ParseTuple() write through an indeterminate
+       address whenever a third argument is passed. */
+    if (!PyArg_ParseTuple(args, "U|z:utf_16_le_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                    PyUnicode_GET_SIZE(str),
+                                    errors,
+                                    -1);
+    /* codec_tuple() returns NULL itself when `encoded` is NULL. */
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* utf_16_be_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeUTF16() using a
+   fixed byteorder of +1.  The second item of the result is the length
+   of the Unicode input as given by PyUnicode_GET_SIZE().
+
+   Returns NULL with an exception set if the arguments cannot be parsed
+   or the encoder fails. */
+static PyObject *
+utf_16_be_encode(PyObject *self,
+                 PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    /* The format must list exactly the arguments for which output
+       pointers are supplied.  The byte order is fixed here, so there
+       is no integer argument: an "i" unit without a matching pointer
+       would make PyArg_ParseTuple() write through an indeterminate
+       address whenever a third argument is passed. */
+    if (!PyArg_ParseTuple(args, "U|z:utf_16_be_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                    PyUnicode_GET_SIZE(str),
+                                    errors,
+                                    +1);
+    /* codec_tuple() returns NULL itself when `encoded` is NULL. */
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* unicode_escape_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeUnicodeEscape().
+   `errors` is accepted but not forwarded, since the encoder is called
+   without it.  The second item of the result is the length of the
+   Unicode input as given by PyUnicode_GET_SIZE(). */
+static PyObject *
+unicode_escape_encode(PyObject *self,
+                      PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:unicode_escape_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
+                                            PyUnicode_GET_SIZE(str));
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* raw_unicode_escape_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeRawUnicodeEscape().
+   `errors` is accepted but not forwarded, since the encoder is called
+   without it.  The second item of the result is the length of the
+   Unicode input as given by PyUnicode_GET_SIZE(). */
+static PyObject *
+raw_unicode_escape_encode(PyObject *self,
+                          PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:raw_unicode_escape_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(str),
+                                               PyUnicode_GET_SIZE(str));
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* latin_1_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeLatin1().  The
+   second item of the result is the length of the Unicode input as
+   given by PyUnicode_GET_SIZE(). */
+static PyObject *
+latin_1_encode(PyObject *self,
+               PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:latin_1_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(str),
+                                     PyUnicode_GET_SIZE(str),
+                                     errors);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* ascii_encode(unicode[, errors]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeASCII().  The second
+   item of the result is the length of the Unicode input as given by
+   PyUnicode_GET_SIZE(). */
+static PyObject *
+ascii_encode(PyObject *self,
+             PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:ascii_encode",
+                          &str, &errors))
+        return NULL;
+
+    encoded = PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(str),
+                                    PyUnicode_GET_SIZE(str),
+                                    errors);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* charmap_encode(unicode[, errors[, mapping]]) -> (string, length)
+
+   Encodes the Unicode object with PyUnicode_EncodeCharmap().  An
+   omitted mapping and an explicit None are treated alike: both reach
+   the encoder as a NULL mapping.  The second item of the result is the
+   length of the Unicode input as given by PyUnicode_GET_SIZE(). */
+static PyObject *
+charmap_encode(PyObject *self,
+               PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    PyObject *mapping = NULL;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|zO:charmap_encode",
+                          &str, &errors, &mapping))
+        return NULL;
+
+    if (mapping == Py_None)
+        mapping = NULL;
+
+    encoded = PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(str),
+                                      PyUnicode_GET_SIZE(str),
+                                      mapping,
+                                      errors);
+    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
+}
+
+/* --- Module API --------------------------------------------------------- */
+
+/* Method table of the _codecs module.  Every entry uses the positional
+   argument-tuple calling convention (METH_VARARGS). */
+static PyMethodDef _codecs_functions[] = {
+    /* Codec registry */
+    {"register",                  codecregister,             METH_VARARGS},
+    {"lookup",                    codeclookup,               METH_VARARGS},
+
+    /* UTF-8 */
+    {"utf_8_encode",              utf_8_encode,              METH_VARARGS},
+    {"utf_8_decode",              utf_8_decode,              METH_VARARGS},
+
+    /* UTF-16 */
+    {"utf_16_encode",             utf_16_encode,             METH_VARARGS},
+    {"utf_16_le_encode",          utf_16_le_encode,          METH_VARARGS},
+    {"utf_16_be_encode",          utf_16_be_encode,          METH_VARARGS},
+    {"utf_16_decode",             utf_16_decode,             METH_VARARGS},
+    {"utf_16_le_decode",          utf_16_le_decode,          METH_VARARGS},
+    {"utf_16_be_decode",          utf_16_be_decode,          METH_VARARGS},
+    {"utf_16_ex_decode",          utf_16_ex_decode,          METH_VARARGS},
+
+    /* Escape-based and internal representations; note that
+       unicode_internal_encode is served by readbuffer_encode. */
+    {"unicode_escape_encode",     unicode_escape_encode,     METH_VARARGS},
+    {"unicode_escape_decode",     unicode_escape_decode,     METH_VARARGS},
+    {"unicode_internal_encode",   readbuffer_encode,         METH_VARARGS},
+    {"unicode_internal_decode",   unicode_internal_decode,   METH_VARARGS},
+    {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
+    {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
+
+    /* 8-bit character sets */
+    {"latin_1_encode",            latin_1_encode,            METH_VARARGS},
+    {"latin_1_decode",            latin_1_decode,            METH_VARARGS},
+    {"ascii_encode",              ascii_encode,              METH_VARARGS},
+    {"ascii_decode",              ascii_decode,              METH_VARARGS},
+    {"charmap_encode",            charmap_encode,            METH_VARARGS},
+    {"charmap_decode",            charmap_decode,            METH_VARARGS},
+
+    /* Plain buffer copies */
+    {"readbuffer_encode",         readbuffer_encode,         METH_VARARGS},
+    {"charbuffer_encode",         charbuffer_encode,         METH_VARARGS},
+
+    {NULL, NULL}                  /* sentinel */
+};
+
+/* Module initialization: creates the "_codecs" module and populates it
+   from the _codecs_functions method table. */
+DL_EXPORT(void)
+init_codecs(void)
+{
+    Py_InitModule("_codecs", _codecs_functions);
+}