summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2014-01-03 11:53:47 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2014-01-03 11:53:47 (GMT)
commit985a82a6d296a587843e35de643576b957816bdb (patch)
treec18a9e21aa5756b154b1d88271e2d778e9b5ed8c /Objects/unicodeobject.c
parent5c86733c8ab0817d3aea569592a0d6bfbee81e9c (diff)
downloadcpython-985a82a6d296a587843e35de643576b957816bdb.zip
cpython-985a82a6d296a587843e35de643576b957816bdb.tar.gz
cpython-985a82a6d296a587843e35de643576b957816bdb.tar.bz2
add unicode_char() in unicodeobject.c to factorize code
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c86
1 files changed, 31 insertions, 55 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 34d51e4..1fb5742 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1749,7 +1749,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
}
}
-
static PyObject*
get_latin1_char(unsigned char ch)
{
@@ -1766,6 +1765,31 @@ get_latin1_char(unsigned char ch)
return unicode;
}
+/* Build a one-character string object holding the code point ch.
+
+   ch must not exceed MAX_UNICODE; this is only checked by an assert.
+   Code points below 256 are delegated to get_latin1_char(), exactly as
+   each caller did on its own before this helper was factored out.  Any
+   other code point is stored in a fresh object from PyUnicode_New(1, ch).
+
+   Returns the string object, or NULL if PyUnicode_New() fails. */
+static PyObject*
+unicode_char(Py_UCS4 ch)
+{
+    PyObject *unicode;
+
+    assert(ch <= MAX_UNICODE);
+
+    /* Keep the Latin-1 path the callers relied on: without it the
+       refactoring would change what they return for ch < 256. */
+    if (ch < 256)
+        return get_latin1_char((unsigned char)ch);
+
+    unicode = PyUnicode_New(1, ch);
+    if (unicode == NULL)
+        return NULL;
+    /* Write the single character using the storage width chosen by
+       PyUnicode_New() for this object. */
+    switch (PyUnicode_KIND(unicode)) {
+    case PyUnicode_1BYTE_KIND:
+        PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch;
+        break;
+    case PyUnicode_2BYTE_KIND:
+        PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
+        break;
+    default:
+        /* The only remaining kind is the 4-byte one. */
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+    return unicode;
+}
+
PyObject *
PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
{
@@ -1964,22 +1988,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
assert(size > 0);
- if (size == 1) {
- Py_UCS4 ch = u[0];
- int kind;
- void *data;
- if (ch < 256)
- return get_latin1_char((unsigned char)ch);
-
- res = PyUnicode_New(1, ch);
- if (res == NULL)
- return NULL;
- kind = PyUnicode_KIND(res);
- data = PyUnicode_DATA(res);
- PyUnicode_WRITE(kind, data, 0, ch);
- assert(_PyUnicode_CheckConsistency(res, 1));
- return res;
- }
+ if (size == 1)
+ return unicode_char(u[0]);
max_char = ucs2lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@@ -2004,22 +2014,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
assert(size > 0);
- if (size == 1) {
- Py_UCS4 ch = u[0];
- int kind;
- void *data;
- if (ch < 256)
- return get_latin1_char((unsigned char)ch);
-
- res = PyUnicode_New(1, ch);
- if (res == NULL)
- return NULL;
- kind = PyUnicode_KIND(res);
- data = PyUnicode_DATA(res);
- PyUnicode_WRITE(kind, data, 0, ch);
- assert(_PyUnicode_CheckConsistency(res, 1));
- return res;
- }
+ if (size == 1)
+ return unicode_char(u[0]);
max_char = ucs4lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@@ -2887,17 +2883,7 @@ PyUnicode_FromOrdinal(int ordinal)
return NULL;
}
- if ((Py_UCS4)ordinal < 256)
- return get_latin1_char((unsigned char)ordinal);
-
- v = PyUnicode_New(1, ordinal);
- if (v == NULL)
- return NULL;
- kind = PyUnicode_KIND(v);
- data = PyUnicode_DATA(v);
- PyUnicode_WRITE(kind, data, 0, ordinal);
- assert(_PyUnicode_CheckConsistency(v, 1));
- return v;
+ return unicode_char((Py_UCS4)ordinal);
}
PyObject *
@@ -11354,17 +11340,7 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
ch = PyUnicode_READ(kind, data, index);
- if (ch < 256)
- return get_latin1_char(ch);
-
- res = PyUnicode_New(1, ch);
- if (res == NULL)
- return NULL;
- kind = PyUnicode_KIND(res);
- data = PyUnicode_DATA(res);
- PyUnicode_WRITE(kind, data, 0, ch);
- assert(_PyUnicode_CheckConsistency(res, 1));
- return res;
+ return unicode_char(ch);
}
/* Believe it or not, this produces the same value for ASCII strings