summary refs log tree commit diff stats
path: root/Modules/unicodedata.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/unicodedata.c')
-rw-r--r-- Modules/unicodedata.c 286
1 files changed, 143 insertions, 143 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 92c312b..538c8ca 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -19,14 +19,14 @@
/* character properties */
typedef struct {
- const unsigned char category; /* index into
- _PyUnicode_CategoryNames */
- const unsigned char combining; /* combining class value 0 - 255 */
- const unsigned char bidirectional; /* index into
- _PyUnicode_BidirectionalNames */
- const unsigned char mirrored; /* true if mirrored in bidir mode */
- const unsigned char east_asian_width; /* index into
- _PyUnicode_EastAsianWidth */
+ const unsigned char category; /* index into
+ _PyUnicode_CategoryNames */
+ const unsigned char combining; /* combining class value 0 - 255 */
+ const unsigned char bidirectional; /* index into
+ _PyUnicode_BidirectionalNames */
+ const unsigned char mirrored; /* true if mirrored in bidir mode */
+ const unsigned char east_asian_width; /* index into
+ _PyUnicode_EastAsianWidth */
const unsigned char normalization_quick_check; /* see is_normalized() */
} _PyUnicode_DatabaseRecord;
@@ -67,7 +67,7 @@ typedef struct previous_version {
#define get_old_record(self, v) ((((PreviousDBVersion*)self)->getrecord)(v))
static PyMemberDef DB_members[] = {
- {"unidata_version", T_STRING, offsetof(PreviousDBVersion, name), READONLY},
+ {"unidata_version", T_STRING, offsetof(PreviousDBVersion, name), READONLY},
{NULL}
};
@@ -79,14 +79,14 @@ static PyObject*
new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4),
Py_UCS4 (*normalization)(Py_UCS4))
{
- PreviousDBVersion *self;
- self = PyObject_New(PreviousDBVersion, &UCD_Type);
- if (self == NULL)
- return NULL;
- self->name = name;
- self->getrecord = getrecord;
+ PreviousDBVersion *self;
+ self = PyObject_New(PreviousDBVersion, &UCD_Type);
+ if (self == NULL)
+ return NULL;
+ self->name = name;
+ self->getrecord = getrecord;
self->normalization = normalization;
- return (PyObject*)self;
+ return (PyObject*)self;
}
@@ -95,12 +95,12 @@ static Py_UCS4 getuchar(PyUnicodeObject *obj)
Py_UNICODE *v = PyUnicode_AS_UNICODE(obj);
if (PyUnicode_GET_SIZE(obj) == 1)
- return *v;
+ return *v;
#ifndef Py_UNICODE_WIDE
else if ((PyUnicode_GET_SIZE(obj) == 2) &&
(0xD800 <= v[0] && v[0] <= 0xDBFF) &&
(0xDC00 <= v[1] && v[1] <= 0xDFFF))
- return (((v[0] & 0x3FF)<<10) | (v[1] & 0x3FF)) + 0x10000;
+ return (((v[0] & 0x3FF)<<10) | (v[1] & 0x3FF)) + 0x10000;
#endif
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
@@ -137,7 +137,7 @@ unicodedata_decimal(PyObject *self, PyObject *args)
/* unassigned */
have_old = 1;
rc = -1;
- }
+ }
else if (old->decimal_changed != 0xFF) {
have_old = 1;
rc = old->decimal_changed;
@@ -147,15 +147,15 @@ unicodedata_decimal(PyObject *self, PyObject *args)
if (!have_old)
rc = Py_UNICODE_TODECIMAL(c);
if (rc < 0) {
- if (defobj == NULL) {
- PyErr_SetString(PyExc_ValueError,
- "not a decimal");
+ if (defobj == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "not a decimal");
return NULL;
- }
- else {
- Py_INCREF(defobj);
- return defobj;
- }
+ }
+ else {
+ Py_INCREF(defobj);
+ return defobj;
+ }
}
return PyLong_FromLong(rc);
}
@@ -182,14 +182,14 @@ unicodedata_digit(PyObject *self, PyObject *args)
return NULL;
rc = Py_UNICODE_TODIGIT(c);
if (rc < 0) {
- if (defobj == NULL) {
- PyErr_SetString(PyExc_ValueError, "not a digit");
+ if (defobj == NULL) {
+ PyErr_SetString(PyExc_ValueError, "not a digit");
return NULL;
- }
- else {
- Py_INCREF(defobj);
- return defobj;
- }
+ }
+ else {
+ Py_INCREF(defobj);
+ return defobj;
+ }
}
return PyLong_FromLong(rc);
}
@@ -222,7 +222,7 @@ unicodedata_numeric(PyObject *self, PyObject *args)
/* unassigned */
have_old = 1;
rc = -1.0;
- }
+ }
else if (old->decimal_changed != 0xFF) {
have_old = 1;
rc = old->decimal_changed;
@@ -232,14 +232,14 @@ unicodedata_numeric(PyObject *self, PyObject *args)
if (!have_old)
rc = Py_UNICODE_TONUMERIC(c);
if (rc == -1.0) {
- if (defobj == NULL) {
- PyErr_SetString(PyExc_ValueError, "not a numeric character");
- return NULL;
- }
- else {
- Py_INCREF(defobj);
- return defobj;
- }
+ if (defobj == NULL) {
+ PyErr_SetString(PyExc_ValueError, "not a numeric character");
+ return NULL;
+ }
+ else {
+ Py_INCREF(defobj);
+ return defobj;
+ }
}
return PyFloat_FromDouble(rc);
}
@@ -258,8 +258,8 @@ unicodedata_category(PyObject *self, PyObject *args)
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:category",
- &PyUnicode_Type, &v))
- return NULL;
+ &PyUnicode_Type, &v))
+ return NULL;
c = getuchar(v);
if (c == (Py_UCS4)-1)
return NULL;
@@ -287,8 +287,8 @@ unicodedata_bidirectional(PyObject *self, PyObject *args)
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:bidirectional",
- &PyUnicode_Type, &v))
- return NULL;
+ &PyUnicode_Type, &v))
+ return NULL;
c = getuchar(v);
if (c == (Py_UCS4)-1)
return NULL;
@@ -318,8 +318,8 @@ unicodedata_combining(PyObject *self, PyObject *args)
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:combining",
- &PyUnicode_Type, &v))
- return NULL;
+ &PyUnicode_Type, &v))
+ return NULL;
c = getuchar(v);
if (c == (Py_UCS4)-1)
return NULL;
@@ -347,8 +347,8 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:mirrored",
- &PyUnicode_Type, &v))
- return NULL;
+ &PyUnicode_Type, &v))
+ return NULL;
c = getuchar(v);
if (c == (Py_UCS4)-1)
return NULL;
@@ -377,8 +377,8 @@ unicodedata_east_asian_width(PyObject *self, PyObject *args)
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:east_asian_width",
- &PyUnicode_Type, &v))
- return NULL;
+ &PyUnicode_Type, &v))
+ return NULL;
c = getuchar(v);
if (c == (Py_UCS4)-1)
return NULL;
@@ -408,8 +408,8 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:decomposition",
- &PyUnicode_Type, &v))
- return NULL;
+ &PyUnicode_Type, &v))
+ return NULL;
c = getuchar(v);
if (c == (Py_UCS4)-1)
return NULL;
@@ -455,7 +455,7 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
decomp_data[++index]);
i += strlen(decomp + i);
}
-
+
decomp[i] = '\0';
return PyUnicode_FromString(decomp);
@@ -466,7 +466,7 @@ get_decomp_record(PyObject *self, Py_UCS4 code, int *index, int *prefix, int *co
{
if (code >= 0x110000) {
*index = 0;
- } else if (self && UCD_Check(self) &&
+ } else if (self && UCD_Check(self) &&
get_old_record(self, code)->category_changed==0) {
/* unassigned in old version */
*index = 0;
@@ -476,7 +476,7 @@ get_decomp_record(PyObject *self, Py_UCS4 code, int *index, int *prefix, int *co
*index = decomp_index2[(*index<<DECOMP_SHIFT)+
(code&((1<<DECOMP_SHIFT)-1))];
}
-
+
/* high byte is number of hex bytes (usually one or two), low byte
is prefix code */
*count = decomp_data[*index] >> 8;
@@ -501,11 +501,11 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
PyObject *result;
Py_UNICODE *i, *end, *o;
/* Longest decomposition in Unicode 3.2: U+FDFA */
- Py_UNICODE stack[20];
+ Py_UNICODE stack[20];
Py_ssize_t space, isize;
int index, prefix, count, stackptr;
unsigned char prev, cur;
-
+
stackptr = 0;
isize = PyUnicode_GET_SIZE(input);
/* Overallocate at most 10 characters. */
@@ -642,12 +642,12 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
i = PyUnicode_AS_UNICODE(result);
end = i + PyUnicode_GET_SIZE(result);
o = PyUnicode_AS_UNICODE(result);
-
+
again:
while (i < end) {
for (index = 0; index < cskipped; index++) {
if (skipped[index] == i) {
- /* *i character is skipped.
+ /* *i character is skipped.
Remove from list. */
skipped[index] = skipped[cskipped-1];
cskipped--;
@@ -658,7 +658,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
/* Hangul Composition. We don't need to check for <LV,T>
pairs, since we always have decomposed data. */
if (LBase <= *i && *i < (LBase+LCount) &&
- i + 1 < end &&
+ i + 1 < end &&
VBase <= i[1] && i[1] <= (VBase+VCount)) {
int LIndex, VIndex;
LIndex = i[0] - LBase;
@@ -707,7 +707,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
(index&((1<<COMP_SHIFT)-1))];
if (code == 0)
goto not_combinable;
-
+
/* Replace the original character. */
*i = code;
/* Mark the second character unused. */
@@ -891,29 +891,29 @@ _getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen)
if (old->category_changed == 0) {
/* unassigned */
return 0;
- }
+ }
}
if (SBase <= code && code < SBase+SCount) {
- /* Hangul syllable. */
- int SIndex = code - SBase;
- int L = SIndex / NCount;
- int V = (SIndex % NCount) / TCount;
- int T = SIndex % TCount;
-
- if (buflen < 27)
- /* Worst case: HANGUL SYLLABLE <10chars>. */
- return 0;
- strcpy(buffer, "HANGUL SYLLABLE ");
- buffer += 16;
- strcpy(buffer, hangul_syllables[L][0]);
- buffer += strlen(hangul_syllables[L][0]);
- strcpy(buffer, hangul_syllables[V][1]);
- buffer += strlen(hangul_syllables[V][1]);
- strcpy(buffer, hangul_syllables[T][2]);
- buffer += strlen(hangul_syllables[T][2]);
- *buffer = '\0';
- return 1;
+ /* Hangul syllable. */
+ int SIndex = code - SBase;
+ int L = SIndex / NCount;
+ int V = (SIndex % NCount) / TCount;
+ int T = SIndex % TCount;
+
+ if (buflen < 27)
+ /* Worst case: HANGUL SYLLABLE <10chars>. */
+ return 0;
+ strcpy(buffer, "HANGUL SYLLABLE ");
+ buffer += 16;
+ strcpy(buffer, hangul_syllables[L][0]);
+ buffer += strlen(hangul_syllables[L][0]);
+ strcpy(buffer, hangul_syllables[V][1]);
+ buffer += strlen(hangul_syllables[V][1]);
+ strcpy(buffer, hangul_syllables[T][2]);
+ buffer += strlen(hangul_syllables[T][2]);
+ *buffer = '\0';
+ return 1;
}
if (is_unified_ideograph(code)) {
@@ -980,23 +980,23 @@ _cmpname(PyObject *self, int code, const char* name, int namelen)
return buffer[namelen] == '\0';
}
-static void
+static void
find_syllable(const char *str, int *len, int *pos, int count, int column)
{
int i, len1;
*len = -1;
for (i = 0; i < count; i++) {
- char *s = hangul_syllables[i][column];
- len1 = strlen(s);
- if (len1 <= *len)
- continue;
- if (strncmp(str, s, len1) == 0) {
- *len = len1;
- *pos = i;
- }
+ char *s = hangul_syllables[i][column];
+ len1 = strlen(s);
+ if (len1 <= *len)
+ continue;
+ if (strncmp(str, s, len1) == 0) {
+ *len = len1;
+ *pos = i;
+ }
}
if (*len == -1) {
- *len = 0;
+ *len = 0;
}
}
@@ -1009,18 +1009,18 @@ _getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code)
/* Check for hangul syllables. */
if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
- int len, L = -1, V = -1, T = -1;
- const char *pos = name + 16;
- find_syllable(pos, &len, &L, LCount, 0);
- pos += len;
- find_syllable(pos, &len, &V, VCount, 1);
- pos += len;
- find_syllable(pos, &len, &T, TCount, 2);
- pos += len;
- if (L != -1 && V != -1 && T != -1 && pos-name == namelen) {
- *code = SBase + (L*VCount+V)*TCount + T;
- return 1;
- }
+ int len, L = -1, V = -1, T = -1;
+ const char *pos = name + 16;
+ find_syllable(pos, &len, &L, LCount, 0);
+ pos += len;
+ find_syllable(pos, &len, &V, VCount, 1);
+ pos += len;
+ find_syllable(pos, &len, &T, TCount, 2);
+ pos += len;
+ if (L != -1 && V != -1 && T != -1 && pos-name == namelen) {
+ *code = SBase + (L*VCount+V)*TCount + T;
+ return 1;
+ }
/* Otherwise, it's an illegal syllable name. */
return 0;
}
@@ -1080,7 +1080,7 @@ _getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code)
}
}
-static const _PyUnicode_Name_CAPI hashAPI =
+static const _PyUnicode_Name_CAPI hashAPI =
{
sizeof(_PyUnicode_Name_CAPI),
_getucname,
@@ -1112,14 +1112,14 @@ unicodedata_name(PyObject* self, PyObject* args)
return NULL;
if (!_getucname(self, c, name, sizeof(name))) {
- if (defobj == NULL) {
- PyErr_SetString(PyExc_ValueError, "no such name");
+ if (defobj == NULL) {
+ PyErr_SetString(PyExc_ValueError, "no such name");
return NULL;
- }
- else {
- Py_INCREF(defobj);
- return defobj;
- }
+ }
+ else {
+ Py_INCREF(defobj);
+ return defobj;
+ }
}
return PyUnicode_FromString(name);
@@ -1157,7 +1157,7 @@ unicodedata_lookup(PyObject* self, PyObject* args)
}
#endif
str[0] = (Py_UNICODE) code;
- return PyUnicode_FromUnicode(str, 1);
+ return PyUnicode_FromUnicode(str, 1);
}
/* XXX Add doc strings. */
@@ -1182,27 +1182,27 @@ static PyMethodDef unicodedata_functions[] = {
{"lookup", unicodedata_lookup, METH_VARARGS, unicodedata_lookup__doc__},
{"normalize", unicodedata_normalize, METH_VARARGS,
unicodedata_normalize__doc__},
- {NULL, NULL} /* sentinel */
+ {NULL, NULL} /* sentinel */
};
static PyTypeObject UCD_Type = {
- /* The ob_type field must be initialized in the module init function
- * to be portable to Windows without using C++. */
- PyVarObject_HEAD_INIT(NULL, 0)
- "unicodedata.UCD", /*tp_name*/
- sizeof(PreviousDBVersion), /*tp_basicsize*/
- 0, /*tp_itemsize*/
- /* methods */
- (destructor)PyObject_Del, /*tp_dealloc*/
- 0, /*tp_print*/
- 0, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_reserved*/
- 0, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- 0, /*tp_as_mapping*/
- 0, /*tp_hash*/
+ /* The ob_type field must be initialized in the module init function
+ * to be portable to Windows without using C++. */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "unicodedata.UCD", /*tp_name*/
+ sizeof(PreviousDBVersion), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)PyObject_Del, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_reserved*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
PyObject_GenericGetAttr,/*tp_getattro*/
@@ -1243,15 +1243,15 @@ http://www.unicode.org/reports/tr44/tr44-4.html).");
static struct PyModuleDef unicodedatamodule = {
- PyModuleDef_HEAD_INIT,
- "unicodedata",
- unicodedata_docstring,
- -1,
- unicodedata_functions,
- NULL,
- NULL,
- NULL,
- NULL
+ PyModuleDef_HEAD_INIT,
+ "unicodedata",
+ unicodedata_docstring,
+ -1,
+ unicodedata_functions,
+ NULL,
+ NULL,
+ NULL,
+ NULL
};
PyMODINIT_FUNC
@@ -1281,7 +1281,7 @@ PyInit_unicodedata(void)
return m;
}
-/*
+/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil