diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2006-05-27 08:36:52 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2006-05-27 08:36:52 (GMT) |
commit | d004fc810af3e1985686e616763e14a1b0aa60c1 (patch) | |
tree | 84d813c5639518d647144fb8befbfcb524052175 | |
parent | d1b6cd7bfb35ffecd8b52d9d2b3cd608ab6515e0 (diff) | |
download | cpython-d004fc810af3e1985686e616763e14a1b0aa60c1.zip cpython-d004fc810af3e1985686e616763e14a1b0aa60c1.tar.gz cpython-d004fc810af3e1985686e616763e14a1b0aa60c1.tar.bz2 |
Patch 1494554: Update numeric properties to Unicode 4.1.
-rw-r--r-- | Doc/api/concrete.tex | 2 | ||||
-rw-r--r-- | Lib/test/test_unicodedata.py | 4 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/unicodedata.c | 4 | ||||
-rw-r--r-- | Objects/unicodectype.c | 333 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 1 |
6 files changed, 336 insertions, 11 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index 8390a72..10247ab 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -965,7 +965,7 @@ These APIs can be used for fast direct character conversions: \end{cfuncdesc} \begin{cfuncdesc}{double}{Py_UNICODE_TONUMERIC}{Py_UNICODE ch} - Return the character \var{ch} converted to a (positive) double. + Return the character \var{ch} converted to a double. Return \code{-1.0} if this is not possible. This macro does not raise exceptions. \end{cfuncdesc} diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index c4b5cf3..0023bf4 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -16,7 +16,7 @@ encoding = 'utf-8' class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes - expectedchecksum = 'a6555cd209d960dcfa17bfdce0c96d91cfa9a9ba' + expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a' def test_method_checksum(self): h = hashlib.sha1() @@ -75,7 +75,7 @@ class UnicodeDatabaseTest(unittest.TestCase): class UnicodeFunctionsTest(UnicodeDatabaseTest): # update this, if the database changes - expectedchecksum = 'b45b79f3203ee1a896d9b5655484adaff5d4964b' + expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2' def test_function_checksum(self): data = [] diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.5 alpha 3? Core and builtins ----------------- +- Patch #1494554: Update unicodedata.numeric and unicode.isnumeric to + Unicode 4.1. + - Patch #921466: sys.path_importer_cache is now used to cache valid and invalid file paths for the built-in import machinery which leads to fewer open calls on startup. 
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 297611c..0660353 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -209,7 +209,7 @@ unicodedata_numeric(PyObject *self, PyObject *args) if (old->category_changed == 0) { /* unassigned */ have_old = 1; - rc = -1; + rc = -1.0; } else if (old->decimal_changed != 0xFF) { have_old = 1; @@ -219,7 +219,7 @@ unicodedata_numeric(PyObject *self, PyObject *args) if (!have_old) rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v)); - if (rc < 0) { + if (rc == -1.0) { if (defobj == NULL) { PyErr_SetString(PyExc_ValueError, "not a numeric character"); return NULL; diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c index b432399..73def09 100644 --- a/Objects/unicodectype.c +++ b/Objects/unicodectype.c @@ -140,20 +140,48 @@ int _PyUnicode_IsDigit(Py_UNICODE ch) double _PyUnicode_ToNumeric(Py_UNICODE ch) { switch (ch) { + case 0x0F33: + return (double) -1 / 2; + case 0x17F0: case 0x3007: +#ifdef Py_UNICODE_WIDE + case 0x1018A: +#endif return (double) 0; case 0x09F4: + case 0x17F1: case 0x215F: case 0x2160: case 0x2170: case 0x3021: + case 0x3192: + case 0x3220: case 0x3280: +#ifdef Py_UNICODE_WIDE + case 0x10107: + case 0x10142: + case 0x10158: + case 0x10159: + case 0x1015A: + case 0x10320: + case 0x103D1: +#endif return (double) 1; case 0x00BD: + case 0x0F2A: + case 0x2CFD: +#ifdef Py_UNICODE_WIDE + case 0x10141: + case 0x10175: + case 0x10176: +#endif return (double) 1 / 2; case 0x2153: return (double) 1 / 3; case 0x00BC: +#ifdef Py_UNICODE_WIDE + case 0x10140: +#endif return (double) 1 / 4; case 0x2155: return (double) 1 / 5; @@ -168,92 +196,201 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) case 0x2469: case 0x247D: case 0x2491: + case 0x24FE: case 0x277F: case 0x2789: case 0x2793: case 0x3038: + case 0x3229: case 0x3289: +#ifdef Py_UNICODE_WIDE + case 0x10110: + case 0x10149: + case 0x10150: + case 0x10157: + case 0x10160: + case 0x10161: + case 0x10162: + case 0x10163: + case 0x10164: + 
case 0x10322: + case 0x103D3: + case 0x10A44: +#endif return (double) 10; case 0x0BF1: case 0x137B: case 0x216D: case 0x217D: +#ifdef Py_UNICODE_WIDE + case 0x10119: + case 0x1014B: + case 0x10152: + case 0x1016A: + case 0x103D5: + case 0x10A46: +#endif return (double) 100; case 0x0BF2: case 0x216F: case 0x217F: case 0x2180: +#ifdef Py_UNICODE_WIDE + case 0x10122: + case 0x1014D: + case 0x10154: + case 0x10171: + case 0x10A47: +#endif return (double) 1000; case 0x137C: case 0x2182: +#ifdef Py_UNICODE_WIDE + case 0x1012B: + case 0x10155: +#endif return (double) 10000; case 0x216A: case 0x217A: case 0x246A: case 0x247E: case 0x2492: + case 0x24EB: return (double) 11; + case 0x0F2F: + return (double) 11 / 2; case 0x216B: case 0x217B: case 0x246B: case 0x247F: case 0x2493: + case 0x24EC: return (double) 12; case 0x246C: case 0x2480: case 0x2494: + case 0x24ED: return (double) 13; + case 0x0F30: + return (double) 13 / 2; case 0x246D: case 0x2481: case 0x2495: + case 0x24EE: return (double) 14; case 0x246E: case 0x2482: case 0x2496: + case 0x24EF: return (double) 15; + case 0x0F31: + return (double) 15 / 2; case 0x09F9: case 0x246F: case 0x2483: case 0x2497: + case 0x24F0: return (double) 16; case 0x16EE: case 0x2470: case 0x2484: case 0x2498: + case 0x24F1: return (double) 17; + case 0x0F32: + return (double) 17 / 2; case 0x16EF: case 0x2471: case 0x2485: case 0x2499: + case 0x24F2: return (double) 18; case 0x16F0: case 0x2472: case 0x2486: case 0x249A: + case 0x24F3: return (double) 19; case 0x09F5: + case 0x17F2: case 0x2161: case 0x2171: case 0x3022: + case 0x3193: + case 0x3221: case 0x3281: +#ifdef Py_UNICODE_WIDE + case 0x10108: + case 0x1015B: + case 0x1015C: + case 0x1015D: + case 0x1015E: + case 0x103D2: +#endif return (double) 2; case 0x2154: +#ifdef Py_UNICODE_WIDE + case 0x10177: +#endif return (double) 2 / 3; case 0x2156: - return (double) 2 / 5; + return (double) 2 / 5; case 0x1373: case 0x2473: case 0x2487: case 0x249B: + case 0x24F4: case 0x3039: - 
return (double) 20; +#ifdef Py_UNICODE_WIDE + case 0x10111: + case 0x103D4: + case 0x10A45: +#endif + return (double) 20; +#ifdef Py_UNICODE_WIDE + case 0x1011A: + return (double) 200; + case 0x10123: + return (double) 2000; + case 0x1012C: + return (double) 20000; +#endif + case 0x3251: + return (double) 21; + case 0x3252: + return (double) 22; + case 0x3253: + return (double) 23; + case 0x3254: + return (double) 24; + case 0x3255: + return (double) 25; + case 0x3256: + return (double) 26; + case 0x3257: + return (double) 27; + case 0x3258: + return (double) 28; + case 0x3259: + return (double) 29; case 0x09F6: + case 0x17F3: case 0x2162: case 0x2172: case 0x3023: + case 0x3194: + case 0x3222: case 0x3282: +#ifdef Py_UNICODE_WIDE + case 0x10109: +#endif return (double) 3; + case 0x0F2B: + return (double) 3 / 2; case 0x00BE: +#ifdef Py_UNICODE_WIDE + case 0x10178: +#endif return (double) 3 / 4; case 0x2157: return (double) 3 / 5; @@ -261,22 +398,103 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) return (double) 3 / 8; case 0x1374: case 0x303A: + case 0x325A: +#ifdef Py_UNICODE_WIDE + case 0x10112: + case 0x10165: +#endif return (double) 30; +#ifdef Py_UNICODE_WIDE + case 0x1011B: + case 0x1016B: + return (double) 300; + case 0x10124: + return (double) 3000; + case 0x1012D: + return (double) 30000; +#endif + case 0x325B: + return (double) 31; + case 0x325C: + return (double) 32; + case 0x325D: + return (double) 33; + case 0x325E: + return (double) 34; + case 0x325F: + return (double) 35; + case 0x32B1: + return (double) 36; + case 0x32B2: + return (double) 37; + case 0x32B3: + return (double) 38; + case 0x32B4: + return (double) 39; case 0x09F7: + case 0x17F4: case 0x2163: case 0x2173: case 0x3024: + case 0x3195: + case 0x3223: case 0x3283: +#ifdef Py_UNICODE_WIDE + case 0x1010A: +#endif return (double) 4; case 0x2158: return (double) 4 / 5; case 0x1375: - return (double) 40; + case 0x32B5: +#ifdef Py_UNICODE_WIDE + case 0x10113: +#endif + return (double) 40; +#ifdef 
Py_UNICODE_WIDE + case 0x1011C: + return (double) 400; + case 0x10125: + return (double) 4000; + case 0x1012E: + return (double) 40000; +#endif + case 0x32B6: + return (double) 41; + case 0x32B7: + return (double) 42; + case 0x32B8: + return (double) 43; + case 0x32B9: + return (double) 44; + case 0x32BA: + return (double) 45; + case 0x32BB: + return (double) 46; + case 0x32BC: + return (double) 47; + case 0x32BD: + return (double) 48; + case 0x32BE: + return (double) 49; + case 0x17F5: case 0x2164: case 0x2174: case 0x3025: + case 0x3224: case 0x3284: +#ifdef Py_UNICODE_WIDE + case 0x1010B: + case 0x10143: + case 0x10148: + case 0x1014F: + case 0x1015F: + case 0x10173: + case 0x10321: +#endif return (double) 5; + case 0x0F2C: + return (double) 5 / 2; case 0x215A: return (double) 5 / 6; case 0x215D: @@ -284,42 +502,147 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) case 0x1376: case 0x216C: case 0x217C: + case 0x32BF: +#ifdef Py_UNICODE_WIDE + case 0x10114: + case 0x10144: + case 0x1014A: + case 0x10151: + case 0x10166: + case 0x10167: + case 0x10168: + case 0x10169: + case 0x10174: + case 0x10323: +#endif return (double) 50; case 0x216E: case 0x217E: +#ifdef Py_UNICODE_WIDE + case 0x1011D: + case 0x10145: + case 0x1014C: + case 0x10153: + case 0x1016C: + case 0x1016D: + case 0x1016E: + case 0x1016F: + case 0x10170: +#endif return (double) 500; case 0x2181: +#ifdef Py_UNICODE_WIDE + case 0x10126: + case 0x10146: + case 0x1014E: + case 0x10172: +#endif return (double) 5000; +#ifdef Py_UNICODE_WIDE + case 0x1012F: + case 0x10147: + case 0x10156: + return (double) 50000; +#endif + case 0x17F6: case 0x2165: case 0x2175: case 0x3026: + case 0x3225: case 0x3285: +#ifdef Py_UNICODE_WIDE + case 0x1010C: +#endif return (double) 6; case 0x1377: +#ifdef Py_UNICODE_WIDE + case 0x10115: +#endif return (double) 60; +#ifdef Py_UNICODE_WIDE + case 0x1011E: + return (double) 600; + case 0x10127: + return (double) 6000; + case 0x10130: + return (double) 60000; +#endif + case 0x17F7: 
case 0x2166: case 0x2176: case 0x3027: + case 0x3226: case 0x3286: +#ifdef Py_UNICODE_WIDE + case 0x1010D: +#endif return (double) 7; + case 0x0F2D: + return (double) 7 / 2; case 0x215E: return (double) 7 / 8; case 0x1378: +#ifdef Py_UNICODE_WIDE + case 0x10116: +#endif return (double) 70; +#ifdef Py_UNICODE_WIDE + case 0x1011F: + return (double) 700; + case 0x10128: + return (double) 7000; + case 0x10131: + return (double) 70000; +#endif + case 0x17F8: case 0x2167: case 0x2177: case 0x3028: + case 0x3227: case 0x3287: +#ifdef Py_UNICODE_WIDE + case 0x1010E: +#endif return (double) 8; case 0x1379: +#ifdef Py_UNICODE_WIDE + case 0x10117: +#endif return (double) 80; +#ifdef Py_UNICODE_WIDE + case 0x10120: + return (double) 800; + case 0x10129: + return (double) 8000; + case 0x10132: + return (double) 80000; +#endif + case 0x17F9: case 0x2168: case 0x2178: case 0x3029: + case 0x3228: case 0x3288: +#ifdef Py_UNICODE_WIDE + case 0x1010F: +#endif return (double) 9; + case 0x0F2E: + return (double) 9 / 2; case 0x137A: +#ifdef Py_UNICODE_WIDE + case 0x10118: +#endif return (double) 90; +#ifdef Py_UNICODE_WIDE + case 0x10121: + case 0x1034A: + return (double) 900; + case 0x1012A: + return (double) 9000; + case 0x10133: + return (double) 90000; +#endif default: return (double) _PyUnicode_ToDigit(ch); } @@ -327,9 +650,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch) int _PyUnicode_IsNumeric(Py_UNICODE ch) { - if (_PyUnicode_ToNumeric(ch) < 0.0) - return 0; - return 1; + return _PyUnicode_ToNumeric(ch) != -1.0; } #ifndef WANT_WCTYPE_FUNCTIONS diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 783eb8f..26cf521 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2376,6 +2376,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, end = s + size; while (s < end) { + *p = *(Py_UNICODE*)s; memcpy(p, s, sizeof(Py_UNICODE)); /* We have to sanity check the raw data, otherwise doom looms for some malformed UCS-4 data. */ |