summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_pep3131.py 3
-rw-r--r-- Lib/test/test_unicode.py 1
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Objects/unicodeobject.c 31
4 files changed, 30 insertions, 8 deletions
diff --git a/Lib/test/test_pep3131.py b/Lib/test/test_pep3131.py
index 9d5f217..ed7558a 100644
--- a/Lib/test/test_pep3131.py
+++ b/Lib/test/test_pep3131.py
@@ -8,9 +8,12 @@ class PEP3131Test(unittest.TestCase):
ä = 1
µ = 2 # this is a compatibility character
蟒 = 3
+ 𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 4
self.assertEqual(getattr(T, "\xe4"), 1)
self.assertEqual(getattr(T, "\u03bc"), 2)
self.assertEqual(getattr(T, '\u87d2'), 3)
+ v = getattr(T, "\U0001d518\U0001d52b\U0001d526\U0001d520\U0001d52c\U0001d521\U0001d522")
+ self.assertEqual(v, 4)
def test_invalid(self):
try:
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 1da44b0..19b31c0 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -404,6 +404,7 @@ class UnicodeTest(string_tests.CommonTest,
self.assertTrue("bc".isidentifier())
self.assertTrue("b_".isidentifier())
self.assertTrue("µ".isidentifier())
+ self.assertTrue("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".isidentifier())
self.assertFalse(" ".isidentifier())
self.assertFalse("[".isidentifier())
diff --git a/Misc/NEWS b/Misc/NEWS
index 34aac79..360e731 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
+- Issue #12732: In narrow unicode builds, allow Unicode identifiers which fall
+ outside the BMP.
+
- Issue #12575: Validate user-generated AST before it is compiled.
- Make type(None), type(Ellipsis), and type(NotImplemented) callable. They
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0918671..61b253d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8044,14 +8044,30 @@ unicode_isnumeric(PyUnicodeObject *self)
return PyBool_FromLong(1);
}
+static Py_UCS4
+decode_ucs4(const Py_UNICODE *s, Py_ssize_t *i, Py_ssize_t size)
+{
+ Py_UCS4 ch;
+ assert(*i < size);
+ ch = s[(*i)++];
+#ifndef Py_UNICODE_WIDE
+ if ((ch & 0xfffffc00) == 0xd800 &&
+ *i < size
+ && (s[*i] & 0xFFFFFC00) == 0xDC00)
+ ch = ((Py_UCS4)ch << 10UL) + (Py_UCS4)(s[(*i)++]) - 0x35fdc00;
+#endif
+ return ch;
+}
+
int
PyUnicode_IsIdentifier(PyObject *self)
{
- register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
- register const Py_UNICODE *e;
+ Py_ssize_t i = 0, size = PyUnicode_GET_SIZE(self);
+ Py_UCS4 first;
+ const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
/* Special case for empty strings */
- if (PyUnicode_GET_SIZE(self) == 0)
+ if (!size)
return 0;
/* PEP 3131 says that the first character must be in
@@ -8062,14 +8078,13 @@ PyUnicode_IsIdentifier(PyObject *self)
definition of XID_Start and XID_Continue, it is sufficient
to check just for these, except that _ must be allowed
as starting an identifier. */
- if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
+ first = decode_ucs4(p, &i, size);
+ if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
return 0;
- e = p + PyUnicode_GET_SIZE(self);
- for (p++; p < e; p++) {
- if (!_PyUnicode_IsXidContinue(*p))
+ while (i < size)
+ if (!_PyUnicode_IsXidContinue(decode_ucs4(p, &i, size)))
return 0;
- }
return 1;
}