summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Objects/unicodectype.c 62
1 files changed, 52 insertions, 10 deletions
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 083fbe1..b432399 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -49,14 +49,24 @@ gettyperecord(Py_UNICODE code)
return &_PyUnicode_TypeRecords[index];
}
-/* Returns 1 for Unicode characters having the category 'Zl' or type
- 'B', 0 otherwise. */
+/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
+ type 'B', 0 otherwise. */
-int _PyUnicode_IsLinebreak(Py_UNICODE ch)
+int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
{
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
- return (ctype->flags & LINEBREAK_MASK) != 0;
+ switch (ch) {
+ case 0x000A: /* LINE FEED */
+ case 0x000D: /* CARRIAGE RETURN */
+ case 0x001C: /* FILE SEPARATOR */
+ case 0x001D: /* GROUP SEPARATOR */
+ case 0x001E: /* RECORD SEPARATOR */
+ case 0x0085: /* NEXT LINE */
+ case 0x2028: /* LINE SEPARATOR */
+ case 0x2029: /* PARAGRAPH SEPARATOR */
+ return 1;
+ default:
+ return 0;
+ }
}
/* Returns the titlecase Unicode characters corresponding to ch or just
@@ -327,11 +337,43 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the bidirectional type
'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
-int _PyUnicode_IsWhitespace(Py_UNICODE ch)
+int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
{
- const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
- return (ctype->flags & SPACE_MASK) != 0;
+ switch (ch) {
+ case 0x0009: /* HORIZONTAL TABULATION */
+ case 0x000A: /* LINE FEED */
+ case 0x000B: /* VERTICAL TABULATION */
+ case 0x000C: /* FORM FEED */
+ case 0x000D: /* CARRIAGE RETURN */
+ case 0x001C: /* FILE SEPARATOR */
+ case 0x001D: /* GROUP SEPARATOR */
+ case 0x001E: /* RECORD SEPARATOR */
+ case 0x001F: /* UNIT SEPARATOR */
+ case 0x0020: /* SPACE */
+ case 0x0085: /* NEXT LINE */
+ case 0x00A0: /* NO-BREAK SPACE */
+ case 0x1680: /* OGHAM SPACE MARK */
+ case 0x2000: /* EN QUAD */
+ case 0x2001: /* EM QUAD */
+ case 0x2002: /* EN SPACE */
+ case 0x2003: /* EM SPACE */
+ case 0x2004: /* THREE-PER-EM SPACE */
+ case 0x2005: /* FOUR-PER-EM SPACE */
+ case 0x2006: /* SIX-PER-EM SPACE */
+ case 0x2007: /* FIGURE SPACE */
+ case 0x2008: /* PUNCTUATION SPACE */
+ case 0x2009: /* THIN SPACE */
+ case 0x200A: /* HAIR SPACE */
+ case 0x200B: /* ZERO WIDTH SPACE */
+ case 0x2028: /* LINE SEPARATOR */
+ case 0x2029: /* PARAGRAPH SEPARATOR */
+ case 0x202F: /* NARROW NO-BREAK SPACE */
+ case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
+ case 0x3000: /* IDEOGRAPHIC SPACE */
+ return 1;
+ default:
+ return 0;
+ }
}
/* Returns 1 for Unicode characters having the category 'Ll', 0