diff options
-rw-r--r-- | Objects/unicodectype.c | 62 |
1 files changed, 52 insertions, 10 deletions
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 083fbe1..b432399 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -49,14 +49,24 @@ gettyperecord(Py_UNICODE code)
     return &_PyUnicode_TypeRecords[index];
 }
 
-/* Returns 1 for Unicode characters having the category 'Zl' or type
-   'B', 0 otherwise. */
+/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
+   type 'B', 0 otherwise. */
 
-int _PyUnicode_IsLinebreak(Py_UNICODE ch)
+int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
 {
-    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
-    return (ctype->flags & LINEBREAK_MASK) != 0;
+    switch (ch) {
+    case 0x000A: /* LINE FEED */
+    case 0x000D: /* CARRIAGE RETURN */
+    case 0x001C: /* FILE SEPARATOR */
+    case 0x001D: /* GROUP SEPARATOR */
+    case 0x001E: /* RECORD SEPARATOR */
+    case 0x0085: /* NEXT LINE */
+    case 0x2028: /* LINE SEPARATOR */
+    case 0x2029: /* PARAGRAPH SEPARATOR */
+        return 1;
+    default:
+        return 0;
+    }
 }
 
 /* Returns the titlecase Unicode characters corresponding to ch or just
@@ -327,11 +337,43 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the bidirectional type
    'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
 
-int _PyUnicode_IsWhitespace(Py_UNICODE ch)
+int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
 {
-    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-
-    return (ctype->flags & SPACE_MASK) != 0;
+    switch (ch) {
+    case 0x0009: /* HORIZONTAL TABULATION */
+    case 0x000A: /* LINE FEED */
+    case 0x000B: /* VERTICAL TABULATION */
+    case 0x000C: /* FORM FEED */
+    case 0x000D: /* CARRIAGE RETURN */
+    case 0x001C: /* FILE SEPARATOR */
+    case 0x001D: /* GROUP SEPARATOR */
+    case 0x001E: /* RECORD SEPARATOR */
+    case 0x001F: /* UNIT SEPARATOR */
+    case 0x0020: /* SPACE */
+    case 0x0085: /* NEXT LINE */
+    case 0x00A0: /* NO-BREAK SPACE */
+    case 0x1680: /* OGHAM SPACE MARK */
+    case 0x2000: /* EN QUAD */
+    case 0x2001: /* EM QUAD */
+    case 0x2002: /* EN SPACE */
+    case 0x2003: /* EM SPACE */
+    case 0x2004: /* THREE-PER-EM SPACE */
+    case 0x2005: /* FOUR-PER-EM SPACE */
+    case 0x2006: /* SIX-PER-EM SPACE */
+    case 0x2007: /* FIGURE SPACE */
+    case 0x2008: /* PUNCTUATION SPACE */
+    case 0x2009: /* THIN SPACE */
+    case 0x200A: /* HAIR SPACE */
+    case 0x200B: /* ZERO WIDTH SPACE */
+    case 0x2028: /* LINE SEPARATOR */
+    case 0x2029: /* PARAGRAPH SEPARATOR */
+    case 0x202F: /* NARROW NO-BREAK SPACE */
+    case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
+    case 0x3000: /* IDEOGRAPHIC SPACE */
+        return 1;
+    default:
+        return 0;
+    }
 }
 
 /* Returns 1 for Unicode characters having the category 'Ll', 0