diff options
author | Florent Xicluna <florent.xicluna@gmail.com> | 2010-03-30 08:24:06 (GMT) |
---|---|---|
committer | Florent Xicluna <florent.xicluna@gmail.com> | 2010-03-30 08:24:06 (GMT) |
commit | 22b243809eea5f4f504f9a7ac157539adde6b3f7 (patch) | |
tree | c910ac626faa4b6ec4565e1762b41089364aee5d /Objects | |
parent | e6410c536c9dca5a3a7899888c071f41a1767291 (diff) | |
download | cpython-22b243809eea5f4f504f9a7ac157539adde6b3f7.zip cpython-22b243809eea5f4f504f9a7ac157539adde6b3f7.tar.gz cpython-22b243809eea5f4f504f9a7ac157539adde6b3f7.tar.bz2 |
#7643: Unicode codepoints VT (0x0B) and FF (0x0C) are linebreaks according to Unicode Standard Annex #14.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 8 | ||||
-rw-r--r-- | Objects/unicodetype_db.h | 9 |
2 files changed, 11 insertions, 6 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4943413..930d58c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -115,9 +115,9 @@ static char unicode_default_encoding[100]; /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { 0, 0, 0, 0, 0, 0, 0, 0, -/* case 0x0009: * HORIZONTAL TABULATION */ +/* case 0x0009: * CHARACTER TABULATION */ /* case 0x000A: * LINE FEED */ -/* case 0x000B: * VERTICAL TABULATION */ +/* case 0x000B: * LINE TABULATION */ /* case 0x000C: * FORM FEED */ /* case 0x000D: * CARRIAGE RETURN */ 0, 1, 1, 1, 1, 1, 0, 0, @@ -147,8 +147,10 @@ const unsigned char _Py_ascii_whitespace[] = { static unsigned char ascii_linebreak[] = { 0, 0, 0, 0, 0, 0, 0, 0, /* 0x000A, * LINE FEED */ +/* 0x000B, * LINE TABULATION */ +/* 0x000C, * FORM FEED */ /* 0x000D, * CARRIAGE RETURN */ - 0, 0, 1, 0, 0, 1, 0, 0, + 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x001C, * FILE SEPARATOR */ /* 0x001D, * GROUP SEPARATOR */ diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h index 443693d..d2ec46b 100644 --- a/Objects/unicodetype_db.h +++ b/Objects/unicodetype_db.h @@ -661,7 +661,7 @@ static unsigned char index1[] = { }; static unsigned char index2[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, @@ -3313,13 +3313,16 @@ int _PyUnicode_IsWhitespace(register const Py_UNICODE ch) #endif } -/* Returns 1 for Unicode characters having the category 'Zl', - * 'Zp' or type 'B', 0 otherwise. +/* Returns 1 for Unicode characters having the line break + * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional + * type 'B', 0 otherwise. */ int _PyUnicode_IsLinebreak(register const Py_UNICODE ch) { switch (ch) { case 0x000A: + case 0x000B: + case 0x000C: case 0x000D: case 0x001C: case 0x001D: |