summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Florent Xicluna <florent.xicluna@gmail.com> 2010-03-30 08:24:06 (GMT)
committer: Florent Xicluna <florent.xicluna@gmail.com> 2010-03-30 08:24:06 (GMT)
commit: 22b243809eea5f4f504f9a7ac157539adde6b3f7 (patch)
tree: c910ac626faa4b6ec4565e1762b41089364aee5d /Objects
parent: e6410c536c9dca5a3a7899888c071f41a1767291 (diff)
download: cpython-22b243809eea5f4f504f9a7ac157539adde6b3f7.zip
cpython-22b243809eea5f4f504f9a7ac157539adde6b3f7.tar.gz
cpython-22b243809eea5f4f504f9a7ac157539adde6b3f7.tar.bz2
#7643: Unicode codepoints VT (0x0B) and FF (0x0C) are linebreaks according to Unicode Standard Annex #14.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 8
-rw-r--r-- Objects/unicodetype_db.h 9
2 files changed, 11 insertions, 6 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 4943413..930d58c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -115,9 +115,9 @@ static char unicode_default_encoding[100];
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
0, 0, 0, 0, 0, 0, 0, 0,
-/* case 0x0009: * HORIZONTAL TABULATION */
+/* case 0x0009: * CHARACTER TABULATION */
/* case 0x000A: * LINE FEED */
-/* case 0x000B: * VERTICAL TABULATION */
+/* case 0x000B: * LINE TABULATION */
/* case 0x000C: * FORM FEED */
/* case 0x000D: * CARRIAGE RETURN */
0, 1, 1, 1, 1, 1, 0, 0,
@@ -147,8 +147,10 @@ const unsigned char _Py_ascii_whitespace[] = {
static unsigned char ascii_linebreak[] = {
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x000A, * LINE FEED */
+/* 0x000B, * LINE TABULATION */
+/* 0x000C, * FORM FEED */
/* 0x000D, * CARRIAGE RETURN */
- 0, 0, 1, 0, 0, 1, 0, 0,
+ 0, 0, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x001C, * FILE SEPARATOR */
/* 0x001D, * GROUP SEPARATOR */
diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
index 443693d..d2ec46b 100644
--- a/Objects/unicodetype_db.h
+++ b/Objects/unicodetype_db.h
@@ -661,7 +661,7 @@ static unsigned char index1[] = {
};
static unsigned char index2[] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 1, 1, 1, 1, 1, 1, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
@@ -3313,13 +3313,16 @@ int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
#endif
}
-/* Returns 1 for Unicode characters having the category 'Zl',
- * 'Zp' or type 'B', 0 otherwise.
+/* Returns 1 for Unicode characters having the line break
+ * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
+ * type 'B', 0 otherwise.
*/
int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
{
switch (ch) {
case 0x000A:
+ case 0x000B:
+ case 0x000C:
case 0x000D:
case 0x001C:
case 0x001D: