Merged revisions 79494,79496 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r79494 | florent.xicluna | 2010-03-30 10:24:06 +0200 (mar, 30 mar 2010) | 2 lines

  #7643: Unicode codepoints VT (0x0B) and FF (0x0C) are linebreaks according to Unicode Standard Annex #14.
........
  r79496 | florent.xicluna | 2010-03-30 18:29:03 +0200 (mar, 30 mar 2010) | 2 lines

  Highlight the change of behavior related to r79494. Now VT and FF are linebreaks.
........
author: Florent Xicluna <florent.xicluna@gmail.com> 2010-03-30 19:34:18 (GMT)
committer: Florent Xicluna <florent.xicluna@gmail.com> 2010-03-30 19:34:18 (GMT)
commit: 806d8cf0e8056726580e210e1dea960d6e77c910 (patch)
tree: ed95ffd06d353ecdffdbdacba271d5dda71f80aa /Objects
parent: 364129ef5a806bf919b5d321206cc1b72aed7272 (diff)
download: cpython-806d8cf0e8056726580e210e1dea960d6e77c910.zip
cpython-806d8cf0e8056726580e210e1dea960d6e77c910.tar.gz
cpython-806d8cf0e8056726580e210e1dea960d6e77c910.tar.bz2
2 files changed, 11 insertions, 6 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 28b8c66..a409b22 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -126,9 +126,9 @@ static const char unicode_default_encoding[] = "utf-8";
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
     0, 0, 0, 0, 0, 0, 0, 0,
-/*     case 0x0009: * HORIZONTAL TABULATION */
+/*     case 0x0009: * CHARACTER TABULATION */
 /*     case 0x000A: * LINE FEED */
-/*     case 0x000B: * VERTICAL TABULATION */
+/*     case 0x000B: * LINE TABULATION */
 /*     case 0x000C: * FORM FEED */
 /*     case 0x000D: * CARRIAGE RETURN */
     0, 1, 1, 1, 1, 1, 0, 0,
@@ -163,8 +163,10 @@ static PyObject *unicode_encode_call_errorhandler(const char *errors,
 static unsigned char ascii_linebreak[] = {
     0, 0, 0, 0, 0, 0, 0, 0,
 /*         0x000A, * LINE FEED */
+/*         0x000B, * LINE TABULATION */
+/*         0x000C, * FORM FEED */
 /*         0x000D, * CARRIAGE RETURN */
-    0, 0, 1, 0, 0, 1, 0, 0,
+    0, 0, 1, 1, 1, 1, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
 /*         0x001C, * FILE SEPARATOR */
 /*         0x001D, * GROUP SEPARATOR */
diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
index 8c8955c..424a317 100644
--- a/Objects/unicodetype_db.h
+++ b/Objects/unicodetype_db.h
@@ -694,7 +694,7 @@ static unsigned char index1[] = {
 };
 
 static unsigned char index2[] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
     1, 1, 1, 1, 3, 3, 3, 2, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
     6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 5, 5, 5, 5, 5, 5, 5, 16, 16, 16, 16, 
     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
@@ -3395,13 +3395,16 @@ int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
 #endif
 }
 
-/* Returns 1 for Unicode characters having the category 'Zl',
- * 'Zp' or type 'B', 0 otherwise.
+/* Returns 1 for Unicode characters having the line break
+ * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
+ * type 'B', 0 otherwise.
  */
 int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
 {
     switch (ch) {
     case 0x000A:
+    case 0x000B:
+    case 0x000C:
     case 0x000D:
     case 0x001C:
     case 0x001D:
author	Florent Xicluna <florent.xicluna@gmail.com>	2010-03-30 19:34:18 (GMT)
committer	Florent Xicluna <florent.xicluna@gmail.com>	2010-03-30 19:34:18 (GMT)
commit	806d8cf0e8056726580e210e1dea960d6e77c910 (patch)
tree	ed95ffd06d353ecdffdbdacba271d5dda71f80aa /Objects
parent	364129ef5a806bf919b5d321206cc1b72aed7272 (diff)
download	cpython-806d8cf0e8056726580e210e1dea960d6e77c910.zip cpython-806d8cf0e8056726580e210e1dea960d6e77c910.tar.gz cpython-806d8cf0e8056726580e210e1dea960d6e77c910.tar.bz2