summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Ezio Melotti <ezio.melotti@gmail.com> 2010-07-03 04:52:19 (GMT)
committer Ezio Melotti <ezio.melotti@gmail.com> 2010-07-03 04:52:19 (GMT)
commit 9bf2b3ae6a21c254bdd0d8f2dfbebd320494452e (patch)
tree 62e293d3bc58c483a5f92fce0b767f9df2995a45
parent 2f194b906027db630e731021429746eb214b0a7a (diff)
download cpython-9bf2b3ae6a21c254bdd0d8f2dfbebd320494452e.zip
cpython-9bf2b3ae6a21c254bdd0d8f2dfbebd320494452e.tar.gz
cpython-9bf2b3ae6a21c254bdd0d8f2dfbebd320494452e.tar.bz2
Update comment about surrogates.
-rw-r--r-- Objects/unicodeobject.c 10
1 file changed, 5 insertions, 5 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 83bc422..dba3d36 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2450,11 +2450,11 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
break;
case 3:
- /* XXX: surrogates shouldn't be valid UTF-8!
- see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
- (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
- Uncomment the 2 lines below to make them invalid,
- codepoints: d800-dfff; UTF-8: \xed\xa0\x80-\xed\xbf\xbf. */
+ /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf
+ will result in surrogates in range d800-dfff. Surrogates are
+ not valid UTF-8 so they are rejected.
+ See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
+ (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
if ((s[1] & 0xc0) != 0x80 ||
(s[2] & 0xc0) != 0x80 ||
((unsigned char)s[0] == 0xE0 &&