1 files changed, 47 insertions, 20 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 1b4522d..75dec86 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -85,7 +85,7 @@ Copyright (c) Corporation for National Research Initiatives.
 
 /* Py_UNICODE was the native Unicode storage format (code unit) used by
    Python and represents a single Unicode element in the Unicode type.
-   With PEP 393, Py_UNICODE is deprected and replaced with a
+   With PEP 393, Py_UNICODE is deprecated and replaced with a
    typedef to wchar_t. */
 
 #ifndef Py_LIMITED_API
@@ -115,7 +115,7 @@ typedef wchar_t Py_UNICODE;
 #  include <wchar.h>
 #endif
 
-/* Py_UCS4 and Py_UCS2 are typdefs for the respecitve
+/* Py_UCS4 and Py_UCS2 are typedefs for the respective
    unicode representations. */
 #if SIZEOF_INT >= 4
 typedef unsigned int Py_UCS4;
@@ -288,10 +288,27 @@ typedef struct {
         unsigned int interned:2;
         /* Character size:
 
-           PyUnicode_WCHAR_KIND (0): wchar_t*
-           PyUnicode_1BYTE_KIND (1): Py_UCS1*
-           PyUnicode_2BYTE_KIND (2): Py_UCS2*
-           PyUnicode_4BYTE_KIND (3): Py_UCS4*
+           - PyUnicode_WCHAR_KIND (0):
+
+             * character type = wchar_t (16 or 32 bits, depending on the
+               platform)
+
+           - PyUnicode_1BYTE_KIND (1):
+
+             * character type = Py_UCS1 (8 bits, unsigned)
+             * if ascii is set, all characters must be in range
+               U+0000-U+007F, otherwise at least one character must be in range
+               U+0080-U+00FF
+
+           - PyUnicode_2BYTE_KIND (2):
+
+             * character type = Py_UCS2 (16 bits, unsigned)
+             * at least one character must be in range U+0100-U+FFFF
+
+           - PyUnicode_4BYTE_KIND (3):
+
+             * character type = Py_UCS4 (32 bits, unsigned)
+             * at least one character must be in range U+10000-U+10FFFF
          */
         unsigned int kind:2;
         /* Compact is with respect to the allocation scheme. Compact unicode
@@ -299,9 +316,9 @@ typedef struct {
            one block for the PyUnicodeObject struct and another for its data
            buffer. */
         unsigned int compact:1;
-        /* kind is PyUnicode_1BYTE_KIND but data contains only ASCII
-           characters. If ascii is 1 and compact is 1, use the PyASCIIObject
-           structure. */
+        /* The string only contains characters in range U+0000-U+007F (ASCII)
+           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
+           set, use the PyASCIIObject structure. */
         unsigned int ascii:1;
         /* The ready flag indicates whether the object layout is initialized
            completely. This means that this is either a compact object, or
@@ -313,7 +330,7 @@ typedef struct {
 } PyASCIIObject;
 
 /* Non-ASCII strings allocated through PyUnicode_New use the
-   PyCompactUnicodeOject structure. state.compact is set, and the data
+   PyCompactUnicodeObject structure. state.compact is set, and the data
    immediately follow the structure. */
 typedef struct {
     PyASCIIObject _base;
@@ -382,7 +399,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
     ((const char *)(PyUnicode_AS_UNICODE(op)))
 
 
-/* --- Flexible String Representaion Helper Macros (PEP 393) -------------- */
+/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
 
 /* Values for PyUnicodeObject.state: */
 
@@ -426,7 +443,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 #define PyUnicode_CHARACTER_SIZE(op) \
     (1 << (PyUnicode_KIND(op) - 1))
 
-/* Return pointers to the canonical representation casted as unsigned char,
+/* Return pointers to the canonical representation cast to unsigned char,
    Py_UCS2, or Py_UCS4 for direct character access.
    No checks are performed, use PyUnicode_CHARACTER_SIZE or
    PyUnicode_KIND() before to ensure these will work correctly. */
@@ -468,9 +485,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 
 /* Write into the canonical representation, this macro does not do any sanity
    checks and is intended for usage in loops.  The caller should cache the
-   kind and data pointers optained form other macro calls.
+   kind and data pointers obtained from other macro calls.
    index is the index in the string (starts at 0) and value is the new
-   code point value which shoule be written to that location. */
+   code point value which should be written to that location. */
 #define PyUnicode_WRITE(kind, data, index, value) \
     do { \
         switch ((kind)) { \
@@ -489,7 +506,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
         } \
     } while (0)
 
-/* Read a code point form the string's canonical representation.  No checks
+/* Read a code point from the string's canonical representation.  No checks
    or ready calls are performed. */
 #define PyUnicode_READ(kind, data, index) \
     ((Py_UCS4) \
@@ -542,7 +559,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 
 /* Return a maximum character value which is suitable for creating another
    string based on op.  This is always an approximation but more efficient
-   than interating over the string. */
+   than iterating over the string. */
 #define PyUnicode_MAX_CHAR_VALUE(op) \
     (assert(PyUnicode_IS_READY(op)),                                    \
      (PyUnicode_IS_COMPACT_ASCII(op) ? 0x7f:                            \
@@ -654,6 +671,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
     const char *u              /* UTF-8 encoded string */
     );
 
+/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
+   Scan the string to find the maximum character. */
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
     int kind,
@@ -934,8 +953,8 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
 
    In case of an error, no *size is set.
 
-   This funcation caches the UTF-8 encoded string in the unicodeobject
-   and subsequent calls will return the same string.  The memory is relased
+   This function caches the UTF-8 encoded string in the unicodeobject
+   and subsequent calls will return the same string.  The memory is released
    when the unicodeobject is deallocated.
 
    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
@@ -1585,7 +1604,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
 
    These are capable of handling Unicode objects and strings on input
    (we refer to them as strings in the descriptions) and return
-   Unicode objects or integers as apporpriate. */
+   Unicode objects or integers as appropriate. */
 
 /* Concat two strings giving a new Unicode string. */
 
@@ -1765,7 +1784,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
 /* Rich compare two strings and return one of the following:
 
    - NULL in case an exception was raised
-   - Py_True or Py_False for successfuly comparisons
+   - Py_True or Py_False for successful comparisons
    - Py_NotImplemented in case the type combination is unknown
 
    Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
@@ -1833,6 +1852,7 @@ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buff
    see Objects/stringlib/localeutil.h */
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
+    PyObject *unicode,
     int kind,
     void *buffer,
     Py_ssize_t n_buffer,
@@ -2011,6 +2031,13 @@ PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
     );
 #endif /* Py_LIMITED_API */
 
+#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
+/* FIXME: use PyObject* type for op */
+PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
+    void *op,
+    int check_content);
+#endif
+
 #ifdef __cplusplus
 }
 #endif