diff options
author | Guido van Rossum <guido@python.org> | 2007-11-06 21:34:58 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-11-06 21:34:58 (GMT) |
commit | 98297ee7815939b124156e438b22bd652d67b5db (patch) | |
tree | a9d239ebd87c73af2571ab48003984c4e18e27e5 /Parser/tokenizer.c | |
parent | a19f80c6df2df5e8a5d0cff37131097835ef971e (diff) | |
download | cpython-98297ee7815939b124156e438b22bd652d67b5db.zip cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2 |
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137
branch. The most obvious changes:
- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.
I.e. we now have an immutable bytes type and a mutable bytes type.
The behavior of PyString was modified quite a bit, to make it more
bytes-like. Some changes are still on the to-do list.
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 23 |
1 files changed, 11 insertions, 12 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5b3fd9e..099f6df 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -646,7 +646,7 @@ decode_str(const char *str, struct tok_state *tok) "unknown encoding: %s", tok->enc); return error_ret(tok); } - str = PyBytes_AsString(utf8); + str = PyString_AS_STRING(utf8); } assert(tok->decoding_buffer == NULL); tok->decoding_buffer = utf8; /* CAUTION */ @@ -765,8 +765,8 @@ tok_nextc(register struct tok_state *tok) tok->done = E_DECODE; return EOF; } - buflen = PyBytes_Size(u); - buf = PyBytes_AsString(u); + buflen = PyString_GET_SIZE(u); + buf = PyString_AS_STRING(u); if (!buf) { Py_DECREF(u); tok->done = E_DECODE; @@ -1550,7 +1550,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset) #else static PyObject * dec_utf8(const char *enc, const char *text, size_t len) { - PyObject *ret = NULL; + PyObject *ret = NULL; PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace"); if (unicode_text) { ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace"); @@ -1560,7 +1560,7 @@ dec_utf8(const char *enc, const char *text, size_t len) { PyErr_Clear(); } else { - assert(PyBytes_Check(ret)); + assert(PyString_Check(ret)); } return ret; } @@ -1573,8 +1573,8 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) /* convert source to original encondig */ PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len); if (lineobj != NULL) { - int linelen = PyBytes_GET_SIZE(lineobj); - const char *line = PyBytes_AS_STRING(lineobj); + int linelen = PyString_GET_SIZE(lineobj); + const char *line = PyString_AS_STRING(lineobj); text = PyObject_MALLOC(linelen + 1); if (text != NULL && line != NULL) { if (linelen) @@ -1582,19 +1582,18 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) text[linelen] = '\0'; } Py_DECREF(lineobj); - + /* adjust error offset */ if (*offset > 1) { - PyObject *offsetobj = dec_utf8(tok->encoding, + PyObject *offsetobj = dec_utf8(tok->encoding, tok->buf, *offset-1); if (offsetobj) { - *offset = 1 + - PyBytes_GET_SIZE(offsetobj); + *offset = 1 + Py_Size(offsetobj); Py_DECREF(offsetobj); } } - + } } return text; |