summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
committer: Guido van Rossum <guido@python.org> 2007-11-06 21:34:58 (GMT)
commit: 98297ee7815939b124156e438b22bd652d67b5db (patch)
tree: a9d239ebd87c73af2571ab48003984c4e18e27e5 /Parser
parent: a19f80c6df2df5e8a5d0cff37131097835ef971e (diff)
download: cpython-98297ee7815939b124156e438b22bd652d67b5db.zip
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.gz
cpython-98297ee7815939b124156e438b22bd652d67b5db.tar.bz2
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137 branch. The most obvious changes:

- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.

I.e. we now have an immutable bytes type and a mutable bytes type.

The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/tokenizer.c 23
1 files changed, 11 insertions, 12 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5b3fd9e..099f6df 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -646,7 +646,7 @@ decode_str(const char *str, struct tok_state *tok)
"unknown encoding: %s", tok->enc);
return error_ret(tok);
}
- str = PyBytes_AsString(utf8);
+ str = PyString_AS_STRING(utf8);
}
assert(tok->decoding_buffer == NULL);
tok->decoding_buffer = utf8; /* CAUTION */
@@ -765,8 +765,8 @@ tok_nextc(register struct tok_state *tok)
tok->done = E_DECODE;
return EOF;
}
- buflen = PyBytes_Size(u);
- buf = PyBytes_AsString(u);
+ buflen = PyString_GET_SIZE(u);
+ buf = PyString_AS_STRING(u);
if (!buf) {
Py_DECREF(u);
tok->done = E_DECODE;
@@ -1550,7 +1550,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
#else
static PyObject *
dec_utf8(const char *enc, const char *text, size_t len) {
- PyObject *ret = NULL;
+ PyObject *ret = NULL;
PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
if (unicode_text) {
ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
@@ -1560,7 +1560,7 @@ dec_utf8(const char *enc, const char *text, size_t len) {
PyErr_Clear();
}
else {
- assert(PyBytes_Check(ret));
+ assert(PyString_Check(ret));
}
return ret;
}
@@ -1573,8 +1573,8 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
/* convert source to original encondig */
PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
if (lineobj != NULL) {
- int linelen = PyBytes_GET_SIZE(lineobj);
- const char *line = PyBytes_AS_STRING(lineobj);
+ int linelen = PyString_GET_SIZE(lineobj);
+ const char *line = PyString_AS_STRING(lineobj);
text = PyObject_MALLOC(linelen + 1);
if (text != NULL && line != NULL) {
if (linelen)
@@ -1582,19 +1582,18 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
text[linelen] = '\0';
}
Py_DECREF(lineobj);
-
+
/* adjust error offset */
if (*offset > 1) {
- PyObject *offsetobj = dec_utf8(tok->encoding,
+ PyObject *offsetobj = dec_utf8(tok->encoding,
tok->buf,
*offset-1);
if (offsetobj) {
- *offset = 1 +
- PyBytes_GET_SIZE(offsetobj);
+ *offset = 1 + Py_Size(offsetobj);
Py_DECREF(offsetobj);
}
}
-
+
}
}
return text;