summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2008-03-17 20:43:42 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2008-03-17 20:43:42 (GMT)
commit259314622750c72de2ef377e77a0b70b8d8b2fb5 (patch)
tree089ad865c7be59bf68fd72e0d5c18c12d831e345
parentddaa7064ee81c48adc4fdea327892c29179f7845 (diff)
downloadcpython-259314622750c72de2ef377e77a0b70b8d8b2fb5.zip
cpython-259314622750c72de2ef377e77a0b70b8d8b2fb5.tar.gz
cpython-259314622750c72de2ef377e77a0b70b8d8b2fb5.tar.bz2
Bug #2301: Don't try decoding the source code into the original
encoding for syntax errors.
-rw-r--r--Lib/test/test_pep263.py7
-rw-r--r--Misc/NEWS6
-rw-r--r--Parser/parsetok.c15
-rw-r--r--Parser/tokenizer.c64
4 files changed, 18 insertions, 74 deletions
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index cc126ba..92065c9 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -23,6 +23,13 @@ class PEP263Test(unittest.TestCase):
exec(c, d)
self.assertEqual(d['u'], '\xf3')
+ def test_issue2301(self):
+ try:
+ compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
+ except SyntaxError as v:
+ self.assertEquals(v.text, "print '\u5e74'")
+ else:
+ self.fail()
def test_main():
test_support.run_unittest(PEP263Test)
diff --git a/Misc/NEWS b/Misc/NEWS
index 1665256..6c38150 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,12 @@ What's New in Python 3.0a4?
*Release date: XX-XXX-2008*
+Core and Builtins
+-----------------
+
+- Bug #2301: Don't try decoding the source code into the original
+ encoding for syntax errors.
+
Extension Modules
-----------------
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 0b3314e..708c26d 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -213,21 +213,16 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
err_ret->error = E_EOF;
err_ret->lineno = tok->lineno;
if (tok->buf != NULL) {
- char *text = NULL;
size_t len;
assert(tok->cur - tok->buf < INT_MAX);
err_ret->offset = (int)(tok->cur - tok->buf);
len = tok->inp - tok->buf;
- text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
- if (text == NULL) {
- text = (char *) PyObject_MALLOC(len + 1);
- if (text != NULL) {
- if (len > 0)
- strncpy(text, tok->buf, len);
- text[len] = '\0';
- }
+ err_ret->text = (char *) PyObject_MALLOC(len + 1);
+ if (err_ret->text != NULL) {
+ if (len > 0)
+ strncpy(err_ret->text, tok->buf, len);
+ err_ret->text[len] = '\0';
}
- err_ret->text = text;
}
} else if (tok->encoding != NULL) {
node* r = PyNode_New(encoding_decl);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 2833e53..0b8341a 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1579,70 +1579,6 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
return result;
}
-/* This function is only called from parsetok. However, it cannot live
- there, as it must be empty for PGEN, and we can check for PGEN only
- in this file. */
-
-#ifdef PGEN
-char*
-PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
-{
- return NULL;
-}
-#else
-static PyObject *
-dec_utf8(const char *enc, const char *text, size_t len) {
- PyObject *ret = NULL;
- PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
- if (unicode_text) {
- ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
- Py_DECREF(unicode_text);
- }
- if (!ret) {
- PyErr_Clear();
- }
- else {
- assert(PyString_Check(ret));
- }
- return ret;
-}
-
-char *
-PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
-{
- char *text = NULL;
- if (tok->encoding) {
- /* convert source to original encondig */
- PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
- if (lineobj != NULL) {
- int linelen = PyString_GET_SIZE(lineobj);
- const char *line = PyString_AS_STRING(lineobj);
- text = PyObject_MALLOC(linelen + 1);
- if (text != NULL && line != NULL) {
- if (linelen)
- strncpy(text, line, linelen);
- text[linelen] = '\0';
- }
- Py_DECREF(lineobj);
-
- /* adjust error offset */
- if (*offset > 1) {
- PyObject *offsetobj = dec_utf8(tok->encoding,
- tok->buf,
- *offset-1);
- if (offsetobj) {
- *offset = 1 + Py_SIZE(offsetobj);
- Py_DECREF(offsetobj);
- }
- }
-
- }
- }
- return text;
-
-}
-#endif
-
/* Get -*- encoding -*- from a Python file.
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in