author     Benjamin Peterson <benjamin@python.org>    2009-03-02 23:31:26 (GMT)
committer  Benjamin Peterson <benjamin@python.org>    2009-03-02 23:31:26 (GMT)
commit     f5b52246ed8a1191c3aa1da7d3c63bbe11aee020
tree       dcac9a1daca7f583ff958cb61f73e9ed11b5564b
parent     0663a1ed793c164fb11d3dd62bebc677e260891e
ignore the coding cookie in compile(), exec(), and eval() if the source is a string #4626
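For illustration, a minimal sketch of the new behavior, assuming Python 3.x with this change applied; it mirrors the test_issue4626 case added to Lib/test/test_pep263.py below. Because the source is already a str, the latin-1 cookie is ignored rather than used to re-decode the text:

    # compile() on a str: the coding cookie is ignored, so the
    # non-ASCII identifier and string literal survive unchanged.
    code = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
    ns = {}
    exec(code, ns)
    assert ns['\u00c6'] == '\u00c6'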
 Include/parsetok.h      |  2
 Include/pythonrun.h     |  1
 Lib/test/test_coding.py |  6
 Lib/test/test_pep263.py |  6
 Misc/NEWS               |  3
 Parser/parsetok.c       |  6
 Parser/tokenizer.c      | 22
 Parser/tokenizer.h      |  1
 Python/bltinmodule.c    | 16
 Python/pythonrun.c      | 14
 10 files changed, 63 insertions(+), 14 deletions(-)
diff --git a/Include/parsetok.h b/Include/parsetok.h
index 81f1771..fa402f8 100644
--- a/Include/parsetok.h
+++ b/Include/parsetok.h
@@ -29,6 +29,8 @@ typedef struct {
 #define PyPARSE_UNICODE_LITERALS 0x0008
 #endif
 
+#define PyPARSE_IGNORE_COOKIE 0x0010
+
 PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
                                         perrdetail *);
 PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int,
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index e57b7f0..c909e1a 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -12,6 +12,7 @@ extern "C" {
 #define PyCF_SOURCE_IS_UTF8 0x0100
 #define PyCF_DONT_IMPLY_DEDENT 0x0200
 #define PyCF_ONLY_AST 0x0400
+#define PyCF_IGNORE_COOKIE 0x0800
 
 typedef struct {
     int cf_flags;  /* bitmask of CO_xxx flags relevant to future */
diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py
index ade8bdf..51873b4 100644
--- a/Lib/test/test_coding.py
+++ b/Lib/test/test_coding.py
@@ -17,10 +17,10 @@ class CodingTest(unittest.TestCase):
 
         path = os.path.dirname(__file__)
         filename = os.path.join(path, module_name + '.py')
-        fp = open(filename, encoding='utf-8')
-        text = fp.read()
+        fp = open(filename, "rb")
+        bytes = fp.read()
         fp.close()
-        self.assertRaises(SyntaxError, compile, text, filename, 'exec')
+        self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
 
     def test_exec_valid_coding(self):
         d = {}
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 72764f9..05ca47f 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -30,6 +30,12 @@ class PEP263Test(unittest.TestCase):
         else:
             self.fail()
 
+    def test_issue4626(self):
+        c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
+        d = {}
+        exec(c, d)
+        self.assertEquals(d['\xc6'], '\xc6')
+
 def test_main():
     support.run_unittest(PEP263Test)
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,9 @@ Core and Builtins
 - Issue #5249: time.strftime returned malformed string when format string
   contained non ascii character on windows.
 
+- Issue #4626: compile(), exec(), and eval() ignore the coding cookie if the
+  source has already been decoded into str.
+
 - Issue #5186: Reduce hash collisions for objects with no __hash__ method by
   rotating the object pointer by 4 bits to the right.
 
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index d8ff6ee..4c3b506 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -49,7 +49,11 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
 
     initerr(err_ret, filename);
 
-    if ((tok = PyTokenizer_FromString(s)) == NULL) {
+    if (*flags & PyPARSE_IGNORE_COOKIE)
+        tok = PyTokenizer_FromUTF8(s);
+    else
+        tok = PyTokenizer_FromString(s);
+    if (tok == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
     }
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3d52bed..c4f447d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -715,6 +715,28 @@ PyTokenizer_FromString(const char *str)
     return tok;
 }
 
+struct tok_state *
+PyTokenizer_FromUTF8(const char *str)
+{
+    struct tok_state *tok = tok_new();
+    if (tok == NULL)
+        return NULL;
+    tok->decoding_state = STATE_RAW;
+    tok->read_coding_spec = 1;
+    tok->enc = NULL;
+    tok->str = str;
+    tok->encoding = (char *)PyMem_MALLOC(6);
+    if (!tok->encoding) {
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
+    strcpy(tok->encoding, "utf-8");
+
+    /* XXX: constify members. */
+    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+    return tok;
+}
+
 
 /* Set up tokenizer for file */
 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index df9cbc7..e3328f1 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -61,6 +61,7 @@ struct tok_state {
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char*, char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 9805697..7a27fba 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -494,12 +494,13 @@ PyDoc_STR(
 
 
 static char *
-source_as_string(PyObject *cmd, char *funcname, char *what)
+source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
 {
     char *str;
     Py_ssize_t size;
 
     if (PyUnicode_Check(cmd)) {
+        cf->cf_flags |= PyCF_IGNORE_COOKIE;
         cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL);
         if (cmd == NULL)
             return NULL;
@@ -591,7 +592,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
         return result;
     }
 
-    str = source_as_string(cmd, "compile", "string, bytes, AST or code");
+    str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
     if (str == NULL)
         return NULL;
 
@@ -703,14 +704,14 @@ builtin_eval(PyObject *self, PyObject *args)
         return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
     }
 
-    str = source_as_string(cmd, "eval", "string, bytes or code");
+    cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+    str = source_as_string(cmd, "eval", "string, bytes or code", &cf);
     if (str == NULL)
         return NULL;
 
     while (*str == ' ' || *str == '\t')
         str++;
 
-    cf.cf_flags = PyCF_SOURCE_IS_UTF8;
     (void)PyEval_MergeCompilerFlags(&cf);
     result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
     Py_XDECREF(tmp);
@@ -779,12 +780,13 @@ builtin_exec(PyObject *self, PyObject *args)
         v = PyEval_EvalCode((PyCodeObject *) prog, globals, locals);
     }
     else {
-        char *str = source_as_string(prog, "exec",
-                                     "string, bytes or code");
+        char *str;
         PyCompilerFlags cf;
+        cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+        str = source_as_string(prog, "exec",
+                               "string, bytes or code", &cf);
         if (str == NULL)
             return NULL;
-        cf.cf_flags = PyCF_SOURCE_IS_UTF8;
         if (PyEval_MergeCompilerFlags(&cf))
             v = PyRun_StringFlags(str, Py_file_input, globals,
                                   locals, &cf);
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 65c6f5f..dee18b6 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1002,9 +1002,17 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flag
 }
 
 /* compute parser flags based on compiler flags */
-#define PARSER_FLAGS(flags) \
-    ((flags) ? ((((flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? \
-                 PyPARSE_DONT_IMPLY_DEDENT : 0)) : 0)
+static int PARSER_FLAGS(PyCompilerFlags *flags)
+{
+    int parser_flags = 0;
+    if (!flags)
+        return 0;
+    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
+        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
+    if (flags->cf_flags & PyCF_IGNORE_COOKIE)
+        parser_flags |= PyPARSE_IGNORE_COOKIE;
+    return parser_flags;
+}
 
 #if 0
 /* Keep an example of flags with future keyword support. */
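By contrast, when the source is passed as bytes the coding cookie is still honored, which is what the updated Lib/test/test_coding.py exercises by handing compile() the raw bytes of a module with a bad cookie. A minimal sketch along the same lines; the misspelled 'uft-8' cookie here is an illustrative assumption echoing the kind of bad cookie those tests use:

    # compile() on bytes: the tokenizer still reads the cookie, so an
    # unknown encoding declaration is rejected with SyntaxError.
    try:
        compile(b"# -*- coding: uft-8 -*-\nx = 1\n", "dummy", "exec")
    except SyntaxError:
        pass  # expected: 'uft-8' is not a known codec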