author     Benjamin Peterson <benjamin@python.org>    2009-03-02 23:31:26 (GMT)
committer  Benjamin Peterson <benjamin@python.org>    2009-03-02 23:31:26 (GMT)
commit     f5b52246ed8a1191c3aa1da7d3c63bbe11aee020 (patch)
tree       dcac9a1daca7f583ff958cb61f73e9ed11b5564b
parent     0663a1ed793c164fb11d3dd62bebc677e260891e (diff)
ignore the coding cookie in compile(), exec(), and eval() if the source is a string #4626
-rw-r--r--   Include/parsetok.h          2
-rw-r--r--   Include/pythonrun.h         1
-rw-r--r--   Lib/test/test_coding.py     6
-rw-r--r--   Lib/test/test_pep263.py     6
-rw-r--r--   Misc/NEWS                   3
-rw-r--r--   Parser/parsetok.c           6
-rw-r--r--   Parser/tokenizer.c         22
-rw-r--r--   Parser/tokenizer.h          1
-rw-r--r--   Python/bltinmodule.c       16
-rw-r--r--   Python/pythonrun.c         14
10 files changed, 63 insertions(+), 14 deletions(-)
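
The effect of the change at the Python level, as a rough sketch modeled on the test_issue4626 case added further down (the "<dummy>" filename is illustrative): a str source is treated as already decoded, so a coding cookie inside it is ignored, while a bytes source still goes through normal PEP 263 cookie handling.

# Sketch of the intended behavior; assumes a Python 3 build with this patch applied.
src = "# coding=latin-1\n\u00c6 = '\u00c6'"   # str source: the cookie is ignored
ns = {}
exec(compile(src, "<dummy>", "exec"), ns)
assert ns['\xc6'] == '\xc6'

raw = src.encode("latin-1")                   # bytes source: the cookie is still honored
ns = {}
exec(compile(raw, "<dummy>", "exec"), ns)
assert ns['\xc6'] == '\xc6'
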
diff --git a/Include/parsetok.h b/Include/parsetok.h
index 81f1771..fa402f8 100644
--- a/Include/parsetok.h
+++ b/Include/parsetok.h
@@ -29,6 +29,8 @@ typedef struct {
#define PyPARSE_UNICODE_LITERALS 0x0008
#endif
+#define PyPARSE_IGNORE_COOKIE 0x0010
+
PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
perrdetail *);
PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int,
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index e57b7f0..c909e1a 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -12,6 +12,7 @@ extern "C" {
#define PyCF_SOURCE_IS_UTF8 0x0100
#define PyCF_DONT_IMPLY_DEDENT 0x0200
#define PyCF_ONLY_AST 0x0400
+#define PyCF_IGNORE_COOKIE 0x0800
typedef struct {
int cf_flags; /* bitmask of CO_xxx flags relevant to future */
diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py
index ade8bdf..51873b4 100644
--- a/Lib/test/test_coding.py
+++ b/Lib/test/test_coding.py
@@ -17,10 +17,10 @@ class CodingTest(unittest.TestCase):
path = os.path.dirname(__file__)
filename = os.path.join(path, module_name + '.py')
- fp = open(filename, encoding='utf-8')
- text = fp.read()
+ fp = open(filename, "rb")
+ bytes = fp.read()
fp.close()
- self.assertRaises(SyntaxError, compile, text, filename, 'exec')
+ self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
def test_exec_valid_coding(self):
d = {}
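
The switch above to reading the test module as raw bytes matters because, after this change, only a bytes source still triggers coding-cookie validation. A hedged sketch of the distinction (the bogus encoding name "uft-8" and the "<dummy>" filename are made up for illustration):

bogus = b"# coding: uft-8\nx = 1\n"
try:
    compile(bogus, "<dummy>", "exec")            # bytes: unknown encoding in the cookie
except SyntaxError:
    pass
compile(bogus.decode("ascii"), "<dummy>", "exec")  # str: cookie ignored, compiles fine
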
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py
index 72764f9..05ca47f 100644
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -30,6 +30,12 @@ class PEP263Test(unittest.TestCase):
else:
self.fail()
+ def test_issue4626(self):
+ c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
+ d = {}
+ exec(c, d)
+ self.assertEquals(d['\xc6'], '\xc6')
+
def test_main():
support.run_unittest(PEP263Test)
diff --git a/Misc/NEWS b/Misc/NEWS
index 91c94ff..d737f74 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,9 @@ Core and Builtins
- Issue #5249: time.strftime returned malformed string when format string
contained non ascii character on windows.
+- Issue #4626: compile(), exec(), and eval() ignore the coding cookie if the
+ source has already been decoded into str.
+
- Issue #5186: Reduce hash collisions for objects with no __hash__ method by
rotating the object pointer by 4 bits to the right.
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index d8ff6ee..4c3b506 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -49,7 +49,11 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
initerr(err_ret, filename);
- if ((tok = PyTokenizer_FromString(s)) == NULL) {
+ if (*flags & PyPARSE_IGNORE_COOKIE)
+ tok = PyTokenizer_FromUTF8(s);
+ else
+ tok = PyTokenizer_FromString(s);
+ if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3d52bed..c4f447d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -715,6 +715,28 @@ PyTokenizer_FromString(const char *str)
return tok;
}
+struct tok_state *
+PyTokenizer_FromUTF8(const char *str)
+{
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
+ tok->decoding_state = STATE_RAW;
+ tok->read_coding_spec = 1;
+ tok->enc = NULL;
+ tok->str = str;
+ tok->encoding = (char *)PyMem_MALLOC(6);
+ if (!tok->encoding) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ strcpy(tok->encoding, "utf-8");
+
+ /* XXX: constify members. */
+ tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ return tok;
+}
+
/* Set up tokenizer for file */
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index df9cbc7..e3328f1 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -61,6 +61,7 @@ struct tok_state {
};
extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 9805697..7a27fba 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -494,12 +494,13 @@ PyDoc_STR(
static char *
-source_as_string(PyObject *cmd, char *funcname, char *what)
+source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
{
char *str;
Py_ssize_t size;
if (PyUnicode_Check(cmd)) {
+ cf->cf_flags |= PyCF_IGNORE_COOKIE;
cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL);
if (cmd == NULL)
return NULL;
@@ -591,7 +592,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
return result;
}
- str = source_as_string(cmd, "compile", "string, bytes, AST or code");
+ str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
if (str == NULL)
return NULL;
@@ -703,14 +704,14 @@ builtin_eval(PyObject *self, PyObject *args)
return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
}
- str = source_as_string(cmd, "eval", "string, bytes or code");
+ cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+ str = source_as_string(cmd, "eval", "string, bytes or code", &cf);
if (str == NULL)
return NULL;
while (*str == ' ' || *str == '\t')
str++;
- cf.cf_flags = PyCF_SOURCE_IS_UTF8;
(void)PyEval_MergeCompilerFlags(&cf);
result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
Py_XDECREF(tmp);
@@ -779,12 +780,13 @@ builtin_exec(PyObject *self, PyObject *args)
v = PyEval_EvalCode((PyCodeObject *) prog, globals, locals);
}
else {
- char *str = source_as_string(prog, "exec",
- "string, bytes or code");
+ char *str;
PyCompilerFlags cf;
+ cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+ str = source_as_string(prog, "exec",
+ "string, bytes or code", &cf);
if (str == NULL)
return NULL;
- cf.cf_flags = PyCF_SOURCE_IS_UTF8;
if (PyEval_MergeCompilerFlags(&cf))
v = PyRun_StringFlags(str, Py_file_input, globals,
locals, &cf);
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index 65c6f5f..dee18b6 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1002,9 +1002,17 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flag
}
/* compute parser flags based on compiler flags */
-#define PARSER_FLAGS(flags) \
- ((flags) ? ((((flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? \
- PyPARSE_DONT_IMPLY_DEDENT : 0)) : 0)
+static int PARSER_FLAGS(PyCompilerFlags *flags)
+{
+ int parser_flags = 0;
+ if (!flags)
+ return 0;
+ if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
+ parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
+ if (flags->cf_flags & PyCF_IGNORE_COOKIE)
+ parser_flags |= PyPARSE_IGNORE_COOKIE;
+ return parser_flags;
+}
#if 0
/* Keep an example of flags with future keyword support. */