From 3c60833e1e53f6239825b44f76fa22172feb1790 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 26 Mar 2008 22:01:37 +0000 Subject: Patch #2477: Added from __future__ import unicode_literals The new PyParser_*Ex() functions are based on Neal's suggestion and initial patch. The new __future__ feature makes all '' and r'' string literals unicode strings; b'' and br'' literals remain byte strings. --- Include/code.h | 1 + Include/compile.h | 1 + Include/parsetok.h | 8 ++++++++ Include/pythonrun.h | 3 ++- Lib/__future__.py | 6 ++++++ Misc/NEWS | 8 ++++++++ Parser/parser.c | 20 ++++++++++++-------- Parser/parsetok.c | 33 ++++++++++++++++++++++++++++----- Python/ast.c | 25 ++++++++++++++----------- Python/future.c | 2 ++ Python/import.c | 3 ++- Python/pythonrun.c | 30 +++++++++++++++++++++++------- 12 files changed, 107 insertions(+), 33 deletions(-) diff --git a/Include/code.h b/Include/code.h index 0e89b88..8c00700 100644 --- a/Include/code.h +++ b/Include/code.h @@ -49,6 +49,7 @@ typedef struct { #define CO_FUTURE_ABSOLUTE_IMPORT 0x4000 /* do absolute imports by default */ #define CO_FUTURE_WITH_STATEMENT 0x8000 #define CO_FUTURE_PRINT_FUNCTION 0x10000 +#define CO_FUTURE_UNICODE_LITERALS 0x20000 /* This should be defined if a future statement modifies the syntax. For example, when a keyword is added. 
diff --git a/Include/compile.h b/Include/compile.h index d703edb..43a470d 100644 --- a/Include/compile.h +++ b/Include/compile.h @@ -25,6 +25,7 @@ typedef struct { #define FUTURE_ABSOLUTE_IMPORT "absolute_import" #define FUTURE_WITH_STATEMENT "with_statement" #define FUTURE_PRINT_FUNCTION "print_function" +#define FUTURE_UNICODE_LITERALS "unicode_literals" struct _mod; /* Declare the existence of this type */ diff --git a/Include/parsetok.h b/Include/parsetok.h index 808c72c..ec1eb6f 100644 --- a/Include/parsetok.h +++ b/Include/parsetok.h @@ -28,6 +28,7 @@ typedef struct { #endif #define PyPARSE_PRINT_IS_FUNCTION 0x0004 +#define PyPARSE_UNICODE_LITERALS 0x0008 @@ -41,11 +42,18 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlags(const char *, grammar *, int, PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, grammar *, int, char *, char *, perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx(FILE *, const char *, grammar *, + int, char *, char *, + perrdetail *, int *); PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename(const char *, const char *, grammar *, int, perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *, + const char *, + grammar *, int, + perrdetail *, int *); /* Note that he following function is defined in pythonrun.c not parsetok.c. 
*/ PyAPI_FUNC(void) PyParser_SetError(perrdetail *); diff --git a/Include/pythonrun.h b/Include/pythonrun.h index a4dd914..7dfff28 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -8,7 +8,8 @@ extern "C" { #endif #define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \ - CO_FUTURE_WITH_STATEMENT|CO_FUTURE_PRINT_FUNCTION) + CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \ + CO_FUTURE_UNICODE_LITERALS) #define PyCF_MASK_OBSOLETE (CO_NESTED) #define PyCF_SOURCE_IS_UTF8 0x0100 #define PyCF_DONT_IMPLY_DEDENT 0x0200 diff --git a/Lib/__future__.py b/Lib/__future__.py index ea14bf3..9156459 100644 --- a/Lib/__future__.py +++ b/Lib/__future__.py @@ -54,6 +54,7 @@ all_feature_names = [ "absolute_import", "with_statement", "print_function", + "unicode_literals", ] __all__ = ["all_feature_names"] + all_feature_names @@ -68,6 +69,7 @@ CO_FUTURE_DIVISION = 0x2000 # division CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement CO_FUTURE_PRINT_FUNCTION = 0x10000 # print function +CO_FUTURE_UNICODE_LITERALS = 0x20000 # unicode string literals class _Feature: def __init__(self, optionalRelease, mandatoryRelease, compiler_flag): @@ -120,3 +122,7 @@ with_statement = _Feature((2, 5, 0, "alpha", 1), print_function = _Feature((2, 6, 0, "alpha", 2), (3, 0, 0, "alpha", 0), CO_FUTURE_PRINT_FUNCTION) + +unicode_literals = _Feature((2, 6, 0, "alpha", 2), + (3, 0, 0, "alpha", 0), + CO_FUTURE_UNICODE_LITERALS) diff --git a/Misc/NEWS b/Misc/NEWS index ff6eb95..ce72fba 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 2.6 alpha 2? Core and builtins ----------------- +- Patch #2477: Added from __future__ import unicode_literals + - Added backport of bytearray type. - Issue #2355: add Py3k warning for buffer(). @@ -186,6 +188,12 @@ Build - Patch #2284: Add -x64 option to rt.bat. 
+C API +----- + +- Patch #2477: Added PyParser_ParseFileFlagsEx() and + PyParser_ParseStringFlagsFilenameEx() + What's New in Python 2.6 alpha 1? ================================= diff --git a/Parser/parser.c b/Parser/parser.c index 61da37d..8d52153 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -202,14 +202,18 @@ future_hack(parser_state *ps) for (i = 0; i < NCH(ch); i += 2) { cch = CHILD(ch, i); - if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME && - strcmp(STR(CHILD(cch, 0)), "with_statement") == 0) { - ps->p_flags |= CO_FUTURE_WITH_STATEMENT; - break; - } else if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME && - strcmp(STR(CHILD(cch, 0)), "print_function") == 0) { - ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; - break; + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { + char *str_ch = STR(CHILD(cch, 0)); + if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; + break; + } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + break; + } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + break; + } } } } diff --git a/Parser/parsetok.c b/Parser/parsetok.c index e4db574..d8c8f62 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -14,7 +14,7 @@ int Py_TabcheckFlag; /* Forward */ -static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int); +static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); static void initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. 
*/ @@ -37,6 +37,16 @@ PyParser_ParseStringFlagsFilename(const char *s, const char *filename, grammar *g, int start, perrdetail *err_ret, int flags) { + int iflags = flags; + return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, + err_ret, &iflags); +} + +node * +PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int *flags) +{ struct tok_state *tok; initerr(err_ret, filename); @@ -70,6 +80,14 @@ node * PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, char *ps1, char *ps2, perrdetail *err_ret, int flags) { + int iflags = flags; + return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); +} + +node * +PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int *flags) +{ struct tok_state *tok; initerr(err_ret, filename); @@ -85,7 +103,6 @@ PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, tok->alterror++; } - return parsetok(tok, g, start, err_ret, flags); } @@ -110,7 +127,7 @@ warn(const char *msg, const char *filename, int lineno) static node * parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, - int flags) + int *flags) { parser_state *ps; node *n; @@ -123,8 +140,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, return NULL; } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (flags & PyPARSE_PRINT_IS_FUNCTION) + if (*flags & PyPARSE_PRINT_IS_FUNCTION) { ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } + if (*flags & PyPARSE_UNICODE_LITERALS) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + #endif for (;;) { @@ -147,7 +169,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, except if a certain flag is given -- codeop.py uses this. 
*/ if (tok->indent && - !(flags & PyPARSE_DONT_IMPLY_DEDENT)) + !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) { tok->pendin = -tok->indent; tok->indent = 0; @@ -191,6 +213,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, else n = NULL; + *flags = ps->p_flags; PyParser_Delete(ps); if (n == NULL) { diff --git a/Python/ast.c b/Python/ast.c index 1fc2324..bc91805 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -18,6 +18,7 @@ /* Data structure used internally */ struct compiling { char *c_encoding; /* source encoding */ + int c_future_unicode; /* __future__ unicode literals flag */ PyArena *c_arena; /* arena for allocating memeory */ const char *c_filename; /* filename */ }; @@ -36,7 +37,7 @@ static expr_ty ast_for_testlist_gexp(struct compiling *, const node *); static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); static PyObject *parsenumber(const char *); -static PyObject *parsestr(const char *s, const char *encoding); +static PyObject *parsestr(struct compiling *, const char *); static PyObject *parsestrplus(struct compiling *, const node *n); #ifndef LINENO @@ -198,6 +199,7 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename, } else { c.c_encoding = NULL; } + c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS; c.c_arena = arena; c.c_filename = filename; @@ -3247,13 +3249,13 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding) * parsestr parses it, and returns the decoded Python string object. 
*/ static PyObject * -parsestr(const char *s, const char *encoding) +parsestr(struct compiling *c, const char *s) { size_t len; int quote = Py_CHARMASK(*s); int rawmode = 0; int need_encoding; - int unicode = 0; + int unicode = c->c_future_unicode; if (isalpha(quote) || quote == '_') { if (quote == 'u' || quote == 'U') { @@ -3262,6 +3264,7 @@ parsestr(const char *s, const char *encoding) } if (quote == 'b' || quote == 'B') { quote = *++s; + unicode = 0; } if (quote == 'r' || quote == 'R') { quote = *++s; @@ -3293,12 +3296,12 @@ parsestr(const char *s, const char *encoding) } #ifdef Py_USING_UNICODE if (unicode || Py_UnicodeFlag) { - return decode_unicode(s, len, rawmode, encoding); + return decode_unicode(s, len, rawmode, c->c_encoding); } #endif - need_encoding = (encoding != NULL && - strcmp(encoding, "utf-8") != 0 && - strcmp(encoding, "iso-8859-1") != 0); + need_encoding = (c->c_encoding != NULL && + strcmp(c->c_encoding, "utf-8") != 0 && + strcmp(c->c_encoding, "iso-8859-1") != 0); if (rawmode || strchr(s, '\\') == NULL) { if (need_encoding) { #ifndef Py_USING_UNICODE @@ -3310,7 +3313,7 @@ parsestr(const char *s, const char *encoding) PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL); if (u == NULL) return NULL; - v = PyUnicode_AsEncodedString(u, encoding, NULL); + v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL); Py_DECREF(u); return v; #endif @@ -3320,7 +3323,7 @@ parsestr(const char *s, const char *encoding) } return PyString_DecodeEscape(s, len, NULL, unicode, - need_encoding ? encoding : NULL); + need_encoding ? c->c_encoding : NULL); } /* Build a Python string object out of a STRING atom. 
This takes care of @@ -3333,11 +3336,11 @@ parsestrplus(struct compiling *c, const node *n) PyObject *v; int i; REQ(CHILD(n, 0), STRING); - if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) { + if ((v = parsestr(c, STR(CHILD(n, 0)))) != NULL) { /* String literal concatenation */ for (i = 1; i < NCH(n); i++) { PyObject *s; - s = parsestr(STR(CHILD(n, i)), c->c_encoding); + s = parsestr(c, STR(CHILD(n, i))); if (s == NULL) goto onError; if (PyString_Check(v) && PyString_Check(s)) { diff --git a/Python/future.c b/Python/future.c index 267e1b7..2c6aaa2 100644 --- a/Python/future.c +++ b/Python/future.c @@ -35,6 +35,8 @@ future_check_features(PyFutureFeatures *ff, stmt_ty s, const char *filename) ff->ff_features |= CO_FUTURE_WITH_STATEMENT; } else if (strcmp(feature, FUTURE_PRINT_FUNCTION) == 0) { ff->ff_features |= CO_FUTURE_PRINT_FUNCTION; + } else if (strcmp(feature, FUTURE_UNICODE_LITERALS) == 0) { + ff->ff_features |= CO_FUTURE_UNICODE_LITERALS; } else if (strcmp(feature, "braces") == 0) { PyErr_SetString(PyExc_SyntaxError, "not a chance"); diff --git a/Python/import.c b/Python/import.c index ecbec15..95cd20d 100644 --- a/Python/import.c +++ b/Python/import.c @@ -818,11 +818,12 @@ parse_source_module(const char *pathname, FILE *fp) { PyCodeObject *co = NULL; mod_ty mod; + PyCompilerFlags flags; PyArena *arena = PyArena_New(); if (arena == NULL) return NULL; - mod = PyParser_ASTFromFile(fp, pathname, Py_file_input, 0, 0, 0, + mod = PyParser_ASTFromFile(fp, pathname, Py_file_input, 0, 0, &flags, NULL, arena); if (mod) { co = PyAST_Compile(mod, pathname, NULL, arena); diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 226fee3..423aae1 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -774,8 +774,11 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flag #define PARSER_FLAGS(flags) \ ((flags) ? ((((flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? 
\ PyPARSE_DONT_IMPLY_DEDENT : 0) \ - | ((flags)->cf_flags & CO_FUTURE_PRINT_FUNCTION ? \ - PyPARSE_PRINT_IS_FUNCTION : 0)) : 0) + | (((flags)->cf_flags & CO_FUTURE_PRINT_FUNCTION) ? \ + PyPARSE_PRINT_IS_FUNCTION : 0) \ + | (((flags)->cf_flags & CO_FUTURE_UNICODE_LITERALS) ? \ + PyPARSE_UNICODE_LITERALS : 0) \ + ) : 0) #endif int @@ -1390,11 +1393,12 @@ Py_SymtableString(const char *str, const char *filename, int start) { struct symtable *st; mod_ty mod; + PyCompilerFlags flags; PyArena *arena = PyArena_New(); if (arena == NULL) return NULL; - mod = PyParser_ASTFromString(str, filename, start, NULL, arena); + mod = PyParser_ASTFromString(str, filename, start, &flags, arena); if (mod == NULL) { PyArena_Free(arena); return NULL; @@ -1411,10 +1415,16 @@ PyParser_ASTFromString(const char *s, const char *filename, int start, { mod_ty mod; perrdetail err; - node *n = PyParser_ParseStringFlagsFilename(s, filename, + int iflags; + iflags = PARSER_FLAGS(flags); + + node *n = PyParser_ParseStringFlagsFilenameEx(s, filename, &_PyParser_Grammar, start, &err, - PARSER_FLAGS(flags)); + &iflags); if (n) { + if (flags) { + flags->cf_flags |= iflags & PyCF_MASK; + } mod = PyAST_FromNode(n, flags, filename, arena); PyNode_Free(n); return mod; @@ -1432,9 +1442,15 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, int start, char *ps1, { mod_ty mod; perrdetail err; - node *n = PyParser_ParseFileFlags(fp, filename, &_PyParser_Grammar, - start, ps1, ps2, &err, PARSER_FLAGS(flags)); + int iflags; + + iflags = PARSER_FLAGS(flags); + node *n = PyParser_ParseFileFlagsEx(fp, filename, &_PyParser_Grammar, + start, ps1, ps2, &err, &iflags); if (n) { + if (flags) { + flags->cf_flags |= iflags & PyCF_MASK; + } mod = PyAST_FromNode(n, flags, filename, arena); PyNode_Free(n); return mod; -- cgit v0.12