From 4d6ec85a022d05f11966004edc36151ab26bb13a Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Wed, 26 Mar 2008 22:34:47 +0000 Subject: Merged revisions 61952-61953 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r61952 | mark.dickinson | 2008-03-26 22:41:36 +0100 (Wed, 26 Mar 2008) | 2 lines Typo: "objects reference count" -> "object's reference count" ........ r61953 | christian.heimes | 2008-03-26 23:01:37 +0100 (Wed, 26 Mar 2008) | 4 lines Patch #2477: Added from __future__ import unicode_literals The new PyParser_*Ex() functions are based on Neal's suggestion and initial patch. The new __future__ feature makes all '' and r'' unicode strings. b'' and br'' stay (byte) strings. ........ --- Doc/c-api/structures.rst | 2 +- Include/code.h | 1 + Include/compile.h | 1 + Include/parsetok.h | 10 ++++++++++ Lib/__future__.py | 6 ++++++ Parser/parser.c | 16 ++++++++++++---- Parser/parsetok.c | 34 ++++++++++++++++++++++++++++------ Python/ast.c | 24 +++++++++++------------- Python/future.c | 2 ++ Python/import.c | 3 ++- Python/pythonrun.c | 23 ++++++++++++++++++----- 11 files changed, 92 insertions(+), 30 deletions(-) diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 7d47cf8..5079e0d 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -20,7 +20,7 @@ definition of all other Python objects. All object types are extensions of this type. This is a type which contains the information Python needs to treat a pointer to an object as an object. In a - normal "release" build, it contains only the objects reference count and a + normal "release" build, it contains only the object's reference count and a pointer to the corresponding type object. It corresponds to the fields defined by the expansion of the ``PyObject_HEAD`` macro. diff --git a/Include/code.h b/Include/code.h index e576bbb..a9b0708 100644 --- a/Include/code.h +++ b/Include/code.h @@ -49,6 +49,7 @@ typedef struct { #define CO_FUTURE_ABSOLUTE_IMPORT 0x4000 /* do absolute imports by default */ #define CO_FUTURE_WITH_STATEMENT 0x8000 #define CO_FUTURE_PRINT_FUNCTION 0x10000 +#define CO_FUTURE_UNICODE_LITERALS 0x20000 #endif /* This should be defined if a future statement modifies the syntax. diff --git a/Include/compile.h b/Include/compile.h index 391c710..d24cad7 100644 --- a/Include/compile.h +++ b/Include/compile.h @@ -25,6 +25,7 @@ typedef struct { #define FUTURE_ABSOLUTE_IMPORT "absolute_import" #define FUTURE_WITH_STATEMENT "with_statement" #define FUTURE_PRINT_FUNCTION "print_function" +#define FUTURE_UNICODE_LITERALS "unicode_literals" struct _mod; /* Declare the existence of this type */ PyAPI_FUNC(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, diff --git a/Include/parsetok.h b/Include/parsetok.h index 71033dc..81f1771 100644 --- a/Include/parsetok.h +++ b/Include/parsetok.h @@ -25,6 +25,8 @@ typedef struct { #if 0 #define PyPARSE_WITH_IS_KEYWORD 0x0003 +#define PyPARSE_PRINT_IS_FUNCTION 0x0004 +#define PyPARSE_UNICODE_LITERALS 0x0008 #endif PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int, @@ -38,11 +40,19 @@ PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, const char*, grammar *, int, char *, char *, perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx(FILE *, const char *, + const char*, grammar *, + int, char *, char *, + perrdetail *, int *); PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename(const char *, const char *, grammar *, int, perrdetail *, int); +PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *, + const char *, + grammar *, int, + perrdetail *, int *); /* Note that he following function is defined in pythonrun.c not parsetok.c. */ PyAPI_FUNC(void) PyParser_SetError(perrdetail *); diff --git a/Lib/__future__.py b/Lib/__future__.py index ea14bf3..9156459 100644 --- a/Lib/__future__.py +++ b/Lib/__future__.py @@ -54,6 +54,7 @@ all_feature_names = [ "absolute_import", "with_statement", "print_function", + "unicode_literals", ] __all__ = ["all_feature_names"] + all_feature_names @@ -68,6 +69,7 @@ CO_FUTURE_DIVISION = 0x2000 # division CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement CO_FUTURE_PRINT_FUNCTION = 0x10000 # print function +CO_FUTURE_UNICODE_LITERALS = 0x20000 # unicode string literals class _Feature: def __init__(self, optionalRelease, mandatoryRelease, compiler_flag): @@ -120,3 +122,7 @@ with_statement = _Feature((2, 5, 0, "alpha", 1), print_function = _Feature((2, 6, 0, "alpha", 2), (3, 0, 0, "alpha", 0), CO_FUTURE_PRINT_FUNCTION) + +unicode_literals = _Feature((2, 6, 0, "alpha", 2), + (3, 0, 0, "alpha", 0), + CO_FUTURE_UNICODE_LITERALS) diff --git a/Parser/parser.c b/Parser/parser.c index 134e14c..9e115df 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -206,10 +206,18 @@ future_hack(parser_state *ps) for (i = 0; i < NCH(ch); i += 2) { cch = CHILD(ch, i); - if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME && - strcmp(STR(CHILD(cch, 0)), "with_statement") == 0) { - ps->p_flags |= CO_FUTURE_WITH_STATEMENT; - break; + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { + char *str_ch = STR(CHILD(cch, 0)); + if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; + break; + } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + break; + } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + break; + } } } } diff --git a/Parser/parsetok.c b/Parser/parsetok.c index 708c26d..873be97 100644 --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -14,7 +14,7 @@ int Py_TabcheckFlag; /* Forward */ -static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int); +static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); static void initerr(perrdetail *err_ret, const char* filename); /* Parse input coming from a string. Return error code, print some errors. */ @@ -37,6 +37,16 @@ PyParser_ParseStringFlagsFilename(const char *s, const char *filename, grammar *g, int start, perrdetail *err_ret, int flags) { + int iflags = flags; + return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, + err_ret, &iflags); +} + +node * +PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int *flags) +{ struct tok_state *tok; initerr(err_ret, filename); @@ -64,10 +74,20 @@ PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, } node * -PyParser_ParseFileFlags(FILE *fp, const char *filename, const char* enc, +PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc, grammar *g, int start, char *ps1, char *ps2, perrdetail *err_ret, int flags) { + int iflags = flags; + return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1, + ps2, err_ret, &iflags); +} + +node * +PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, + const char *enc, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int *flags) +{ struct tok_state *tok; initerr(err_ret, filename); @@ -104,7 +124,7 @@ warn(const char *msg, const char *filename, int lineno) static node * parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, - int flags) + int *flags) { parser_state *ps; node *n; @@ -117,7 +137,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, return NULL; } #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD - if (flags & PyPARSE_WITH_IS_KEYWORD) + if (*flags & PyPARSE_WITH_IS_KEYWORD) ps->p_flags |= CO_FUTURE_WITH_STATEMENT; #endif @@ -141,7 +161,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, except if a certain flag is given -- codeop.py uses this. */ if (tok->indent && - !(flags & PyPARSE_DONT_IMPLY_DEDENT)) + !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) { tok->pendin = -tok->indent; tok->indent = 0; @@ -205,7 +225,9 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, } else n = NULL; - +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + *flags = ps->p_flags; +#endif PyParser_Delete(ps); if (n == NULL) { diff --git a/Python/ast.c b/Python/ast.c index be25ac3..759dd73 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -35,7 +35,7 @@ static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *); static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); static PyObject *parsenumber(const char *); -static PyObject *parsestr(const node *n, const char *encoding, int *bytesmode); +static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode); static PyObject *parsestrplus(struct compiling *, const node *n, int *bytesmode); @@ -3191,14 +3191,13 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding) * parsestr parses it, and returns the decoded Python string object. */ static PyObject * -parsestr(const node *n, const char *encoding, int *bytesmode) +parsestr(struct compiling *c, const node *n, int *bytesmode) { size_t len; const char *s = STR(n); int quote = Py_CHARMASK(*s); int rawmode = 0; int need_encoding; - if (isalpha(quote)) { if (quote == 'b' || quote == 'B') { quote = *++s; @@ -3233,7 +3232,7 @@ parsestr(const node *n, const char *encoding, int *bytesmode) } } if (!*bytesmode && !rawmode) { - return decode_unicode(s, len, rawmode, encoding); + return decode_unicode(s, len, rawmode, c->c_encoding); } if (*bytesmode) { /* Disallow non-ascii characters (but not escapes) */ @@ -3246,28 +3245,27 @@ parsestr(const node *n, const char *encoding, int *bytesmode) } } } - need_encoding = (!*bytesmode && encoding != NULL && - strcmp(encoding, "utf-8") != 0 && - strcmp(encoding, "iso-8859-1") != 0); + need_encoding = (!*bytesmode && c->c_encoding != NULL && + strcmp(c->c_encoding, "utf-8") != 0 && + strcmp(c->c_encoding, "iso-8859-1") != 0); if (rawmode || strchr(s, '\\') == NULL) { if (need_encoding) { PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL); if (u == NULL || !*bytesmode) return u; - v = PyUnicode_AsEncodedString(u, encoding, NULL); + v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL); Py_DECREF(u); return v; } else if (*bytesmode) { return PyString_FromStringAndSize(s, len); - } else if (strcmp(encoding, "utf-8") == 0) { + } else if (strcmp(c->c_encoding, "utf-8") == 0) { return PyUnicode_FromStringAndSize(s, len); } else { return PyUnicode_DecodeLatin1(s, len, NULL); } } - return PyString_DecodeEscape(s, len, NULL, 1, - need_encoding ? encoding : NULL); + need_encoding ? c->c_encoding : NULL); } /* Build a Python string object out of a STRING+ atom. This takes care of @@ -3280,13 +3278,13 @@ parsestrplus(struct compiling *c, const node *n, int *bytesmode) PyObject *v; int i; REQ(CHILD(n, 0), STRING); - v = parsestr(CHILD(n, 0), c->c_encoding, bytesmode); + v = parsestr(c, CHILD(n, 0), bytesmode); if (v != NULL) { /* String literal concatenation */ for (i = 1; i < NCH(n); i++) { PyObject *s; int subbm = 0; - s = parsestr(CHILD(n, i), c->c_encoding, &subbm); + s = parsestr(c, CHILD(n, i), &subbm); if (s == NULL) goto onError; if (*bytesmode != subbm) { diff --git a/Python/future.c b/Python/future.c index d6333ff..a5bee0c 100644 --- a/Python/future.c +++ b/Python/future.c @@ -35,6 +35,8 @@ future_check_features(PyFutureFeatures *ff, stmt_ty s, const char *filename) continue; } else if (strcmp(feature, FUTURE_PRINT_FUNCTION) == 0) { continue; + } else if (strcmp(feature, FUTURE_UNICODE_LITERALS) == 0) { + continue; } else if (strcmp(feature, "braces") == 0) { PyErr_SetString(PyExc_SyntaxError, "not a chance"); diff --git a/Python/import.c b/Python/import.c index d201dae..33cb81c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -821,12 +821,13 @@ parse_source_module(const char *pathname, FILE *fp) { PyCodeObject *co = NULL; mod_ty mod; + PyCompilerFlags flags; PyArena *arena = PyArena_New(); if (arena == NULL) return NULL; mod = PyParser_ASTFromFile(fp, pathname, NULL, - Py_file_input, 0, 0, 0, + Py_file_input, 0, 0, &flags, NULL, arena); if (mod) { co = PyAST_Compile(mod, pathname, NULL, arena); diff --git a/Python/pythonrun.c b/Python/pythonrun.c index d65d12d..5c17b69 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1563,11 +1563,12 @@ Py_SymtableString(const char *str, const char *filename, int start) { struct symtable *st; mod_ty mod; + PyCompilerFlags flags; PyArena *arena = PyArena_New(); if (arena == NULL) return NULL; - mod = PyParser_ASTFromString(str, filename, start, NULL, arena); + mod = PyParser_ASTFromString(str, filename, start, &flags, arena); if (mod == NULL) { PyArena_Free(arena); return NULL; @@ -1584,10 +1585,16 @@ PyParser_ASTFromString(const char *s, const char *filename, int start, { mod_ty mod; perrdetail err; - node *n = PyParser_ParseStringFlagsFilename(s, filename, + int iflags; + iflags = PARSER_FLAGS(flags); + + node *n = PyParser_ParseStringFlagsFilenameEx(s, filename, &_PyParser_Grammar, start, &err, - PARSER_FLAGS(flags)); + &iflags); if (n) { + if (flags) { + flags->cf_flags |= iflags & PyCF_MASK; + } mod = PyAST_FromNode(n, flags, filename, arena); PyNode_Free(n); return mod; @@ -1606,10 +1613,16 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc, { mod_ty mod; perrdetail err; - node *n = PyParser_ParseFileFlags(fp, filename, enc, + int iflags; + + iflags = PARSER_FLAGS(flags); + node *n = PyParser_ParseFileFlagsEx(fp, filename, enc, &_PyParser_Grammar, - start, ps1, ps2, &err, PARSER_FLAGS(flags)); + start, ps1, ps2, &err, &iflags); if (n) { + if (flags) { + flags->cf_flags |= iflags & PyCF_MASK; + } mod = PyAST_FromNode(n, flags, filename, arena); PyNode_Free(n); return mod; -- cgit v0.12