summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/Python.asdl 14
-rw-r--r-- Parser/asdl.py 25
-rwxr-xr-x Parser/asdl_c.py 59
-rw-r--r-- Parser/intrcheck.c 174
-rw-r--r-- Parser/parsetok.c 37
-rw-r--r-- Parser/parsetok_pgen.c 2
-rw-r--r-- Parser/pgenmain.c 2
-rw-r--r-- Parser/tokenizer.c 79
-rw-r--r-- Parser/tokenizer.h 9
9 files changed, 139 insertions, 262 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 9407b2f..6955199 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -1,6 +1,6 @@
--- ASDL's four builtin types are identifier, int, string, object
+-- ASDL's five builtin types are identifier, int, string, bytes, object
-module Python version "$Revision$"
+module Python
{
mod = Module(stmt* body)
| Interactive(stmt* body)
@@ -28,11 +28,10 @@ module Python version "$Revision$"
| For(expr target, expr iter, stmt* body, stmt* orelse)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
- | With(expr context_expr, expr? optional_vars, stmt* body)
+ | With(withitem* items, stmt* body)
| Raise(expr? exc, expr? cause)
- | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
- | TryFinally(stmt* body, stmt* finalbody)
+ | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
@@ -68,7 +67,7 @@ module Python version "$Revision$"
expr? starargs, expr? kwargs)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
- | Bytes(string s)
+ | Bytes(bytes s)
| Ellipsis
-- other literals? bools?
@@ -100,7 +99,6 @@ module Python version "$Revision$"
comprehension = (expr target, expr iter, expr* ifs)
- -- not sure what to call the first argument for raise and except
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset)
@@ -115,5 +113,7 @@ module Python version "$Revision$"
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
+
+ withitem = (expr context_expr, expr? optional_vars)
}
diff --git a/Parser/asdl.py b/Parser/asdl.py
index 7b4e2dc..01a8b5e 100644
--- a/Parser/asdl.py
+++ b/Parser/asdl.py
@@ -114,28 +114,20 @@ class ASDLParser(spark.GenericParser, object):
raise ASDLSyntaxError(tok.lineno, tok)
def p_module_0(self, info):
- " module ::= Id Id version { } "
- module, name, version, _0, _1 = info
+ " module ::= Id Id { } "
+ module, name, _0, _1 = info
if module.value != "module":
raise ASDLSyntaxError(module.lineno,
msg="expected 'module', found %s" % module)
- return Module(name, None, version)
+ return Module(name, None)
def p_module(self, info):
- " module ::= Id Id version { definitions } "
- module, name, version, _0, definitions, _1 = info
+ " module ::= Id Id { definitions } "
+ module, name, _0, definitions, _1 = info
if module.value != "module":
raise ASDLSyntaxError(module.lineno,
msg="expected 'module', found %s" % module)
- return Module(name, definitions, version)
-
- def p_version(self, info):
- "version ::= Id String"
- version, V = info
- if version.value != "version":
- raise ASDLSyntaxError(version.lineno,
- msg="expected 'version', found %" % version)
- return V
+ return Module(name, definitions)
def p_definition_0(self, definition):
" definitions ::= definition "
@@ -236,7 +228,7 @@ class ASDLParser(spark.GenericParser, object):
" field ::= Id ? "
return Field(type[0], opt=True)
-builtin_types = ("identifier", "string", "int", "bool", "object")
+builtin_types = ("identifier", "string", "bytes", "int", "object")
# below is a collection of classes to capture the AST of an AST :-)
# not sure if any of the methods are useful yet, but I'm adding them
@@ -246,10 +238,9 @@ class AST(object):
pass # a marker class
class Module(AST):
- def __init__(self, name, dfns, version):
+ def __init__(self, name, dfns):
self.name = name
self.dfns = dfns
- self.version = version
self.types = {} # maps type name to value (from dfns)
for type in dfns:
self.types[type.name.value] = type.value
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index b85c07e..07c06de 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -5,6 +5,7 @@
# handle fields that have a type but no name
import os, sys
+import subprocess
import asdl
@@ -84,8 +85,16 @@ class EmitVisitor(asdl.VisitorBase):
def __init__(self, file):
self.file = file
+ self.identifiers = set()
super(EmitVisitor, self).__init__()
+ def emit_identifier(self, name):
+ name = str(name)
+ if name in self.identifiers:
+ return
+ self.emit("_Py_IDENTIFIER(%s);" % name, 0)
+ self.identifiers.add(name)
+
def emit(self, s, depth, reflow=True):
# XXX reflow long lines?
if reflow:
@@ -485,12 +494,12 @@ class Obj2ModVisitor(PickleVisitor):
def visitField(self, field, name, sum=None, prod=None, depth=0):
ctype = get_c_type(field.type)
- self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth)
+ self.emit("if (_PyObject_HasAttrId(obj, &PyId_%s)) {" % field.name, depth)
self.emit("int res;", depth+1)
if field.seq:
self.emit("Py_ssize_t len;", depth+1)
self.emit("Py_ssize_t i;", depth+1)
- self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1)
+ self.emit("tmp = _PyObject_GetAttrId(obj, &PyId_%s);" % field.name, depth+1)
self.emit("if (tmp == NULL) goto failed;", depth+1)
if field.seq:
self.emit("if (!PyList_Check(tmp)) {", depth+1)
@@ -552,6 +561,8 @@ class PyTypesDeclareVisitor(PickleVisitor):
self.emit("static PyTypeObject *%s_type;" % name, 0)
self.emit("static PyObject* ast2obj_%s(void*);" % name, 0)
if prod.fields:
+ for f in prod.fields:
+ self.emit_identifier(f.name)
self.emit("static char *%s_fields[]={" % name,0)
for f in prod.fields:
self.emit('"%s",' % f.name, 1)
@@ -560,6 +571,8 @@ class PyTypesDeclareVisitor(PickleVisitor):
def visitSum(self, sum, name):
self.emit("static PyTypeObject *%s_type;" % name, 0)
if sum.attributes:
+ for a in sum.attributes:
+ self.emit_identifier(a.name)
self.emit("static char *%s_attributes[] = {" % name, 0)
for a in sum.attributes:
self.emit('"%s",' % a.name, 1)
@@ -579,6 +592,8 @@ class PyTypesDeclareVisitor(PickleVisitor):
def visitConstructor(self, cons, name):
self.emit("static PyTypeObject *%s_type;" % cons.name, 0)
if cons.fields:
+ for t in cons.fields:
+ self.emit_identifier(t.name)
self.emit("static char *%s_fields[]={" % cons.name, 0)
for t in cons.fields:
self.emit('"%s",' % t.name, 1)
@@ -591,10 +606,11 @@ class PyTypesVisitor(PickleVisitor):
static int
ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
{
+ _Py_IDENTIFIER(_fields);
Py_ssize_t i, numfields = 0;
int res = -1;
PyObject *key, *value, *fields;
- fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields");
+ fields = _PyObject_GetAttrId((PyObject*)Py_TYPE(self), &PyId__fields);
if (!fields)
PyErr_Clear();
if (fields) {
@@ -644,7 +660,8 @@ static PyObject *
ast_type_reduce(PyObject *self, PyObject *unused)
{
PyObject *res;
- PyObject *dict = PyObject_GetAttrString(self, "__dict__");
+ _Py_IDENTIFIER(__dict__);
+ PyObject *dict = _PyObject_GetAttrId(self, &PyId___dict__);
if (dict == NULL) {
if (PyErr_ExceptionMatches(PyExc_AttributeError))
PyErr_Clear();
@@ -730,6 +747,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int
static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
{
int i, result;
+ _Py_IDENTIFIER(_attributes);
PyObject *s, *l = PyTuple_New(num_fields);
if (!l)
return 0;
@@ -741,7 +759,7 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
}
PyTuple_SET_ITEM(l, i, s);
}
- result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0;
+ result = _PyObject_SetAttrId((PyObject*)type, &PyId__attributes, l) >= 0;
Py_DECREF(l);
return result;
}
@@ -775,6 +793,7 @@ static PyObject* ast2obj_object(void *o)
}
#define ast2obj_identifier ast2obj_object
#define ast2obj_string ast2obj_object
+#define ast2obj_bytes ast2obj_object
static PyObject* ast2obj_int(long b)
{
@@ -812,6 +831,15 @@ static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
return obj2ast_object(obj, out, arena);
}
+static int obj2ast_bytes(PyObject* obj, PyObject** out, PyArena* arena)
+{
+ if (!PyBytes_CheckExact(obj)) {
+ PyErr_SetString(PyExc_TypeError, "AST bytes must be of type bytes");
+ return 1;
+ }
+ return obj2ast_object(obj, out, arena);
+}
+
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
int i;
@@ -910,10 +938,6 @@ class ASTModuleVisitor(PickleVisitor):
self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1)
self.emit("return NULL;", 2)
- # Value of version: "$Revision$"
- self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)'
- % mod.version, 1)
- self.emit("return NULL;", 2)
for dfn in mod.dfns:
self.visit(dfn)
self.emit("return m;", 1)
@@ -997,7 +1021,7 @@ class ObjVisitor(PickleVisitor):
for a in sum.attributes:
self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1)
self.emit("if (!value) goto failed;", 1)
- self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1)
+ self.emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) < 0)' % a.name, 1)
self.emit('goto failed;', 2)
self.emit('Py_DECREF(value);', 1)
self.func_end()
@@ -1043,7 +1067,7 @@ class ObjVisitor(PickleVisitor):
value = "o->v.%s.%s" % (name, field.name)
self.set(field, value, depth)
emit("if (!value) goto failed;", 0)
- emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0)
+ emit('if (_PyObject_SetAttrId(result, &PyId_%s, value) == -1)' % field.name, 0)
emit("goto failed;", 1)
emit("Py_DECREF(value);", 0)
@@ -1134,24 +1158,12 @@ class ChainOfVisitors:
common_msg = "/* File automatically generated by %s. */\n\n"
-c_file_msg = """
-/*
- __version__ %s.
-
- This module must be committed separately after each AST grammar change;
- The __version__ number is set to the revision number of the commit
- containing the grammar change.
-*/
-
-"""
-
def main(srcfile):
argv0 = sys.argv[0]
components = argv0.split(os.sep)
argv0 = os.sep.join(components[-2:])
auto_gen_msg = common_msg % argv0
mod = asdl.parse(srcfile)
- mod.version = "82163"
if not asdl.check(mod):
sys.exit(1)
if INC_DIR:
@@ -1173,7 +1185,6 @@ def main(srcfile):
p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c")
f = open(p, "w")
f.write(auto_gen_msg)
- f.write(c_file_msg % mod.version)
f.write('#include "Python.h"\n')
f.write('#include "%s-ast.h"\n' % mod.name)
f.write('\n')
diff --git a/Parser/intrcheck.c b/Parser/intrcheck.c
deleted file mode 100644
index 4439864..0000000
--- a/Parser/intrcheck.c
+++ /dev/null
@@ -1,174 +0,0 @@
-
-/* Check for interrupts */
-
-#include "Python.h"
-#include "pythread.h"
-
-#ifdef QUICKWIN
-
-#include <io.h>
-
-void
-PyOS_InitInterrupts(void)
-{
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- _wyield();
-}
-
-#define OK
-
-#endif /* QUICKWIN */
-
-#if defined(_M_IX86) && !defined(__QNX__)
-#include <io.h>
-#endif
-
-#if defined(MSDOS) && !defined(QUICKWIN)
-
-#ifdef __GNUC__
-
-/* This is for DJGPP's GO32 extender. I don't know how to trap
- * control-C (There's no API for ctrl-C, and I don't want to mess with
- * the interrupt vectors.) However, this DOES catch control-break.
- * --Amrit
- */
-
-#include <go32.h>
-
-void
-PyOS_InitInterrupts(void)
-{
- _go32_want_ctrl_break(1 /* TRUE */);
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- return _go32_was_ctrl_break_hit();
-}
-
-#else /* !__GNUC__ */
-
-/* This might work for MS-DOS (untested though): */
-
-void
-PyOS_InitInterrupts(void)
-{
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- int interrupted = 0;
- while (kbhit()) {
- if (getch() == '\003')
- interrupted = 1;
- }
- return interrupted;
-}
-
-#endif /* __GNUC__ */
-
-#define OK
-
-#endif /* MSDOS && !QUICKWIN */
-
-
-#ifndef OK
-
-/* Default version -- for real operating systems and for Standard C */
-
-#include <stdio.h>
-#include <string.h>
-#include <signal.h>
-
-static int interrupted;
-
-void
-PyErr_SetInterrupt(void)
-{
- interrupted = 1;
-}
-
-extern int PyErr_CheckSignals(void);
-
-static int
-checksignals_witharg(void * arg)
-{
- return PyErr_CheckSignals();
-}
-
-static void
-intcatcher(int sig)
-{
- extern void Py_Exit(int);
- static char message[] =
-"python: to interrupt a truly hanging Python program, interrupt once more.\n";
- switch (interrupted++) {
- case 0:
- break;
- case 1:
- write(2, message, strlen(message));
- break;
- case 2:
- interrupted = 0;
- Py_Exit(1);
- break;
- }
- PyOS_setsig(SIGINT, intcatcher);
- Py_AddPendingCall(checksignals_witharg, NULL);
-}
-
-static void (*old_siginthandler)(int) = SIG_DFL;
-
-void
-PyOS_InitInterrupts(void)
-{
- if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)
- PyOS_setsig(SIGINT, intcatcher);
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
- PyOS_setsig(SIGINT, old_siginthandler);
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- if (!interrupted)
- return 0;
- interrupted = 0;
- return 1;
-}
-
-#endif /* !OK */
-
-void
-PyOS_AfterFork(void)
-{
-#ifdef WITH_THREAD
- PyEval_ReInitThreads();
- PyThread_ReInitTLS();
-#endif
-}
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 73e7e3c..f22ac67 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -13,7 +13,7 @@
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
-static void initerr(perrdetail *err_ret, const char* filename);
+static int initerr(perrdetail *err_ret, const char* filename);
/* Parse input coming from a string. Return error code, print some errors. */
node *
@@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
struct tok_state *tok;
int exec_input = start == file_input;
- initerr(err_ret, filename);
+ if (initerr(err_ret, filename) < 0)
+ return NULL;
if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input);
@@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
return NULL;
}
- tok->filename = filename ? filename : "<string>";
+#ifndef PGEN
+ Py_INCREF(err_ret->filename);
+ tok->filename = err_ret->filename;
+#endif
return parsetok(tok, g, start, err_ret, flags);
}
@@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
{
struct tok_state *tok;
- initerr(err_ret, filename);
+ if (initerr(err_ret, filename) < 0)
+ return NULL;
if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
- tok->filename = filename;
+#ifndef PGEN
+ Py_INCREF(err_ret->filename);
+ tok->filename = err_ret->filename;
+#endif
return parsetok(tok, g, start, err_ret, flags);
}
@@ -127,7 +135,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
{
parser_state *ps;
node *n;
- int started = 0, handling_import = 0, handling_with = 0;
+ int started = 0;
if ((ps = PyParser_New(g, start)) == NULL) {
fprintf(stderr, "no mem for new parser\n");
@@ -154,7 +162,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
}
if (type == ENDMARKER && started) {
type = NEWLINE; /* Add an extra newline */
- handling_with = handling_import = 0;
started = 0;
/* Add the right number of dedent tokens,
except if a certain flag is given --
@@ -227,7 +234,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
PyParser_Delete(ps);
if (n == NULL) {
- if (tok->lineno <= 1 && tok->done == E_EOF)
+ if (tok->done == E_EOF)
err_ret->error = E_EOF;
err_ret->lineno = tok->lineno;
if (tok->buf != NULL) {
@@ -270,14 +277,24 @@ done:
return n;
}
-static void
+static int
initerr(perrdetail *err_ret, const char *filename)
{
err_ret->error = E_OK;
- err_ret->filename = filename;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
+#ifndef PGEN
+ if (filename)
+ err_ret->filename = PyUnicode_DecodeFSDefault(filename);
+ else
+ err_ret->filename = PyUnicode_FromString("<string>");
+ if (err_ret->filename == NULL) {
+ err_ret->error = E_ERROR;
+ return -1;
+ }
+#endif
+ return 0;
}
diff --git a/Parser/parsetok_pgen.c b/Parser/parsetok_pgen.c
new file mode 100644
index 0000000..97b9288
--- /dev/null
+++ b/Parser/parsetok_pgen.c
@@ -0,0 +1,2 @@
+#define PGEN
+#include "parsetok.c"
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 4b7b55a..52b8380 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -29,6 +29,8 @@ int Py_IgnoreEnvironmentFlag;
/* Forward */
grammar *getgrammar(char *filename);
+void Py_Exit(int) _Py_NO_RETURN;
+
void
Py_Exit(int sts)
{
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5ba12a4..c3b2f35 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -128,7 +128,6 @@ tok_new(void)
tok->prompt = tok->nextprompt = NULL;
tok->lineno = 0;
tok->level = 0;
- tok->filename = NULL;
tok->altwarning = 1;
tok->alterror = 1;
tok->alttabsize = 1;
@@ -140,6 +139,7 @@ tok_new(void)
tok->encoding = NULL;
tok->cont_line = 0;
#ifndef PGEN
+ tok->filename = NULL;
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
#endif
@@ -462,6 +462,8 @@ static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
PyObject *readline = NULL, *stream = NULL, *io = NULL;
+ _Py_IDENTIFIER(open);
+ _Py_IDENTIFIER(readline);
int fd;
io = PyImport_ImportModuleNoBlock("io");
@@ -474,13 +476,13 @@ fp_setreadl(struct tok_state *tok, const char* enc)
goto cleanup;
}
- stream = PyObject_CallMethod(io, "open", "isisOOO",
+ stream = _PyObject_CallMethodId(io, &PyId_open, "isisOOO",
fd, "r", -1, enc, Py_None, Py_None, Py_False);
if (stream == NULL)
goto cleanup;
Py_XDECREF(tok->decoding_readline);
- readline = PyObject_GetAttrString(stream, "readline");
+ readline = _PyObject_GetAttrId(stream, &PyId_readline);
tok->decoding_readline = readline;
/* The file has been reopened; parsing will restart from
@@ -545,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
{
char *line = NULL;
int badchar = 0;
- PyObject *filename;
for (;;) {
if (tok->decoding_state == STATE_NORMAL) {
/* We already have a codec associated with
@@ -586,19 +587,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
if (badchar) {
/* Need to add 1 to the line number, since this line
has not been counted, yet. */
- if (tok->filename != NULL)
- filename = PyUnicode_DecodeFSDefault(tok->filename);
- else
- filename = PyUnicode_FromString("<file>");
- if (filename != NULL) {
- PyErr_Format(PyExc_SyntaxError,
- "Non-UTF-8 code starting with '\\x%.2x' "
- "in file %U on line %i, "
- "but no encoding declared; "
- "see http://python.org/dev/peps/pep-0263/ for details",
- badchar, filename, tok->lineno + 1);
- Py_DECREF(filename);
- }
+ PyErr_Format(PyExc_SyntaxError,
+ "Non-UTF-8 code starting with '\\x%.2x' "
+ "in file %U on line %i, "
+ "but no encoding declared; "
+ "see http://python.org/dev/peps/pep-0263/ for details",
+ badchar, tok->filename, tok->lineno + 1);
return error_ret(tok);
}
#endif
@@ -856,6 +850,7 @@ PyTokenizer_Free(struct tok_state *tok)
#ifndef PGEN
Py_XDECREF(tok->decoding_readline);
Py_XDECREF(tok->decoding_buffer);
+ Py_XDECREF(tok->filename);
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
@@ -1250,8 +1245,13 @@ indenterror(struct tok_state *tok)
return 1;
}
if (tok->altwarning) {
- PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
+#ifdef PGEN
+ PySys_WriteStderr("inconsistent use of tabs and spaces "
+ "in indentation\n");
+#else
+ PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
"in indentation\n", tok->filename);
+#endif
tok->altwarning = 0;
}
return 0;
@@ -1260,14 +1260,16 @@ indenterror(struct tok_state *tok)
#ifdef PGEN
#define verify_identifier(tok) 1
#else
-/* Verify that the identifier follows PEP 3131. */
+/* Verify that the identifier follows PEP 3131.
+ All identifier strings are guaranteed to be "ready" unicode objects.
+ */
static int
verify_identifier(struct tok_state *tok)
{
PyObject *s;
int result;
s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
- if (s == NULL) {
+ if (s == NULL || PyUnicode_READY(s) == -1) {
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
PyErr_Clear();
tok->done = E_IDENTIFIER;
@@ -1692,17 +1694,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
return result;
}
-/* Get -*- encoding -*- from a Python file.
+/* Get the encoding of a Python file. Check for the coding cookie and check if
+ the file starts with a BOM.
- PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
- the first or second line of the file (in which case the encoding
- should be assumed to be PyUnicode_GetDefaultEncoding()).
+ PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+ encoding in the first or second line of the file (in which case the encoding
+ should be assumed to be UTF-8).
+
+ The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+ by the caller. */
- The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
- by the caller.
-*/
char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{
struct tok_state *tok;
FILE *fp;
@@ -1721,6 +1724,20 @@ PyTokenizer_FindEncoding(int fd)
fclose(fp);
return NULL;
}
+#ifndef PGEN
+ if (filename != NULL) {
+ Py_INCREF(filename);
+ tok->filename = filename;
+ }
+ else {
+ tok->filename = PyUnicode_FromString("<string>");
+ if (tok->filename == NULL) {
+ fclose(fp);
+ PyTokenizer_Free(tok);
+ return encoding;
+ }
+ }
+#endif
while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end);
}
@@ -1734,6 +1751,12 @@ PyTokenizer_FindEncoding(int fd)
return encoding;
}
+char *
+PyTokenizer_FindEncoding(int fd)
+{
+ return PyTokenizer_FindEncodingFilename(fd, NULL);
+}
+
#ifdef Py_DEBUG
void
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 2be3bf2..ed1f3aa 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -40,7 +40,13 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
- const char *filename; /* encoded to the filesystem encoding */
+#ifndef PGEN
+ /* pgen doesn't have access to Python codecs, it cannot decode the input
+ filename. The bytes filename might be kept, but it is only used by
+ indenterror() and it is not really needed: pgen only compiles one file
+ (Grammar/Grammar). */
+ PyObject *filename;
+#endif
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
@@ -69,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
-extern char * PyTokenizer_FindEncoding(int);
#ifdef __cplusplus
}