summary | refs | log | tree | commit | diff | stats
path: root/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/Python.asdl 4
-rwxr-xr-x Parser/asdl_c.py 8
-rw-r--r-- Parser/grammar.mak 45
-rw-r--r-- Parser/myreadline.c 2
-rw-r--r-- Parser/parsetok.c 19
-rw-r--r-- Parser/pgenmain.c 2
-rw-r--r-- Parser/printgrammar.c 2
-rw-r--r-- Parser/tokenizer.c 140
-rw-r--r-- Parser/tokenizer.h 7
9 files changed, 131 insertions, 98 deletions
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 789e07b..9407b2f 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -17,7 +17,7 @@ module Python version "$Revision$"
expr? starargs,
expr? kwargs,
stmt* body,
- expr *decorator_list)
+ expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
@@ -36,7 +36,7 @@ module Python version "$Revision$"
| Assert(expr test, expr? msg)
| Import(alias* names)
- | ImportFrom(identifier module, alias* names, int? level)
+ | ImportFrom(identifier? module, alias* names, int? level)
| Global(identifier* names)
| Nonlocal(identifier* names)
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 6df11f5..d6555d6 100755
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -376,6 +376,7 @@ class Obj2ModVisitor(PickleVisitor):
self.emit(format % error, 1, reflow=False)
if add_label:
self.emit("failed:", 1)
+ self.emit("Py_XDECREF(tmp);", 1)
self.emit("return 1;", 1)
self.emit("}", 0)
self.emit("", 0)
@@ -720,7 +721,7 @@ static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int
}
PyTuple_SET_ITEM(fnames, i, field);
}
- result = PyObject_CallFunction((PyObject*)&PyType_Type, "U(O){sOss}",
+ result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}",
type, base, "_fields", fnames, "__module__", "_ast");
Py_DECREF(fnames);
return (PyTypeObject*)result;
@@ -730,8 +731,9 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
{
int i, result;
PyObject *s, *l = PyTuple_New(num_fields);
- if (!l) return 0;
- for(i = 0; i < num_fields; i++) {
+ if (!l)
+ return 0;
+ for (i = 0; i < num_fields; i++) {
s = PyUnicode_FromString(attrs[i]);
if (!s) {
Py_DECREF(l);
diff --git a/Parser/grammar.mak b/Parser/grammar.mak
deleted file mode 100644
index 55f028f..0000000
--- a/Parser/grammar.mak
+++ /dev/null
@@ -1,45 +0,0 @@
-# This manages to rebuild graminit.{h, c} under MSVC 6 (Windows), via
-#
-# nmake /f grammar.mak
-#
-# You may also need to copy python23.dll into this directory, or get
-# it on your search path.
-#
-# The intermediate files can be nuked afterwards:
-#
-# nmake /f grammar.mak clean
-#
-# I don't understand the maze of preprocessor #define's on Windows, and
-# as a result this requires linking with python23.lib, so it's of no use
-# for bootstrapping (the cause appears to be a useless-- in this
-# particular case --pragma in PC\pyconfig.h, which demands that
-# python23.lib get linked in).
-
-LIBS= ..\PCbuild\python25.lib
-
-CFLAGS= /I ..\Include /I ..\PC /D MS_NO_COREDLL /D PGEN /MD
-
-GRAMMAR_H= ..\Include\graminit.h
-GRAMMAR_C= ..\Python\graminit.c
-GRAMMAR_INPUT= ..\Grammar\Grammar
-
-PGEN= pgen.exe
-
-POBJS= acceler.obj grammar1.obj listnode.obj node.obj parser.obj \
- parsetok.obj tokenizer.obj bitset.obj metagrammar.obj
-
-PARSER_OBJS= $(POBJS) myreadline.obj
-
-PGOBJS= firstsets.obj grammar.obj pgen.obj printgrammar.obj pgenmain.obj
-
-PGENOBJS= $(POBJS) $(PGOBJS)
-
-$(GRAMMAR_H) $(GRAMMAR_C): $(PGEN) $(GRAMMAR_INPUT)
- $(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C)
-
-$(PGEN): $(PGENOBJS)
- $(CC) $(PGENOBJS) $(LIBS) /Fe$(PGEN)
-
-clean:
- del *.obj
- del $(PGEN)
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 7166fc1..50802c3 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -87,7 +87,7 @@ my_fgets(char *buf, int len, FILE *fp)
#endif
if (s < 0)
return 1;
- /* try again */
+ /* try again */
continue;
}
#endif
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 16cf5cb..7636a54 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
+ int exec_input = start == file_input;
initerr(err_ret, filename);
if (*flags & PyPARSE_IGNORE_COOKIE)
- tok = PyTokenizer_FromUTF8(s);
+ tok = PyTokenizer_FromUTF8(s, exec_input);
else
- tok = PyTokenizer_FromString(s);
+ tok = PyTokenizer_FromString(s, exec_input);
if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
@@ -240,16 +241,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
}
}
} else if (tok->encoding != NULL) {
+ /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
+ * allocated using PyMem_
+ */
node* r = PyNode_New(encoding_decl);
- if (!r) {
+ if (r)
+ r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
+ if (!r || !r->n_str) {
err_ret->error = E_NOMEM;
+ if (r)
+ PyObject_FREE(r);
n = NULL;
goto done;
}
- r->n_str = tok->encoding;
+ strcpy(r->n_str, tok->encoding);
+ PyMem_FREE(tok->encoding);
+ tok->encoding = NULL;
r->n_nchildren = 1;
r->n_child = n;
- tok->encoding = NULL;
n = r;
}
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index 88fa7f1..4b7b55a 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -13,6 +13,8 @@
- check for duplicate definitions of names (instead of fatal err)
*/
+#define PGEN
+
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
diff --git a/Parser/printgrammar.c b/Parser/printgrammar.c
index 01f552f..dd7e6ae 100644
--- a/Parser/printgrammar.c
+++ b/Parser/printgrammar.c
@@ -1,6 +1,8 @@
/* Print a bunch of C initializers that represent a grammar */
+#define PGEN
+
#include "pgenheaders.h"
#include "grammar.h"
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d985131..3f6be2f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -119,6 +119,7 @@ tok_new(void)
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_OK;
tok->fp = NULL;
+ tok->input = NULL;
tok->tabsize = TABSIZE;
tok->indent = 0;
tok->indstack[0] = 0;
@@ -145,6 +146,17 @@ tok_new(void)
return tok;
}
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+ char* result = (char *)PyMem_MALLOC(len + 1);
+ if (result != NULL) {
+ memcpy(result, s, len);
+ result[len] = '\0';
+ }
+ return result;
+}
+
#ifdef PGEN
static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
return feof(tok->fp);
}
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return str;
+ return new_string(str, strlen(str));
}
#else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
return NULL; /* as if it were EOF */
}
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
- char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
- }
- return result;
-}
static char *
get_normal_name(char *s) /* for utf-8 and latin-1 */
@@ -243,7 +245,7 @@ get_coding_spec(const char *s, Py_ssize_t size)
} while (t[0] == '\x20' || t[0] == '\t');
begin = t;
- while (isalnum(Py_CHARMASK(t[0])) ||
+ while (Py_ISALNUM(t[0]) ||
t[0] == '-' || t[0] == '_' || t[0] == '.')
t++;
@@ -460,17 +462,20 @@ static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
PyObject *readline = NULL, *stream = NULL, *io = NULL;
+ int fd;
io = PyImport_ImportModuleNoBlock("io");
if (io == NULL)
goto cleanup;
- if (tok->filename)
- stream = PyObject_CallMethod(io, "open", "ssis",
- tok->filename, "r", -1, enc);
- else
- stream = PyObject_CallMethod(io, "open", "isisOOO",
- fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False);
+ fd = fileno(tok->fp);
+ if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
+ PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL);
+ goto cleanup;
+ }
+
+ stream = PyObject_CallMethod(io, "open", "isisOOO",
+ fd, "r", -1, enc, Py_None, Py_None, Py_False);
if (stream == NULL)
goto cleanup;
@@ -540,6 +545,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
{
char *line = NULL;
int badchar = 0;
+ PyObject *filename;
for (;;) {
if (tok->decoding_state == STATE_NORMAL) {
/* We already have a codec associated with
@@ -578,16 +584,18 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
}
}
if (badchar) {
- char buf[500];
/* Need to add 1 to the line number, since this line
has not been counted, yet. */
- sprintf(buf,
- "Non-UTF-8 code starting with '\\x%.2x' "
- "in file %.200s on line %i, "
- "but no encoding declared; "
- "see http://python.org/dev/peps/pep-0263/ for details",
- badchar, tok->filename, tok->lineno + 1);
- PyErr_SetString(PyExc_SyntaxError, buf);
+ filename = PyUnicode_DecodeFSDefault(tok->filename);
+ if (filename != NULL) {
+ PyErr_Format(PyExc_SyntaxError,
+ "Non-UTF-8 code starting with '\\x%.2x' "
+ "in file %U on line %i, "
+ "but no encoding declared; "
+ "see http://python.org/dev/peps/pep-0263/ for details",
+ badchar, filename, tok->lineno + 1);
+ Py_DECREF(filename);
+ }
return error_ret(tok);
}
#endif
@@ -652,17 +660,62 @@ translate_into_utf8(const char* str, const char* enc) {
return utf8;
}
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+ int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
+ char *buf, *current;
+ char c = '\0';
+ buf = PyMem_MALLOC(needed_length);
+ if (buf == NULL) {
+ tok->done = E_NOMEM;
+ return NULL;
+ }
+ for (current = buf; *s; s++, current++) {
+ c = *s;
+ if (skip_next_lf) {
+ skip_next_lf = 0;
+ if (c == '\n') {
+ c = *++s;
+ if (!c)
+ break;
+ }
+ }
+ if (c == '\r') {
+ skip_next_lf = 1;
+ c = '\n';
+ }
+ *current = c;
+ }
+ /* If this is exec input, add a newline to the end of the string if
+ there isn't one already. */
+ if (exec_input && c != '\n') {
+ *current = '\n';
+ current++;
+ }
+ *current = '\0';
+ final_length = current - buf + 1;
+ if (final_length < needed_length && final_length)
+ /* should never fail */
+ buf = PyMem_REALLOC(buf, final_length);
+ return buf;
+}
+
/* Decode a byte string STR for use as the buffer of TOK.
Look for encoding declarations inside STR, and record them
inside TOK. */
static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
{
PyObject* utf8 = NULL;
+ const char *str;
const char *s;
const char *newl[2] = {NULL, NULL};
int lineno = 0;
+ tok->input = str = translate_newlines(input, single, tok);
+ if (str == NULL)
+ return NULL;
tok->enc = NULL;
tok->str = str;
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -713,12 +766,12 @@ decode_str(const char *str, struct tok_state *tok)
/* Set up tokenizer for string */
struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
- str = (char *)decode_str(str, tok);
+ str = (char *)decode_str(str, exec_input, tok);
if (str == NULL) {
PyTokenizer_Free(tok);
return NULL;
@@ -730,11 +783,18 @@ PyTokenizer_FromString(const char *str)
}
struct tok_state *
-PyTokenizer_FromUTF8(const char *str)
+PyTokenizer_FromUTF8(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
if (tok == NULL)
return NULL;
+#ifndef PGEN
+ tok->input = str = translate_newlines(str, exec_input, tok);
+#endif
+ if (str == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
tok->decoding_state = STATE_RAW;
tok->read_coding_spec = 1;
tok->enc = NULL;
@@ -751,7 +811,6 @@ PyTokenizer_FromUTF8(const char *str)
return tok;
}
-
/* Set up tokenizer for file */
struct tok_state *
@@ -797,6 +856,8 @@ PyTokenizer_Free(struct tok_state *tok)
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
+ if (tok->input)
+ PyMem_FREE((char *)tok->input);
PyMem_FREE(tok);
}
@@ -832,6 +893,13 @@ tok_nextc(register struct tok_state *tok)
if (tok->prompt != NULL) {
char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
#ifndef PGEN
+ if (newtok != NULL) {
+ char *translated = translate_newlines(newtok, 0, tok);
+ PyMem_FREE(newtok);
+ if (translated == NULL)
+ return EOF;
+ newtok = translated;
+ }
if (tok->encoding && newtok && *newtok) {
/* Recode to UTF-8 */
Py_ssize_t buflen;
@@ -1407,10 +1475,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
c = tok_nextc(tok);
if (c == '.')
goto fraction;
-#ifndef WITHOUT_COMPLEX
if (c == 'j' || c == 'J')
goto imaginary;
-#endif
if (c == 'x' || c == 'X') {
/* Hex */
@@ -1462,10 +1528,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
goto fraction;
else if (c == 'e' || c == 'E')
goto exponent;
-#ifndef WITHOUT_COMPLEX
else if (c == 'j' || c == 'J')
goto imaginary;
-#endif
else if (nonzero) {
tok->done = E_TOKEN;
tok_backup(tok, c);
@@ -1502,12 +1566,10 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
c = tok_nextc(tok);
} while (isdigit(c));
}
-#ifndef WITHOUT_COMPLEX
if (c == 'j' || c == 'J')
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
-#endif
}
}
tok_backup(tok, c);
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 6c1742f..2be3bf2 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -53,15 +53,16 @@ struct tok_state {
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
#ifndef PGEN
- PyObject *decoding_readline; /* codecs.open(...).readline */
+ PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
#endif
const char* enc; /* Encoding for the current str. */
const char* str;
+ const char* input; /* Tokenizer's newline translated copy of the string. */
};
-extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
char *, char *);
extern void PyTokenizer_Free(struct tok_state *);