Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r--   Parser/tokenizer.c   2768
1 file changed, 1384 insertions(+), 1384 deletions(-)
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index aef081d..90b1b68 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -19,17 +19,17 @@
#endif /* PGEN */
#define is_potential_identifier_start(c) (\
- (c >= 'a' && c <= 'z')\
- || (c >= 'A' && c <= 'Z')\
- || c == '_'\
- || (c >= 128))
+ (c >= 'a' && c <= 'z')\
+ || (c >= 'A' && c <= 'Z')\
+ || c == '_'\
+ || (c >= 128))
#define is_potential_identifier_char(c) (\
- (c >= 'a' && c <= 'z')\
- || (c >= 'A' && c <= 'Z')\
- || (c >= '0' && c <= '9')\
- || c == '_'\
- || (c >= 128))
+ (c >= 'a' && c <= 'z')\
+ || (c >= 'A' && c <= 'Z')\
+ || (c >= '0' && c <= '9')\
+ || c == '_'\
+ || (c >= 128))
extern char *PyOS_Readline(FILE *, FILE *, char *);
/* Return malloc'ed string including trailing \n;
@@ -48,62 +48,62 @@ static void tok_backup(struct tok_state *tok, int c);
/* Token names */
char *_PyParser_TokenNames[] = {
- "ENDMARKER",
- "NAME",
- "NUMBER",
- "STRING",
- "NEWLINE",
- "INDENT",
- "DEDENT",
- "LPAR",
- "RPAR",
- "LSQB",
- "RSQB",
- "COLON",
- "COMMA",
- "SEMI",
- "PLUS",
- "MINUS",
- "STAR",
- "SLASH",
- "VBAR",
- "AMPER",
- "LESS",
- "GREATER",
- "EQUAL",
- "DOT",
- "PERCENT",
- "LBRACE",
- "RBRACE",
- "EQEQUAL",
- "NOTEQUAL",
- "LESSEQUAL",
- "GREATEREQUAL",
- "TILDE",
- "CIRCUMFLEX",
- "LEFTSHIFT",
- "RIGHTSHIFT",
- "DOUBLESTAR",
- "PLUSEQUAL",
- "MINEQUAL",
- "STAREQUAL",
- "SLASHEQUAL",
- "PERCENTEQUAL",
- "AMPEREQUAL",
- "VBAREQUAL",
- "CIRCUMFLEXEQUAL",
- "LEFTSHIFTEQUAL",
- "RIGHTSHIFTEQUAL",
- "DOUBLESTAREQUAL",
- "DOUBLESLASH",
- "DOUBLESLASHEQUAL",
- "AT",
- "RARROW",
- "ELLIPSIS",
- /* This table must match the #defines in token.h! */
- "OP",
- "<ERRORTOKEN>",
- "<N_TOKENS>"
+ "ENDMARKER",
+ "NAME",
+ "NUMBER",
+ "STRING",
+ "NEWLINE",
+ "INDENT",
+ "DEDENT",
+ "LPAR",
+ "RPAR",
+ "LSQB",
+ "RSQB",
+ "COLON",
+ "COMMA",
+ "SEMI",
+ "PLUS",
+ "MINUS",
+ "STAR",
+ "SLASH",
+ "VBAR",
+ "AMPER",
+ "LESS",
+ "GREATER",
+ "EQUAL",
+ "DOT",
+ "PERCENT",
+ "LBRACE",
+ "RBRACE",
+ "EQEQUAL",
+ "NOTEQUAL",
+ "LESSEQUAL",
+ "GREATEREQUAL",
+ "TILDE",
+ "CIRCUMFLEX",
+ "LEFTSHIFT",
+ "RIGHTSHIFT",
+ "DOUBLESTAR",
+ "PLUSEQUAL",
+ "MINEQUAL",
+ "STAREQUAL",
+ "SLASHEQUAL",
+ "PERCENTEQUAL",
+ "AMPEREQUAL",
+ "VBAREQUAL",
+ "CIRCUMFLEXEQUAL",
+ "LEFTSHIFTEQUAL",
+ "RIGHTSHIFTEQUAL",
+ "DOUBLESTAREQUAL",
+ "DOUBLESLASH",
+ "DOUBLESLASHEQUAL",
+ "AT",
+ "RARROW",
+ "ELLIPSIS",
+ /* This table must match the #defines in token.h! */
+ "OP",
+ "<ERRORTOKEN>",
+ "<N_TOKENS>"
};
@@ -112,49 +112,49 @@ char *_PyParser_TokenNames[] = {
static struct tok_state *
tok_new(void)
{
- struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
- sizeof(struct tok_state));
- if (tok == NULL)
- return NULL;
- tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
- tok->done = E_OK;
- tok->fp = NULL;
- tok->input = NULL;
- tok->tabsize = TABSIZE;
- tok->indent = 0;
- tok->indstack[0] = 0;
- tok->atbol = 1;
- tok->pendin = 0;
- tok->prompt = tok->nextprompt = NULL;
- tok->lineno = 0;
- tok->level = 0;
- tok->filename = NULL;
- tok->altwarning = 1;
- tok->alterror = 1;
- tok->alttabsize = 1;
- tok->altindstack[0] = 0;
- tok->decoding_state = STATE_INIT;
- tok->decoding_erred = 0;
- tok->read_coding_spec = 0;
- tok->enc = NULL;
- tok->encoding = NULL;
- tok->cont_line = 0;
+ struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
+ sizeof(struct tok_state));
+ if (tok == NULL)
+ return NULL;
+ tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
+ tok->done = E_OK;
+ tok->fp = NULL;
+ tok->input = NULL;
+ tok->tabsize = TABSIZE;
+ tok->indent = 0;
+ tok->indstack[0] = 0;
+ tok->atbol = 1;
+ tok->pendin = 0;
+ tok->prompt = tok->nextprompt = NULL;
+ tok->lineno = 0;
+ tok->level = 0;
+ tok->filename = NULL;
+ tok->altwarning = 1;
+ tok->alterror = 1;
+ tok->alttabsize = 1;
+ tok->altindstack[0] = 0;
+ tok->decoding_state = STATE_INIT;
+ tok->decoding_erred = 0;
+ tok->read_coding_spec = 0;
+ tok->enc = NULL;
+ tok->encoding = NULL;
+ tok->cont_line = 0;
#ifndef PGEN
- tok->decoding_readline = NULL;
- tok->decoding_buffer = NULL;
+ tok->decoding_readline = NULL;
+ tok->decoding_buffer = NULL;
#endif
- return tok;
+ return tok;
}
static char *
new_string(const char *s, Py_ssize_t len)
{
- char* result = (char *)PyMem_MALLOC(len + 1);
- if (result != NULL) {
- memcpy(result, s, len);
- result[len] = '\0';
- }
- return result;
+ char* result = (char *)PyMem_MALLOC(len + 1);
+ if (result != NULL) {
+ memcpy(result, s, len);
+ result[len] = '\0';
+ }
+ return result;
}
#ifdef PGEN
@@ -162,19 +162,19 @@ new_string(const char *s, Py_ssize_t len)
static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
- return fgets(s, size, tok->fp);
+ return fgets(s, size, tok->fp);
}
static int
decoding_feof(struct tok_state *tok)
{
- return feof(tok->fp);
+ return feof(tok->fp);
}
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
- return new_string(str, strlen(str));
+ return new_string(str, strlen(str));
}
#else /* PGEN */
@@ -182,41 +182,41 @@ decode_str(const char *str, int exec_input, struct tok_state *tok)
static char *
error_ret(struct tok_state *tok) /* XXX */
{
- tok->decoding_erred = 1;
- if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
- PyMem_FREE(tok->buf);
- tok->buf = NULL;
- return NULL; /* as if it were EOF */
+ tok->decoding_erred = 1;
+ if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
+ PyMem_FREE(tok->buf);
+ tok->buf = NULL;
+ return NULL; /* as if it were EOF */
}
static char *
-get_normal_name(char *s) /* for utf-8 and latin-1 */
+get_normal_name(char *s) /* for utf-8 and latin-1 */
{
- char buf[13];
- int i;
- for (i = 0; i < 12; i++) {
- int c = s[i];
- if (c == '\0')
- break;
- else if (c == '_')
- buf[i] = '-';
- else
- buf[i] = tolower(c);
- }
- buf[i] = '\0';
- if (strcmp(buf, "utf-8") == 0 ||
- strncmp(buf, "utf-8-", 6) == 0)
- return "utf-8";
- else if (strcmp(buf, "latin-1") == 0 ||
- strcmp(buf, "iso-8859-1") == 0 ||
- strcmp(buf, "iso-latin-1") == 0 ||
- strncmp(buf, "latin-1-", 8) == 0 ||
- strncmp(buf, "iso-8859-1-", 11) == 0 ||
- strncmp(buf, "iso-latin-1-", 12) == 0)
- return "iso-8859-1";
- else
- return s;
+ char buf[13];
+ int i;
+ for (i = 0; i < 12; i++) {
+ int c = s[i];
+ if (c == '\0')
+ break;
+ else if (c == '_')
+ buf[i] = '-';
+ else
+ buf[i] = tolower(c);
+ }
+ buf[i] = '\0';
+ if (strcmp(buf, "utf-8") == 0 ||
+ strncmp(buf, "utf-8-", 6) == 0)
+ return "utf-8";
+ else if (strcmp(buf, "latin-1") == 0 ||
+ strcmp(buf, "iso-8859-1") == 0 ||
+ strcmp(buf, "iso-latin-1") == 0 ||
+ strncmp(buf, "latin-1-", 8) == 0 ||
+ strncmp(buf, "iso-8859-1-", 11) == 0 ||
+ strncmp(buf, "iso-latin-1-", 12) == 0)
+ return "iso-8859-1";
+ else
+ return s;
}
/* Return the coding spec in S, or NULL if none is found. */
@@ -224,43 +224,43 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
static char *
get_coding_spec(const char *s, Py_ssize_t size)
{
- Py_ssize_t i;
- /* Coding spec must be in a comment, and that comment must be
- * the only statement on the source code line. */
- for (i = 0; i < size - 6; i++) {
- if (s[i] == '#')
- break;
- if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
- return NULL;
- }
- for (; i < size - 6; i++) { /* XXX inefficient search */
- const char* t = s + i;
- if (strncmp(t, "coding", 6) == 0) {
- const char* begin = NULL;
- t += 6;
- if (t[0] != ':' && t[0] != '=')
- continue;
- do {
- t++;
- } while (t[0] == '\x20' || t[0] == '\t');
-
- begin = t;
- while (Py_ISALNUM(t[0]) ||
- t[0] == '-' || t[0] == '_' || t[0] == '.')
- t++;
-
- if (begin < t) {
- char* r = new_string(begin, t - begin);
- char* q = get_normal_name(r);
- if (r != q) {
- PyMem_FREE(r);
- r = new_string(q, strlen(q));
- }
- return r;
- }
- }
- }
- return NULL;
+ Py_ssize_t i;
+ /* Coding spec must be in a comment, and that comment must be
+ * the only statement on the source code line. */
+ for (i = 0; i < size - 6; i++) {
+ if (s[i] == '#')
+ break;
+ if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
+ return NULL;
+ }
+ for (; i < size - 6; i++) { /* XXX inefficient search */
+ const char* t = s + i;
+ if (strncmp(t, "coding", 6) == 0) {
+ const char* begin = NULL;
+ t += 6;
+ if (t[0] != ':' && t[0] != '=')
+ continue;
+ do {
+ t++;
+ } while (t[0] == '\x20' || t[0] == '\t');
+
+ begin = t;
+ while (Py_ISALNUM(t[0]) ||
+ t[0] == '-' || t[0] == '_' || t[0] == '.')
+ t++;
+
+ if (begin < t) {
+ char* r = new_string(begin, t - begin);
+ char* q = get_normal_name(r);
+ if (r != q) {
+ PyMem_FREE(r);
+ r = new_string(q, strlen(q));
+ }
+ return r;
+ }
+ }
+ }
+ return NULL;
}
/* Check whether the line contains a coding spec. If it does,
@@ -270,42 +270,42 @@ get_coding_spec(const char *s, Py_ssize_t size)
static int
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
- int set_readline(struct tok_state *, const char *))
+ int set_readline(struct tok_state *, const char *))
{
- char * cs;
- int r = 1;
-
- if (tok->cont_line)
- /* It's a continuation line, so it can't be a coding spec. */
- return 1;
- cs = get_coding_spec(line, size);
- if (cs != NULL) {
- tok->read_coding_spec = 1;
- if (tok->encoding == NULL) {
- assert(tok->decoding_state == STATE_RAW);
- if (strcmp(cs, "utf-8") == 0) {
- tok->encoding = cs;
- } else {
- r = set_readline(tok, cs);
- if (r) {
- tok->encoding = cs;
- tok->decoding_state = STATE_NORMAL;
- }
- else
- PyMem_FREE(cs);
- }
- } else { /* then, compare cs with BOM */
- r = (strcmp(tok->encoding, cs) == 0);
- PyMem_FREE(cs);
- }
- }
- if (!r) {
- cs = tok->encoding;
- if (!cs)
- cs = "with BOM";
- PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
- }
- return r;
+ char * cs;
+ int r = 1;
+
+ if (tok->cont_line)
+ /* It's a continuation line, so it can't be a coding spec. */
+ return 1;
+ cs = get_coding_spec(line, size);
+ if (cs != NULL) {
+ tok->read_coding_spec = 1;
+ if (tok->encoding == NULL) {
+ assert(tok->decoding_state == STATE_RAW);
+ if (strcmp(cs, "utf-8") == 0) {
+ tok->encoding = cs;
+ } else {
+ r = set_readline(tok, cs);
+ if (r) {
+ tok->encoding = cs;
+ tok->decoding_state = STATE_NORMAL;
+ }
+ else
+ PyMem_FREE(cs);
+ }
+ } else { /* then, compare cs with BOM */
+ r = (strcmp(tok->encoding, cs) == 0);
+ PyMem_FREE(cs);
+ }
+ }
+ if (!r) {
+ cs = tok->encoding;
+ if (!cs)
+ cs = "with BOM";
+ PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
+ }
+ return r;
}
/* See whether the file starts with a BOM. If it does,
@@ -314,62 +314,62 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
static int
check_bom(int get_char(struct tok_state *),
- void unget_char(int, struct tok_state *),
- int set_readline(struct tok_state *, const char *),
- struct tok_state *tok)
+ void unget_char(int, struct tok_state *),
+ int set_readline(struct tok_state *, const char *),
+ struct tok_state *tok)
{
- int ch1, ch2, ch3;
- ch1 = get_char(tok);
- tok->decoding_state = STATE_RAW;
- if (ch1 == EOF) {
- return 1;
- } else if (ch1 == 0xEF) {
- ch2 = get_char(tok);
- if (ch2 != 0xBB) {
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
- ch3 = get_char(tok);
- if (ch3 != 0xBF) {
- unget_char(ch3, tok);
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
+ int ch1, ch2, ch3;
+ ch1 = get_char(tok);
+ tok->decoding_state = STATE_RAW;
+ if (ch1 == EOF) {
+ return 1;
+ } else if (ch1 == 0xEF) {
+ ch2 = get_char(tok);
+ if (ch2 != 0xBB) {
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
+ ch3 = get_char(tok);
+ if (ch3 != 0xBF) {
+ unget_char(ch3, tok);
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
#if 0
- /* Disable support for UTF-16 BOMs until a decision
- is made whether this needs to be supported. */
- } else if (ch1 == 0xFE) {
- ch2 = get_char(tok);
- if (ch2 != 0xFF) {
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
- if (!set_readline(tok, "utf-16-be"))
- return 0;
- tok->decoding_state = STATE_NORMAL;
- } else if (ch1 == 0xFF) {
- ch2 = get_char(tok);
- if (ch2 != 0xFE) {
- unget_char(ch2, tok);
- unget_char(ch1, tok);
- return 1;
- }
- if (!set_readline(tok, "utf-16-le"))
- return 0;
- tok->decoding_state = STATE_NORMAL;
+ /* Disable support for UTF-16 BOMs until a decision
+ is made whether this needs to be supported. */
+ } else if (ch1 == 0xFE) {
+ ch2 = get_char(tok);
+ if (ch2 != 0xFF) {
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
+ if (!set_readline(tok, "utf-16-be"))
+ return 0;
+ tok->decoding_state = STATE_NORMAL;
+ } else if (ch1 == 0xFF) {
+ ch2 = get_char(tok);
+ if (ch2 != 0xFE) {
+ unget_char(ch2, tok);
+ unget_char(ch1, tok);
+ return 1;
+ }
+ if (!set_readline(tok, "utf-16-le"))
+ return 0;
+ tok->decoding_state = STATE_NORMAL;
#endif
- } else {
- unget_char(ch1, tok);
- return 1;
- }
- if (tok->encoding != NULL)
- PyMem_FREE(tok->encoding);
- tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
- /* No need to set_readline: input is already utf-8 */
- return 1;
+ } else {
+ unget_char(ch1, tok);
+ return 1;
+ }
+ if (tok->encoding != NULL)
+ PyMem_FREE(tok->encoding);
+ tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
+ /* No need to set_readline: input is already utf-8 */
+ return 1;
}
/* Read a line of text from TOK into S, using the stream in TOK.
@@ -378,74 +378,74 @@ check_bom(int get_char(struct tok_state *),
On entry, tok->decoding_buffer will be one of:
1) NULL: need to call tok->decoding_readline to get a new line
2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
- stored the result in tok->decoding_buffer
+ stored the result in tok->decoding_buffer
3) PyByteArrayObject *: previous call to fp_readl did not have enough room
- (in the s buffer) to copy entire contents of the line read
- by tok->decoding_readline. tok->decoding_buffer has the overflow.
- In this case, fp_readl is called in a loop (with an expanded buffer)
- until the buffer ends with a '\n' (or until the end of the file is
- reached): see tok_nextc and its calls to decoding_fgets.
+ (in the s buffer) to copy entire contents of the line read
+ by tok->decoding_readline. tok->decoding_buffer has the overflow.
+ In this case, fp_readl is called in a loop (with an expanded buffer)
+ until the buffer ends with a '\n' (or until the end of the file is
+ reached): see tok_nextc and its calls to decoding_fgets.
*/
static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
- PyObject* bufobj;
- const char *buf;
- Py_ssize_t buflen;
-
- /* Ask for one less byte so we can terminate it */
- assert(size > 0);
- size--;
-
- if (tok->decoding_buffer) {
- bufobj = tok->decoding_buffer;
- Py_INCREF(bufobj);
- }
- else
- {
- bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
- if (bufobj == NULL)
- goto error;
- }
- if (PyUnicode_CheckExact(bufobj))
- {
- buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
- if (buf == NULL) {
- goto error;
- }
- }
- else
- {
- buf = PyByteArray_AsString(bufobj);
- if (buf == NULL) {
- goto error;
- }
- buflen = PyByteArray_GET_SIZE(bufobj);
- }
-
- Py_XDECREF(tok->decoding_buffer);
- if (buflen > size) {
- /* Too many chars, the rest goes into tok->decoding_buffer */
- tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
- buflen-size);
- if (tok->decoding_buffer == NULL)
- goto error;
- buflen = size;
- }
- else
- tok->decoding_buffer = NULL;
-
- memcpy(s, buf, buflen);
- s[buflen] = '\0';
- if (buflen == 0) /* EOF */
- s = NULL;
- Py_DECREF(bufobj);
- return s;
+ PyObject* bufobj;
+ const char *buf;
+ Py_ssize_t buflen;
+
+ /* Ask for one less byte so we can terminate it */
+ assert(size > 0);
+ size--;
+
+ if (tok->decoding_buffer) {
+ bufobj = tok->decoding_buffer;
+ Py_INCREF(bufobj);
+ }
+ else
+ {
+ bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
+ if (bufobj == NULL)
+ goto error;
+ }
+ if (PyUnicode_CheckExact(bufobj))
+ {
+ buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
+ if (buf == NULL) {
+ goto error;
+ }
+ }
+ else
+ {
+ buf = PyByteArray_AsString(bufobj);
+ if (buf == NULL) {
+ goto error;
+ }
+ buflen = PyByteArray_GET_SIZE(bufobj);
+ }
+
+ Py_XDECREF(tok->decoding_buffer);
+ if (buflen > size) {
+ /* Too many chars, the rest goes into tok->decoding_buffer */
+ tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
+ buflen-size);
+ if (tok->decoding_buffer == NULL)
+ goto error;
+ buflen = size;
+ }
+ else
+ tok->decoding_buffer = NULL;
+
+ memcpy(s, buf, buflen);
+ s[buflen] = '\0';
+ if (buflen == 0) /* EOF */
+ s = NULL;
+ Py_DECREF(bufobj);
+ return s;
error:
- Py_XDECREF(bufobj);
- return error_ret(tok);
+ Py_XDECREF(bufobj);
+ return error_ret(tok);
}
/* Set the readline function for TOK to a StreamReader's
@@ -461,49 +461,49 @@ error:
static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
- PyObject *readline = NULL, *stream = NULL, *io = NULL;
-
- io = PyImport_ImportModuleNoBlock("io");
- if (io == NULL)
- goto cleanup;
-
- if (tok->filename)
- stream = PyObject_CallMethod(io, "open", "ssis",
- tok->filename, "r", -1, enc);
- else
- stream = PyObject_CallMethod(io, "open", "isisOOO",
- fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False);
- if (stream == NULL)
- goto cleanup;
-
- Py_XDECREF(tok->decoding_readline);
- readline = PyObject_GetAttrString(stream, "readline");
- tok->decoding_readline = readline;
-
- /* The file has been reopened; parsing will restart from
- * the beginning of the file, we have to reset the line number.
- * But this function has been called from inside tok_nextc() which
- * will increment lineno before it returns. So we set it -1 so that
- * the next call to tok_nextc() will start with tok->lineno == 0.
- */
- tok->lineno = -1;
+ PyObject *readline = NULL, *stream = NULL, *io = NULL;
+
+ io = PyImport_ImportModuleNoBlock("io");
+ if (io == NULL)
+ goto cleanup;
+
+ if (tok->filename)
+ stream = PyObject_CallMethod(io, "open", "ssis",
+ tok->filename, "r", -1, enc);
+ else
+ stream = PyObject_CallMethod(io, "open", "isisOOO",
+ fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False);
+ if (stream == NULL)
+ goto cleanup;
+
+ Py_XDECREF(tok->decoding_readline);
+ readline = PyObject_GetAttrString(stream, "readline");
+ tok->decoding_readline = readline;
+
+ /* The file has been reopened; parsing will restart from
+ * the beginning of the file, we have to reset the line number.
+ * But this function has been called from inside tok_nextc() which
+ * will increment lineno before it returns. So we set it -1 so that
+ * the next call to tok_nextc() will start with tok->lineno == 0.
+ */
+ tok->lineno = -1;
cleanup:
- Py_XDECREF(stream);
- Py_XDECREF(io);
- return readline != NULL;
+ Py_XDECREF(stream);
+ Py_XDECREF(io);
+ return readline != NULL;
}
/* Fetch the next byte from TOK. */
static int fp_getc(struct tok_state *tok) {
- return getc(tok->fp);
+ return getc(tok->fp);
}
/* Unfetch the last byte back into TOK. */
static void fp_ungetc(int c, struct tok_state *tok) {
- ungetc(c, tok->fp);
+ ungetc(c, tok->fp);
}
/* Check whether the characters at s start a valid
@@ -511,27 +511,27 @@ static void fp_ungetc(int c, struct tok_state *tok) {
the sequence if yes, 0 if not. */
static int valid_utf8(const unsigned char* s)
{
- int expected = 0;
- int length;
- if (*s < 0x80)
- /* single-byte code */
- return 1;
- if (*s < 0xc0)
- /* following byte */
- return 0;
- if (*s < 0xE0)
- expected = 1;
- else if (*s < 0xF0)
- expected = 2;
- else if (*s < 0xF8)
- expected = 3;
- else
- return 0;
- length = expected + 1;
- for (; expected; expected--)
- if (s[expected] < 0x80 || s[expected] >= 0xC0)
- return 0;
- return length;
+ int expected = 0;
+ int length;
+ if (*s < 0x80)
+ /* single-byte code */
+ return 1;
+ if (*s < 0xc0)
+ /* following byte */
+ return 0;
+ if (*s < 0xE0)
+ expected = 1;
+ else if (*s < 0xF0)
+ expected = 2;
+ else if (*s < 0xF8)
+ expected = 3;
+ else
+ return 0;
+ length = expected + 1;
+ for (; expected; expected--)
+ if (s[expected] < 0x80 || s[expected] >= 0xC0)
+ return 0;
+ return length;
}
/* Read a line of input from TOK. Determine encoding
@@ -540,93 +540,93 @@ static int valid_utf8(const unsigned char* s)
static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
- char *line = NULL;
- int badchar = 0;
- for (;;) {
- if (tok->decoding_state == STATE_NORMAL) {
- /* We already have a codec associated with
- this input. */
- line = fp_readl(s, size, tok);
- break;
- } else if (tok->decoding_state == STATE_RAW) {
- /* We want a 'raw' read. */
- line = Py_UniversalNewlineFgets(s, size,
- tok->fp, NULL);
- break;
- } else {
- /* We have not yet determined the encoding.
- If an encoding is found, use the file-pointer
- reader functions from now on. */
- if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
- return error_ret(tok);
- assert(tok->decoding_state != STATE_INIT);
- }
- }
- if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
- if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
- return error_ret(tok);
- }
- }
+ char *line = NULL;
+ int badchar = 0;
+ for (;;) {
+ if (tok->decoding_state == STATE_NORMAL) {
+ /* We already have a codec associated with
+ this input. */
+ line = fp_readl(s, size, tok);
+ break;
+ } else if (tok->decoding_state == STATE_RAW) {
+ /* We want a 'raw' read. */
+ line = Py_UniversalNewlineFgets(s, size,
+ tok->fp, NULL);
+ break;
+ } else {
+ /* We have not yet determined the encoding.
+ If an encoding is found, use the file-pointer
+ reader functions from now on. */
+ if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
+ return error_ret(tok);
+ assert(tok->decoding_state != STATE_INIT);
+ }
+ }
+ if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
+ if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
+ return error_ret(tok);
+ }
+ }
#ifndef PGEN
- /* The default encoding is UTF-8, so make sure we don't have any
- non-UTF-8 sequences in it. */
- if (line && !tok->encoding) {
- unsigned char *c;
- int length;
- for (c = (unsigned char *)line; *c; c += length)
- if (!(length = valid_utf8(c))) {
- badchar = *c;
- break;
- }
- }
- if (badchar) {
- /* Need to add 1 to the line number, since this line
- has not been counted, yet. */
- PyErr_Format(PyExc_SyntaxError,
- "Non-UTF-8 code starting with '\\x%.2x' "
- "in file %.200s on line %i, "
- "but no encoding declared; "
- "see http://python.org/dev/peps/pep-0263/ for details",
- badchar, tok->filename, tok->lineno + 1);
- return error_ret(tok);
- }
+ /* The default encoding is UTF-8, so make sure we don't have any
+ non-UTF-8 sequences in it. */
+ if (line && !tok->encoding) {
+ unsigned char *c;
+ int length;
+ for (c = (unsigned char *)line; *c; c += length)
+ if (!(length = valid_utf8(c))) {
+ badchar = *c;
+ break;
+ }
+ }
+ if (badchar) {
+ /* Need to add 1 to the line number, since this line
+ has not been counted, yet. */
+ PyErr_Format(PyExc_SyntaxError,
+ "Non-UTF-8 code starting with '\\x%.2x' "
+ "in file %.200s on line %i, "
+ "but no encoding declared; "
+ "see http://python.org/dev/peps/pep-0263/ for details",
+ badchar, tok->filename, tok->lineno + 1);
+ return error_ret(tok);
+ }
#endif
- return line;
+ return line;
}
static int
decoding_feof(struct tok_state *tok)
{
- if (tok->decoding_state != STATE_NORMAL) {
- return feof(tok->fp);
- } else {
- PyObject* buf = tok->decoding_buffer;
- if (buf == NULL) {
- buf = PyObject_CallObject(tok->decoding_readline, NULL);
- if (buf == NULL) {
- error_ret(tok);
- return 1;
- } else {
- tok->decoding_buffer = buf;
- }
- }
- return PyObject_Length(buf) == 0;
- }
+ if (tok->decoding_state != STATE_NORMAL) {
+ return feof(tok->fp);
+ } else {
+ PyObject* buf = tok->decoding_buffer;
+ if (buf == NULL) {
+ buf = PyObject_CallObject(tok->decoding_readline, NULL);
+ if (buf == NULL) {
+ error_ret(tok);
+ return 1;
+ } else {
+ tok->decoding_buffer = buf;
+ }
+ }
+ return PyObject_Length(buf) == 0;
+ }
}
/* Fetch a byte from TOK, using the string buffer. */
static int
buf_getc(struct tok_state *tok) {
- return Py_CHARMASK(*tok->str++);
+ return Py_CHARMASK(*tok->str++);
}
/* Unfetch a byte from TOK, using the string buffer. */
static void
buf_ungetc(int c, struct tok_state *tok) {
- tok->str--;
- assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
+ tok->str--;
+ assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
}
/* Set the readline function for TOK to ENC. For the string-based
@@ -634,8 +634,8 @@ buf_ungetc(int c, struct tok_state *tok) {
static int
buf_setreadl(struct tok_state *tok, const char* enc) {
- tok->enc = enc;
- return 1;
+ tok->enc = enc;
+ return 1;
}
/* Return a UTF-8 encoding Python string object from the
@@ -643,54 +643,54 @@ buf_setreadl(struct tok_state *tok, const char* enc) {
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
- PyObject *utf8;
- PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
- if (buf == NULL)
- return NULL;
- utf8 = PyUnicode_AsUTF8String(buf);
- Py_DECREF(buf);
- return utf8;
+ PyObject *utf8;
+ PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
+ if (buf == NULL)
+ return NULL;
+ utf8 = PyUnicode_AsUTF8String(buf);
+ Py_DECREF(buf);
+ return utf8;
}
static char *
translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
- int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
- char *buf, *current;
- char c = '\0';
- buf = PyMem_MALLOC(needed_length);
- if (buf == NULL) {
- tok->done = E_NOMEM;
- return NULL;
- }
- for (current = buf; *s; s++, current++) {
- c = *s;
- if (skip_next_lf) {
- skip_next_lf = 0;
- if (c == '\n') {
- c = *++s;
- if (!c)
- break;
- }
- }
- if (c == '\r') {
- skip_next_lf = 1;
- c = '\n';
- }
- *current = c;
- }
- /* If this is exec input, add a newline to the end of the string if
- there isn't one already. */
- if (exec_input && c != '\n') {
- *current = '\n';
- current++;
- }
- *current = '\0';
- final_length = current - buf + 1;
- if (final_length < needed_length && final_length)
- /* should never fail */
- buf = PyMem_REALLOC(buf, final_length);
- return buf;
+ int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
+ char *buf, *current;
+ char c = '\0';
+ buf = PyMem_MALLOC(needed_length);
+ if (buf == NULL) {
+ tok->done = E_NOMEM;
+ return NULL;
+ }
+ for (current = buf; *s; s++, current++) {
+ c = *s;
+ if (skip_next_lf) {
+ skip_next_lf = 0;
+ if (c == '\n') {
+ c = *++s;
+ if (!c)
+ break;
+ }
+ }
+ if (c == '\r') {
+ skip_next_lf = 1;
+ c = '\n';
+ }
+ *current = c;
+ }
+ /* If this is exec input, add a newline to the end of the string if
+ there isn't one already. */
+ if (exec_input && c != '\n') {
+ *current = '\n';
+ current++;
+ }
+ *current = '\0';
+ final_length = current - buf + 1;
+ if (final_length < needed_length && final_length)
+ /* should never fail */
+ buf = PyMem_REALLOC(buf, final_length);
+ return buf;
}
/* Decode a byte string STR for use as the buffer of TOK.
@@ -700,57 +700,57 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
static const char *
decode_str(const char *input, int single, struct tok_state *tok)
{
- PyObject* utf8 = NULL;
- const char *str;
- const char *s;
- const char *newl[2] = {NULL, NULL};
- int lineno = 0;
- tok->input = str = translate_newlines(input, single, tok);
- if (str == NULL)
- return NULL;
- tok->enc = NULL;
- tok->str = str;
- if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
- return error_ret(tok);
- str = tok->str; /* string after BOM if any */
- assert(str);
- if (tok->enc != NULL) {
- utf8 = translate_into_utf8(str, tok->enc);
- if (utf8 == NULL)
- return error_ret(tok);
- str = PyBytes_AsString(utf8);
- }
- for (s = str;; s++) {
- if (*s == '\0') break;
- else if (*s == '\n') {
- assert(lineno < 2);
- newl[lineno] = s;
- lineno++;
- if (lineno == 2) break;
- }
- }
- tok->enc = NULL;
- /* need to check line 1 and 2 separately since check_coding_spec
- assumes a single line as input */
- if (newl[0]) {
- if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
- return error_ret(tok);
- if (tok->enc == NULL && newl[1]) {
- if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
- tok, buf_setreadl))
- return error_ret(tok);
- }
- }
- if (tok->enc != NULL) {
- assert(utf8 == NULL);
- utf8 = translate_into_utf8(str, tok->enc);
- if (utf8 == NULL)
- return error_ret(tok);
- str = PyBytes_AS_STRING(utf8);
- }
- assert(tok->decoding_buffer == NULL);
- tok->decoding_buffer = utf8; /* CAUTION */
- return str;
+ PyObject* utf8 = NULL;
+ const char *str;
+ const char *s;
+ const char *newl[2] = {NULL, NULL};
+ int lineno = 0;
+ tok->input = str = translate_newlines(input, single, tok);
+ if (str == NULL)
+ return NULL;
+ tok->enc = NULL;
+ tok->str = str;
+ if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
+ return error_ret(tok);
+ str = tok->str; /* string after BOM if any */
+ assert(str);
+ if (tok->enc != NULL) {
+ utf8 = translate_into_utf8(str, tok->enc);
+ if (utf8 == NULL)
+ return error_ret(tok);
+ str = PyBytes_AsString(utf8);
+ }
+ for (s = str;; s++) {
+ if (*s == '\0') break;
+ else if (*s == '\n') {
+ assert(lineno < 2);
+ newl[lineno] = s;
+ lineno++;
+ if (lineno == 2) break;
+ }
+ }
+ tok->enc = NULL;
+ /* need to check line 1 and 2 separately since check_coding_spec
+ assumes a single line as input */
+ if (newl[0]) {
+ if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
+ return error_ret(tok);
+ if (tok->enc == NULL && newl[1]) {
+ if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
+ tok, buf_setreadl))
+ return error_ret(tok);
+ }
+ }
+ if (tok->enc != NULL) {
+ assert(utf8 == NULL);
+ utf8 = translate_into_utf8(str, tok->enc);
+ if (utf8 == NULL)
+ return error_ret(tok);
+ str = PyBytes_AS_STRING(utf8);
+ }
+ assert(tok->decoding_buffer == NULL);
+ tok->decoding_buffer = utf8; /* CAUTION */
+ return str;
}
#endif /* PGEN */
@@ -760,47 +760,47 @@ decode_str(const char *input, int single, struct tok_state *tok)
struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
{
- struct tok_state *tok = tok_new();
- if (tok == NULL)
- return NULL;
- str = (char *)decode_str(str, exec_input, tok);
- if (str == NULL) {
- PyTokenizer_Free(tok);
- return NULL;
- }
-
- /* XXX: constify members. */
- tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
- return tok;
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
+ str = (char *)decode_str(str, exec_input, tok);
+ if (str == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+
+ /* XXX: constify members. */
+ tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ return tok;
}
struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input)
{
- struct tok_state *tok = tok_new();
- if (tok == NULL)
- return NULL;
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
#ifndef PGEN
- tok->input = str = translate_newlines(str, exec_input, tok);
+ tok->input = str = translate_newlines(str, exec_input, tok);
#endif
- if (str == NULL) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- tok->decoding_state = STATE_RAW;
- tok->read_coding_spec = 1;
- tok->enc = NULL;
- tok->str = str;
- tok->encoding = (char *)PyMem_MALLOC(6);
- if (!tok->encoding) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- strcpy(tok->encoding, "utf-8");
-
- /* XXX: constify members. */
- tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
- return tok;
+ if (str == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ tok->decoding_state = STATE_RAW;
+ tok->read_coding_spec = 1;
+ tok->enc = NULL;
+ tok->str = str;
+ tok->encoding = (char *)PyMem_MALLOC(6);
+ if (!tok->encoding) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ strcpy(tok->encoding, "utf-8");
+
+ /* XXX: constify members. */
+ tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
+ return tok;
}
/* Set up tokenizer for file */
@@ -808,30 +808,30 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
struct tok_state *
PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
{
- struct tok_state *tok = tok_new();
- if (tok == NULL)
- return NULL;
- if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- tok->cur = tok->inp = tok->buf;
- tok->end = tok->buf + BUFSIZ;
- tok->fp = fp;
- tok->prompt = ps1;
- tok->nextprompt = ps2;
- if (enc != NULL) {
- /* Must copy encoding declaration since it
- gets copied into the parse tree. */
- tok->encoding = PyMem_MALLOC(strlen(enc)+1);
- if (!tok->encoding) {
- PyTokenizer_Free(tok);
- return NULL;
- }
- strcpy(tok->encoding, enc);
- tok->decoding_state = STATE_NORMAL;
- }
- return tok;
+ struct tok_state *tok = tok_new();
+ if (tok == NULL)
+ return NULL;
+ if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ tok->cur = tok->inp = tok->buf;
+ tok->end = tok->buf + BUFSIZ;
+ tok->fp = fp;
+ tok->prompt = ps1;
+ tok->nextprompt = ps2;
+ if (enc != NULL) {
+ /* Must copy encoding declaration since it
+ gets copied into the parse tree. */
+ tok->encoding = PyMem_MALLOC(strlen(enc)+1);
+ if (!tok->encoding) {
+ PyTokenizer_Free(tok);
+ return NULL;
+ }
+ strcpy(tok->encoding, enc);
+ tok->decoding_state = STATE_NORMAL;
+ }
+ return tok;
}
@@ -840,17 +840,17 @@ PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
void
PyTokenizer_Free(struct tok_state *tok)
{
- if (tok->encoding != NULL)
- PyMem_FREE(tok->encoding);
+ if (tok->encoding != NULL)
+ PyMem_FREE(tok->encoding);
#ifndef PGEN
- Py_XDECREF(tok->decoding_readline);
- Py_XDECREF(tok->decoding_buffer);
+ Py_XDECREF(tok->decoding_readline);
+ Py_XDECREF(tok->decoding_buffer);
#endif
- if (tok->fp != NULL && tok->buf != NULL)
- PyMem_FREE(tok->buf);
- if (tok->input)
- PyMem_FREE((char *)tok->input);
- PyMem_FREE(tok);
+ if (tok->fp != NULL && tok->buf != NULL)
+ PyMem_FREE(tok->buf);
+ if (tok->input)
+ PyMem_FREE((char *)tok->input);
+ PyMem_FREE(tok);
}
/* Get next char, updating state; error code goes into tok->done */
@@ -858,188 +858,188 @@ PyTokenizer_Free(struct tok_state *tok)
static int
tok_nextc(register struct tok_state *tok)
{
- for (;;) {
- if (tok->cur != tok->inp) {
- return Py_CHARMASK(*tok->cur++); /* Fast path */
- }
- if (tok->done != E_OK)
- return EOF;
- if (tok->fp == NULL) {
- char *end = strchr(tok->inp, '\n');
- if (end != NULL)
- end++;
- else {
- end = strchr(tok->inp, '\0');
- if (end == tok->inp) {
- tok->done = E_EOF;
- return EOF;
- }
- }
- if (tok->start == NULL)
- tok->buf = tok->cur;
- tok->line_start = tok->cur;
- tok->lineno++;
- tok->inp = end;
- return Py_CHARMASK(*tok->cur++);
- }
- if (tok->prompt != NULL) {
- char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
+ for (;;) {
+ if (tok->cur != tok->inp) {
+ return Py_CHARMASK(*tok->cur++); /* Fast path */
+ }
+ if (tok->done != E_OK)
+ return EOF;
+ if (tok->fp == NULL) {
+ char *end = strchr(tok->inp, '\n');
+ if (end != NULL)
+ end++;
+ else {
+ end = strchr(tok->inp, '\0');
+ if (end == tok->inp) {
+ tok->done = E_EOF;
+ return EOF;
+ }
+ }
+ if (tok->start == NULL)
+ tok->buf = tok->cur;
+ tok->line_start = tok->cur;
+ tok->lineno++;
+ tok->inp = end;
+ return Py_CHARMASK(*tok->cur++);
+ }
+ if (tok->prompt != NULL) {
+ char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
#ifndef PGEN
- if (tok->encoding && newtok && *newtok) {
- /* Recode to UTF-8 */
- Py_ssize_t buflen;
- const char* buf;
- PyObject *u = translate_into_utf8(newtok, tok->encoding);
- PyMem_FREE(newtok);
- if (!u) {
- tok->done = E_DECODE;
- return EOF;
- }
- buflen = PyBytes_GET_SIZE(u);
- buf = PyBytes_AS_STRING(u);
- if (!buf) {
- Py_DECREF(u);
- tok->done = E_DECODE;
- return EOF;
- }
- newtok = PyMem_MALLOC(buflen+1);
- strcpy(newtok, buf);
- Py_DECREF(u);
- }
+ if (tok->encoding && newtok && *newtok) {
+ /* Recode to UTF-8 */
+ Py_ssize_t buflen;
+ const char* buf;
+ PyObject *u = translate_into_utf8(newtok, tok->encoding);
+ PyMem_FREE(newtok);
+ if (!u) {
+ tok->done = E_DECODE;
+ return EOF;
+ }
+ buflen = PyBytes_GET_SIZE(u);
+ buf = PyBytes_AS_STRING(u);
+ if (!buf) {
+ Py_DECREF(u);
+ tok->done = E_DECODE;
+ return EOF;
+ }
+ newtok = PyMem_MALLOC(buflen+1);
+ strcpy(newtok, buf);
+ Py_DECREF(u);
+ }
#endif
- if (tok->nextprompt != NULL)
- tok->prompt = tok->nextprompt;
- if (newtok == NULL)
- tok->done = E_INTR;
- else if (*newtok == '\0') {
- PyMem_FREE(newtok);
- tok->done = E_EOF;
- }
- else if (tok->start != NULL) {
- size_t start = tok->start - tok->buf;
- size_t oldlen = tok->cur - tok->buf;
- size_t newlen = oldlen + strlen(newtok);
- char *buf = tok->buf;
- buf = (char *)PyMem_REALLOC(buf, newlen+1);
- tok->lineno++;
- if (buf == NULL) {
- PyMem_FREE(tok->buf);
- tok->buf = NULL;
- PyMem_FREE(newtok);
- tok->done = E_NOMEM;
- return EOF;
- }
- tok->buf = buf;
- tok->cur = tok->buf + oldlen;
- tok->line_start = tok->cur;
- strcpy(tok->buf + oldlen, newtok);
- PyMem_FREE(newtok);
- tok->inp = tok->buf + newlen;
- tok->end = tok->inp + 1;
- tok->start = tok->buf + start;
- }
- else {
- tok->lineno++;
- if (tok->buf != NULL)
- PyMem_FREE(tok->buf);
- tok->buf = newtok;
- tok->line_start = tok->buf;
- tok->cur = tok->buf;
- tok->line_start = tok->buf;
- tok->inp = strchr(tok->buf, '\0');
- tok->end = tok->inp + 1;
- }
- }
- else {
- int done = 0;
- Py_ssize_t cur = 0;
- char *pt;
- if (tok->start == NULL) {
- if (tok->buf == NULL) {
- tok->buf = (char *)
- PyMem_MALLOC(BUFSIZ);
- if (tok->buf == NULL) {
- tok->done = E_NOMEM;
- return EOF;
- }
- tok->end = tok->buf + BUFSIZ;
- }
- if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
- tok) == NULL) {
- tok->done = E_EOF;
- done = 1;
- }
- else {
- tok->done = E_OK;
- tok->inp = strchr(tok->buf, '\0');
- done = tok->inp[-1] == '\n';
- }
- }
- else {
- cur = tok->cur - tok->buf;
- if (decoding_feof(tok)) {
- tok->done = E_EOF;
- done = 1;
- }
- else
- tok->done = E_OK;
- }
- tok->lineno++;
- /* Read until '\n' or EOF */
- while (!done) {
- Py_ssize_t curstart = tok->start == NULL ? -1 :
- tok->start - tok->buf;
- Py_ssize_t curvalid = tok->inp - tok->buf;
- Py_ssize_t newsize = curvalid + BUFSIZ;
- char *newbuf = tok->buf;
- newbuf = (char *)PyMem_REALLOC(newbuf,
- newsize);
- if (newbuf == NULL) {
- tok->done = E_NOMEM;
- tok->cur = tok->inp;
- return EOF;
- }
- tok->buf = newbuf;
- tok->inp = tok->buf + curvalid;
- tok->end = tok->buf + newsize;
- tok->start = curstart < 0 ? NULL :
- tok->buf + curstart;
- if (decoding_fgets(tok->inp,
- (int)(tok->end - tok->inp),
- tok) == NULL) {
- /* Break out early on decoding
- errors, as tok->buf will be NULL
- */
- if (tok->decoding_erred)
- return EOF;
- /* Last line does not end in \n,
- fake one */
- strcpy(tok->inp, "\n");
- }
- tok->inp = strchr(tok->inp, '\0');
- done = tok->inp[-1] == '\n';
- }
- if (tok->buf != NULL) {
- tok->cur = tok->buf + cur;
- tok->line_start = tok->cur;
- /* replace "\r\n" with "\n" */
- /* For Mac leave the \r, giving a syntax error */
- pt = tok->inp - 2;
- if (pt >= tok->buf && *pt == '\r') {
- *pt++ = '\n';
- *pt = '\0';
- tok->inp = pt;
- }
- }
- }
- if (tok->done != E_OK) {
- if (tok->prompt != NULL)
- PySys_WriteStderr("\n");
- tok->cur = tok->inp;
- return EOF;
- }
- }
- /*NOTREACHED*/
+ if (tok->nextprompt != NULL)
+ tok->prompt = tok->nextprompt;
+ if (newtok == NULL)
+ tok->done = E_INTR;
+ else if (*newtok == '\0') {
+ PyMem_FREE(newtok);
+ tok->done = E_EOF;
+ }
+ else if (tok->start != NULL) {
+ size_t start = tok->start - tok->buf;
+ size_t oldlen = tok->cur - tok->buf;
+ size_t newlen = oldlen + strlen(newtok);
+ char *buf = tok->buf;
+ buf = (char *)PyMem_REALLOC(buf, newlen+1);
+ tok->lineno++;
+ if (buf == NULL) {
+ PyMem_FREE(tok->buf);
+ tok->buf = NULL;
+ PyMem_FREE(newtok);
+ tok->done = E_NOMEM;
+ return EOF;
+ }
+ tok->buf = buf;
+ tok->cur = tok->buf + oldlen;
+ tok->line_start = tok->cur;
+ strcpy(tok->buf + oldlen, newtok);
+ PyMem_FREE(newtok);
+ tok->inp = tok->buf + newlen;
+ tok->end = tok->inp + 1;
+ tok->start = tok->buf + start;
+ }
+ else {
+ tok->lineno++;
+ if (tok->buf != NULL)
+ PyMem_FREE(tok->buf);
+ tok->buf = newtok;
+ tok->line_start = tok->buf;
+ tok->cur = tok->buf;
+ tok->line_start = tok->buf;
+ tok->inp = strchr(tok->buf, '\0');
+ tok->end = tok->inp + 1;
+ }
+ }
+ else {
+ int done = 0;
+ Py_ssize_t cur = 0;
+ char *pt;
+ if (tok->start == NULL) {
+ if (tok->buf == NULL) {
+ tok->buf = (char *)
+ PyMem_MALLOC(BUFSIZ);
+ if (tok->buf == NULL) {
+ tok->done = E_NOMEM;
+ return EOF;
+ }
+ tok->end = tok->buf + BUFSIZ;
+ }
+ if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
+ tok) == NULL) {
+ tok->done = E_EOF;
+ done = 1;
+ }
+ else {
+ tok->done = E_OK;
+ tok->inp = strchr(tok->buf, '\0');
+ done = tok->inp[-1] == '\n';
+ }
+ }
+ else {
+ cur = tok->cur - tok->buf;
+ if (decoding_feof(tok)) {
+ tok->done = E_EOF;
+ done = 1;
+ }
+ else
+ tok->done = E_OK;
+ }
+ tok->lineno++;
+ /* Read until '\n' or EOF */
+ while (!done) {
+ Py_ssize_t curstart = tok->start == NULL ? -1 :
+ tok->start - tok->buf;
+ Py_ssize_t curvalid = tok->inp - tok->buf;
+ Py_ssize_t newsize = curvalid + BUFSIZ;
+ char *newbuf = tok->buf;
+ newbuf = (char *)PyMem_REALLOC(newbuf,
+ newsize);
+ if (newbuf == NULL) {
+ tok->done = E_NOMEM;
+ tok->cur = tok->inp;
+ return EOF;
+ }
+ tok->buf = newbuf;
+ tok->inp = tok->buf + curvalid;
+ tok->end = tok->buf + newsize;
+ tok->start = curstart < 0 ? NULL :
+ tok->buf + curstart;
+ if (decoding_fgets(tok->inp,
+ (int)(tok->end - tok->inp),
+ tok) == NULL) {
+ /* Break out early on decoding
+ errors, as tok->buf will be NULL
+ */
+ if (tok->decoding_erred)
+ return EOF;
+ /* Last line does not end in \n,
+ fake one */
+ strcpy(tok->inp, "\n");
+ }
+ tok->inp = strchr(tok->inp, '\0');
+ done = tok->inp[-1] == '\n';
+ }
+ if (tok->buf != NULL) {
+ tok->cur = tok->buf + cur;
+ tok->line_start = tok->cur;
+ /* replace "\r\n" with "\n" */
+ /* For Mac leave the \r, giving a syntax error */
+ pt = tok->inp - 2;
+ if (pt >= tok->buf && *pt == '\r') {
+ *pt++ = '\n';
+ *pt = '\0';
+ tok->inp = pt;
+ }
+ }
+ }
+ if (tok->done != E_OK) {
+ if (tok->prompt != NULL)
+ PySys_WriteStderr("\n");
+ tok->cur = tok->inp;
+ return EOF;
+ }
+ }
+ /*NOTREACHED*/
}
@@ -1048,12 +1048,12 @@ tok_nextc(register struct tok_state *tok)
static void
tok_backup(register struct tok_state *tok, register int c)
{
- if (c != EOF) {
- if (--tok->cur < tok->buf)
- Py_FatalError("tok_backup: beginning of buffer");
- if (*tok->cur != c)
- *tok->cur = c;
- }
+ if (c != EOF) {
+ if (--tok->cur < tok->buf)
+ Py_FatalError("tok_backup: beginning of buffer");
+ if (*tok->cur != c)
+ *tok->cur = c;
+ }
}
@@ -1062,181 +1062,181 @@ tok_backup(register struct tok_state *tok, register int c)
int
PyToken_OneChar(int c)
{
- switch (c) {
- case '(': return LPAR;
- case ')': return RPAR;
- case '[': return LSQB;
- case ']': return RSQB;
- case ':': return COLON;
- case ',': return COMMA;
- case ';': return SEMI;
- case '+': return PLUS;
- case '-': return MINUS;
- case '*': return STAR;
- case '/': return SLASH;
- case '|': return VBAR;
- case '&': return AMPER;
- case '<': return LESS;
- case '>': return GREATER;
- case '=': return EQUAL;
- case '.': return DOT;
- case '%': return PERCENT;
- case '{': return LBRACE;
- case '}': return RBRACE;
- case '^': return CIRCUMFLEX;
- case '~': return TILDE;
- case '@': return AT;
- default: return OP;
- }
+ switch (c) {
+ case '(': return LPAR;
+ case ')': return RPAR;
+ case '[': return LSQB;
+ case ']': return RSQB;
+ case ':': return COLON;
+ case ',': return COMMA;
+ case ';': return SEMI;
+ case '+': return PLUS;
+ case '-': return MINUS;
+ case '*': return STAR;
+ case '/': return SLASH;
+ case '|': return VBAR;
+ case '&': return AMPER;
+ case '<': return LESS;
+ case '>': return GREATER;
+ case '=': return EQUAL;
+ case '.': return DOT;
+ case '%': return PERCENT;
+ case '{': return LBRACE;
+ case '}': return RBRACE;
+ case '^': return CIRCUMFLEX;
+ case '~': return TILDE;
+ case '@': return AT;
+ default: return OP;
+ }
}
int
PyToken_TwoChars(int c1, int c2)
{
- switch (c1) {
- case '=':
- switch (c2) {
- case '=': return EQEQUAL;
- }
- break;
- case '!':
- switch (c2) {
- case '=': return NOTEQUAL;
- }
- break;
- case '<':
- switch (c2) {
- case '>': return NOTEQUAL;
- case '=': return LESSEQUAL;
- case '<': return LEFTSHIFT;
- }
- break;
- case '>':
- switch (c2) {
- case '=': return GREATEREQUAL;
- case '>': return RIGHTSHIFT;
- }
- break;
- case '+':
- switch (c2) {
- case '=': return PLUSEQUAL;
- }
- break;
- case '-':
- switch (c2) {
- case '=': return MINEQUAL;
- case '>': return RARROW;
- }
- break;
- case '*':
- switch (c2) {
- case '*': return DOUBLESTAR;
- case '=': return STAREQUAL;
- }
- break;
- case '/':
- switch (c2) {
- case '/': return DOUBLESLASH;
- case '=': return SLASHEQUAL;
- }
- break;
- case '|':
- switch (c2) {
- case '=': return VBAREQUAL;
- }
- break;
- case '%':
- switch (c2) {
- case '=': return PERCENTEQUAL;
- }
- break;
- case '&':
- switch (c2) {
- case '=': return AMPEREQUAL;
- }
- break;
- case '^':
- switch (c2) {
- case '=': return CIRCUMFLEXEQUAL;
- }
- break;
- }
- return OP;
+ switch (c1) {
+ case '=':
+ switch (c2) {
+ case '=': return EQEQUAL;
+ }
+ break;
+ case '!':
+ switch (c2) {
+ case '=': return NOTEQUAL;
+ }
+ break;
+ case '<':
+ switch (c2) {
+ case '>': return NOTEQUAL;
+ case '=': return LESSEQUAL;
+ case '<': return LEFTSHIFT;
+ }
+ break;
+ case '>':
+ switch (c2) {
+ case '=': return GREATEREQUAL;
+ case '>': return RIGHTSHIFT;
+ }
+ break;
+ case '+':
+ switch (c2) {
+ case '=': return PLUSEQUAL;
+ }
+ break;
+ case '-':
+ switch (c2) {
+ case '=': return MINEQUAL;
+ case '>': return RARROW;
+ }
+ break;
+ case '*':
+ switch (c2) {
+ case '*': return DOUBLESTAR;
+ case '=': return STAREQUAL;
+ }
+ break;
+ case '/':
+ switch (c2) {
+ case '/': return DOUBLESLASH;
+ case '=': return SLASHEQUAL;
+ }
+ break;
+ case '|':
+ switch (c2) {
+ case '=': return VBAREQUAL;
+ }
+ break;
+ case '%':
+ switch (c2) {
+ case '=': return PERCENTEQUAL;
+ }
+ break;
+ case '&':
+ switch (c2) {
+ case '=': return AMPEREQUAL;
+ }
+ break;
+ case '^':
+ switch (c2) {
+ case '=': return CIRCUMFLEXEQUAL;
+ }
+ break;
+ }
+ return OP;
}
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
- switch (c1) {
- case '<':
- switch (c2) {
- case '<':
- switch (c3) {
- case '=':
- return LEFTSHIFTEQUAL;
- }
- break;
- }
- break;
- case '>':
- switch (c2) {
- case '>':
- switch (c3) {
- case '=':
- return RIGHTSHIFTEQUAL;
- }
- break;
- }
- break;
- case '*':
- switch (c2) {
- case '*':
- switch (c3) {
- case '=':
- return DOUBLESTAREQUAL;
- }
- break;
- }
- break;
- case '/':
- switch (c2) {
- case '/':
- switch (c3) {
- case '=':
- return DOUBLESLASHEQUAL;
- }
- break;
- }
- break;
+ switch (c1) {
+ case '<':
+ switch (c2) {
+ case '<':
+ switch (c3) {
+ case '=':
+ return LEFTSHIFTEQUAL;
+ }
+ break;
+ }
+ break;
+ case '>':
+ switch (c2) {
+ case '>':
+ switch (c3) {
+ case '=':
+ return RIGHTSHIFTEQUAL;
+ }
+ break;
+ }
+ break;
+ case '*':
+ switch (c2) {
+ case '*':
+ switch (c3) {
+ case '=':
+ return DOUBLESTAREQUAL;
+ }
+ break;
+ }
+ break;
+ case '/':
+ switch (c2) {
+ case '/':
+ switch (c3) {
+ case '=':
+ return DOUBLESLASHEQUAL;
+ }
+ break;
+ }
+ break;
+ case '.':
+ switch (c2) {
case '.':
- switch (c2) {
- case '.':
- switch (c3) {
- case '.':
- return ELLIPSIS;
- }
- break;
- }
- break;
- }
- return OP;
+ switch (c3) {
+ case '.':
+ return ELLIPSIS;
+ }
+ break;
+ }
+ break;
+ }
+ return OP;
}
static int
indenterror(struct tok_state *tok)
{
- if (tok->alterror) {
- tok->done = E_TABSPACE;
- tok->cur = tok->inp;
- return 1;
- }
- if (tok->altwarning) {
- PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
- "in indentation\n", tok->filename);
- tok->altwarning = 0;
- }
- return 0;
+ if (tok->alterror) {
+ tok->done = E_TABSPACE;
+ tok->cur = tok->inp;
+ return 1;
+ }
+ if (tok->altwarning) {
+ PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
+ "in indentation\n", tok->filename);
+ tok->altwarning = 0;
+ }
+ return 0;
}
#ifdef PGEN
@@ -1246,23 +1246,23 @@ indenterror(struct tok_state *tok)
static int
verify_identifier(struct tok_state *tok)
{
- PyObject *s;
- int result;
- s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
- if (s == NULL) {
- if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
- PyErr_Clear();
- tok->done = E_IDENTIFIER;
- } else {
- tok->done = E_ERROR;
- }
- return 0;
- }
- result = PyUnicode_IsIdentifier(s);
- Py_DECREF(s);
- if (result == 0)
- tok->done = E_IDENTIFIER;
- return result;
+ PyObject *s;
+ int result;
+ s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
+ if (s == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ PyErr_Clear();
+ tok->done = E_IDENTIFIER;
+ } else {
+ tok->done = E_ERROR;
+ }
+ return 0;
+ }
+ result = PyUnicode_IsIdentifier(s);
+ Py_DECREF(s);
+ if (result == 0)
+ tok->done = E_IDENTIFIER;
+ return result;
}
#endif
@@ -1271,407 +1271,407 @@ verify_identifier(struct tok_state *tok)
static int
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
{
- register int c;
- int blankline, nonascii;
+ register int c;
+ int blankline, nonascii;
- *p_start = *p_end = NULL;
+ *p_start = *p_end = NULL;
nextline:
- tok->start = NULL;
- blankline = 0;
-
- /* Get indentation level */
- if (tok->atbol) {
- register int col = 0;
- register int altcol = 0;
- tok->atbol = 0;
- for (;;) {
- c = tok_nextc(tok);
- if (c == ' ')
- col++, altcol++;
- else if (c == '\t') {
- col = (col/tok->tabsize + 1) * tok->tabsize;
- altcol = (altcol/tok->alttabsize + 1)
- * tok->alttabsize;
- }
- else if (c == '\014') /* Control-L (formfeed) */
- col = altcol = 0; /* For Emacs users */
- else
- break;
- }
- tok_backup(tok, c);
- if (c == '#' || c == '\n') {
- /* Lines with only whitespace and/or comments
- shouldn't affect the indentation and are
- not passed to the parser as NEWLINE tokens,
- except *totally* empty lines in interactive
- mode, which signal the end of a command group. */
- if (col == 0 && c == '\n' && tok->prompt != NULL)
- blankline = 0; /* Let it through */
- else
- blankline = 1; /* Ignore completely */
- /* We can't jump back right here since we still
- may need to skip to the end of a comment */
- }
- if (!blankline && tok->level == 0) {
- if (col == tok->indstack[tok->indent]) {
- /* No change */
- if (altcol != tok->altindstack[tok->indent]) {
- if (indenterror(tok))
- return ERRORTOKEN;
- }
- }
- else if (col > tok->indstack[tok->indent]) {
- /* Indent -- always one */
- if (tok->indent+1 >= MAXINDENT) {
- tok->done = E_TOODEEP;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (altcol <= tok->altindstack[tok->indent]) {
- if (indenterror(tok))
- return ERRORTOKEN;
- }
- tok->pendin++;
- tok->indstack[++tok->indent] = col;
- tok->altindstack[tok->indent] = altcol;
- }
- else /* col < tok->indstack[tok->indent] */ {
- /* Dedent -- any number, must be consistent */
- while (tok->indent > 0 &&
- col < tok->indstack[tok->indent]) {
- tok->pendin--;
- tok->indent--;
- }
- if (col != tok->indstack[tok->indent]) {
- tok->done = E_DEDENT;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (altcol != tok->altindstack[tok->indent]) {
- if (indenterror(tok))
- return ERRORTOKEN;
- }
- }
- }
- }
-
- tok->start = tok->cur;
-
- /* Return pending indents/dedents */
- if (tok->pendin != 0) {
- if (tok->pendin < 0) {
- tok->pendin++;
- return DEDENT;
- }
- else {
- tok->pendin--;
- return INDENT;
- }
- }
+ tok->start = NULL;
+ blankline = 0;
+
+ /* Get indentation level */
+ if (tok->atbol) {
+ register int col = 0;
+ register int altcol = 0;
+ tok->atbol = 0;
+ for (;;) {
+ c = tok_nextc(tok);
+ if (c == ' ')
+ col++, altcol++;
+ else if (c == '\t') {
+ col = (col/tok->tabsize + 1) * tok->tabsize;
+ altcol = (altcol/tok->alttabsize + 1)
+ * tok->alttabsize;
+ }
+ else if (c == '\014') /* Control-L (formfeed) */
+ col = altcol = 0; /* For Emacs users */
+ else
+ break;
+ }
+ tok_backup(tok, c);
+ if (c == '#' || c == '\n') {
+ /* Lines with only whitespace and/or comments
+ shouldn't affect the indentation and are
+ not passed to the parser as NEWLINE tokens,
+ except *totally* empty lines in interactive
+ mode, which signal the end of a command group. */
+ if (col == 0 && c == '\n' && tok->prompt != NULL)
+ blankline = 0; /* Let it through */
+ else
+ blankline = 1; /* Ignore completely */
+ /* We can't jump back right here since we still
+ may need to skip to the end of a comment */
+ }
+ if (!blankline && tok->level == 0) {
+ if (col == tok->indstack[tok->indent]) {
+ /* No change */
+ if (altcol != tok->altindstack[tok->indent]) {
+ if (indenterror(tok))
+ return ERRORTOKEN;
+ }
+ }
+ else if (col > tok->indstack[tok->indent]) {
+ /* Indent -- always one */
+ if (tok->indent+1 >= MAXINDENT) {
+ tok->done = E_TOODEEP;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (altcol <= tok->altindstack[tok->indent]) {
+ if (indenterror(tok))
+ return ERRORTOKEN;
+ }
+ tok->pendin++;
+ tok->indstack[++tok->indent] = col;
+ tok->altindstack[tok->indent] = altcol;
+ }
+ else /* col < tok->indstack[tok->indent] */ {
+ /* Dedent -- any number, must be consistent */
+ while (tok->indent > 0 &&
+ col < tok->indstack[tok->indent]) {
+ tok->pendin--;
+ tok->indent--;
+ }
+ if (col != tok->indstack[tok->indent]) {
+ tok->done = E_DEDENT;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (altcol != tok->altindstack[tok->indent]) {
+ if (indenterror(tok))
+ return ERRORTOKEN;
+ }
+ }
+ }
+ }
+
+ tok->start = tok->cur;
+
+ /* Return pending indents/dedents */
+ if (tok->pendin != 0) {
+ if (tok->pendin < 0) {
+ tok->pendin++;
+ return DEDENT;
+ }
+ else {
+ tok->pendin--;
+ return INDENT;
+ }
+ }
again:
- tok->start = NULL;
- /* Skip spaces */
- do {
- c = tok_nextc(tok);
- } while (c == ' ' || c == '\t' || c == '\014');
-
- /* Set start of current token */
- tok->start = tok->cur - 1;
-
- /* Skip comment */
- if (c == '#')
- while (c != EOF && c != '\n')
- c = tok_nextc(tok);
-
- /* Check for EOF and errors now */
- if (c == EOF) {
- return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
- }
-
- /* Identifier (most frequent token!) */
- nonascii = 0;
- if (is_potential_identifier_start(c)) {
- /* Process b"", r"" and br"" */
- if (c == 'b' || c == 'B') {
- c = tok_nextc(tok);
- if (c == '"' || c == '\'')
- goto letter_quote;
- }
- if (c == 'r' || c == 'R') {
- c = tok_nextc(tok);
- if (c == '"' || c == '\'')
- goto letter_quote;
- }
- while (is_potential_identifier_char(c)) {
- if (c >= 128)
- nonascii = 1;
- c = tok_nextc(tok);
- }
- tok_backup(tok, c);
- if (nonascii &&
- !verify_identifier(tok)) {
- tok->done = E_IDENTIFIER;
- return ERRORTOKEN;
- }
- *p_start = tok->start;
- *p_end = tok->cur;
- return NAME;
- }
-
- /* Newline */
- if (c == '\n') {
- tok->atbol = 1;
- if (blankline || tok->level > 0)
- goto nextline;
- *p_start = tok->start;
- *p_end = tok->cur - 1; /* Leave '\n' out of the string */
- tok->cont_line = 0;
- return NEWLINE;
- }
-
- /* Period or number starting with period? */
- if (c == '.') {
- c = tok_nextc(tok);
- if (isdigit(c)) {
- goto fraction;
- } else if (c == '.') {
- c = tok_nextc(tok);
- if (c == '.') {
- *p_start = tok->start;
- *p_end = tok->cur;
- return ELLIPSIS;
- } else {
- tok_backup(tok, c);
- }
- tok_backup(tok, '.');
- } else {
- tok_backup(tok, c);
- }
- *p_start = tok->start;
- *p_end = tok->cur;
- return DOT;
- }
-
- /* Number */
- if (isdigit(c)) {
- if (c == '0') {
- /* Hex, octal or binary -- maybe. */
- c = tok_nextc(tok);
- if (c == '.')
- goto fraction;
- if (c == 'j' || c == 'J')
- goto imaginary;
- if (c == 'x' || c == 'X') {
-
- /* Hex */
- c = tok_nextc(tok);
- if (!isxdigit(c)) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while (isxdigit(c));
- }
- else if (c == 'o' || c == 'O') {
- /* Octal */
- c = tok_nextc(tok);
- if (c < '0' || c >= '8') {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while ('0' <= c && c < '8');
- }
- else if (c == 'b' || c == 'B') {
- /* Binary */
- c = tok_nextc(tok);
- if (c != '0' && c != '1') {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while (c == '0' || c == '1');
- }
- else {
- int nonzero = 0;
- /* maybe old-style octal; c is first char of it */
- /* in any case, allow '0' as a literal */
- while (c == '0')
- c = tok_nextc(tok);
- while (isdigit(c)) {
- nonzero = 1;
- c = tok_nextc(tok);
- }
- if (c == '.')
- goto fraction;
- else if (c == 'e' || c == 'E')
- goto exponent;
- else if (c == 'j' || c == 'J')
- goto imaginary;
- else if (nonzero) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- }
- }
- else {
- /* Decimal */
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
- {
- /* Accept floating point numbers. */
- if (c == '.') {
- fraction:
- /* Fraction */
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
- }
- if (c == 'e' || c == 'E') {
- exponent:
- /* Exponent part */
- c = tok_nextc(tok);
- if (c == '+' || c == '-')
- c = tok_nextc(tok);
- if (!isdigit(c)) {
- tok->done = E_TOKEN;
- tok_backup(tok, c);
- return ERRORTOKEN;
- }
- do {
- c = tok_nextc(tok);
- } while (isdigit(c));
- }
- if (c == 'j' || c == 'J')
- /* Imaginary part */
- imaginary:
- c = tok_nextc(tok);
- }
- }
- tok_backup(tok, c);
- *p_start = tok->start;
- *p_end = tok->cur;
- return NUMBER;
- }
+ tok->start = NULL;
+ /* Skip spaces */
+ do {
+ c = tok_nextc(tok);
+ } while (c == ' ' || c == '\t' || c == '\014');
+
+ /* Set start of current token */
+ tok->start = tok->cur - 1;
+
+ /* Skip comment */
+ if (c == '#')
+ while (c != EOF && c != '\n')
+ c = tok_nextc(tok);
+
+ /* Check for EOF and errors now */
+ if (c == EOF) {
+ return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
+ }
+
+ /* Identifier (most frequent token!) */
+ nonascii = 0;
+ if (is_potential_identifier_start(c)) {
+ /* Process b"", r"" and br"" */
+ if (c == 'b' || c == 'B') {
+ c = tok_nextc(tok);
+ if (c == '"' || c == '\'')
+ goto letter_quote;
+ }
+ if (c == 'r' || c == 'R') {
+ c = tok_nextc(tok);
+ if (c == '"' || c == '\'')
+ goto letter_quote;
+ }
+ while (is_potential_identifier_char(c)) {
+ if (c >= 128)
+ nonascii = 1;
+ c = tok_nextc(tok);
+ }
+ tok_backup(tok, c);
+ if (nonascii &&
+ !verify_identifier(tok)) {
+ tok->done = E_IDENTIFIER;
+ return ERRORTOKEN;
+ }
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return NAME;
+ }
+
+ /* Newline */
+ if (c == '\n') {
+ tok->atbol = 1;
+ if (blankline || tok->level > 0)
+ goto nextline;
+ *p_start = tok->start;
+ *p_end = tok->cur - 1; /* Leave '\n' out of the string */
+ tok->cont_line = 0;
+ return NEWLINE;
+ }
+
+ /* Period or number starting with period? */
+ if (c == '.') {
+ c = tok_nextc(tok);
+ if (isdigit(c)) {
+ goto fraction;
+ } else if (c == '.') {
+ c = tok_nextc(tok);
+ if (c == '.') {
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return ELLIPSIS;
+ } else {
+ tok_backup(tok, c);
+ }
+ tok_backup(tok, '.');
+ } else {
+ tok_backup(tok, c);
+ }
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return DOT;
+ }
+
+ /* Number */
+ if (isdigit(c)) {
+ if (c == '0') {
+ /* Hex, octal or binary -- maybe. */
+ c = tok_nextc(tok);
+ if (c == '.')
+ goto fraction;
+ if (c == 'j' || c == 'J')
+ goto imaginary;
+ if (c == 'x' || c == 'X') {
+
+ /* Hex */
+ c = tok_nextc(tok);
+ if (!isxdigit(c)) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (isxdigit(c));
+ }
+ else if (c == 'o' || c == 'O') {
+ /* Octal */
+ c = tok_nextc(tok);
+ if (c < '0' || c >= '8') {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while ('0' <= c && c < '8');
+ }
+ else if (c == 'b' || c == 'B') {
+ /* Binary */
+ c = tok_nextc(tok);
+ if (c != '0' && c != '1') {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (c == '0' || c == '1');
+ }
+ else {
+ int nonzero = 0;
+ /* maybe old-style octal; c is first char of it */
+ /* in any case, allow '0' as a literal */
+ while (c == '0')
+ c = tok_nextc(tok);
+ while (isdigit(c)) {
+ nonzero = 1;
+ c = tok_nextc(tok);
+ }
+ if (c == '.')
+ goto fraction;
+ else if (c == 'e' || c == 'E')
+ goto exponent;
+ else if (c == 'j' || c == 'J')
+ goto imaginary;
+ else if (nonzero) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ }
+ }
+ else {
+ /* Decimal */
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ {
+ /* Accept floating point numbers. */
+ if (c == '.') {
+ fraction:
+ /* Fraction */
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ }
+ if (c == 'e' || c == 'E') {
+ exponent:
+ /* Exponent part */
+ c = tok_nextc(tok);
+ if (c == '+' || c == '-')
+ c = tok_nextc(tok);
+ if (!isdigit(c)) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ }
+ if (c == 'j' || c == 'J')
+ /* Imaginary part */
+ imaginary:
+ c = tok_nextc(tok);
+ }
+ }
+ tok_backup(tok, c);
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return NUMBER;
+ }
letter_quote:
- /* String */
- if (c == '\'' || c == '"') {
- int quote = c;
- int quote_size = 1; /* 1 or 3 */
- int end_quote_size = 0;
-
- /* Find the quote size and start of string */
- c = tok_nextc(tok);
- if (c == quote) {
- c = tok_nextc(tok);
- if (c == quote)
- quote_size = 3;
- else
- end_quote_size = 1; /* empty string found */
- }
- if (c != quote)
- tok_backup(tok, c);
-
- /* Get rest of string */
- while (end_quote_size != quote_size) {
- c = tok_nextc(tok);
- if (c == EOF) {
- if (quote_size == 3)
- tok->done = E_EOFS;
- else
- tok->done = E_EOLS;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (quote_size == 1 && c == '\n') {
- tok->done = E_EOLS;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- if (c == quote)
- end_quote_size += 1;
- else {
- end_quote_size = 0;
- if (c == '\\')
- c = tok_nextc(tok); /* skip escaped char */
- }
- }
-
- *p_start = tok->start;
- *p_end = tok->cur;
- return STRING;
- }
-
- /* Line continuation */
- if (c == '\\') {
- c = tok_nextc(tok);
- if (c != '\n') {
- tok->done = E_LINECONT;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- }
- tok->cont_line = 1;
- goto again; /* Read next line */
- }
-
- /* Check for two-character token */
- {
- int c2 = tok_nextc(tok);
- int token = PyToken_TwoChars(c, c2);
- if (token != OP) {
- int c3 = tok_nextc(tok);
- int token3 = PyToken_ThreeChars(c, c2, c3);
- if (token3 != OP) {
- token = token3;
- } else {
- tok_backup(tok, c3);
- }
- *p_start = tok->start;
- *p_end = tok->cur;
- return token;
- }
- tok_backup(tok, c2);
- }
-
- /* Keep track of parentheses nesting level */
- switch (c) {
- case '(':
- case '[':
- case '{':
- tok->level++;
- break;
- case ')':
- case ']':
- case '}':
- tok->level--;
- break;
- }
-
- /* Punctuation character */
- *p_start = tok->start;
- *p_end = tok->cur;
- return PyToken_OneChar(c);
+ /* String */
+ if (c == '\'' || c == '"') {
+ int quote = c;
+ int quote_size = 1; /* 1 or 3 */
+ int end_quote_size = 0;
+
+ /* Find the quote size and start of string */
+ c = tok_nextc(tok);
+ if (c == quote) {
+ c = tok_nextc(tok);
+ if (c == quote)
+ quote_size = 3;
+ else
+ end_quote_size = 1; /* empty string found */
+ }
+ if (c != quote)
+ tok_backup(tok, c);
+
+ /* Get rest of string */
+ while (end_quote_size != quote_size) {
+ c = tok_nextc(tok);
+ if (c == EOF) {
+ if (quote_size == 3)
+ tok->done = E_EOFS;
+ else
+ tok->done = E_EOLS;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (quote_size == 1 && c == '\n') {
+ tok->done = E_EOLS;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ if (c == quote)
+ end_quote_size += 1;
+ else {
+ end_quote_size = 0;
+ if (c == '\\')
+ c = tok_nextc(tok); /* skip escaped char */
+ }
+ }
+
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return STRING;
+ }
+
+ /* Line continuation */
+ if (c == '\\') {
+ c = tok_nextc(tok);
+ if (c != '\n') {
+ tok->done = E_LINECONT;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ }
+ tok->cont_line = 1;
+ goto again; /* Read next line */
+ }
+
+ /* Check for two-character token */
+ {
+ int c2 = tok_nextc(tok);
+ int token = PyToken_TwoChars(c, c2);
+ if (token != OP) {
+ int c3 = tok_nextc(tok);
+ int token3 = PyToken_ThreeChars(c, c2, c3);
+ if (token3 != OP) {
+ token = token3;
+ } else {
+ tok_backup(tok, c3);
+ }
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return token;
+ }
+ tok_backup(tok, c2);
+ }
+
+ /* Keep track of parentheses nesting level */
+ switch (c) {
+ case '(':
+ case '[':
+ case '{':
+ tok->level++;
+ break;
+ case ')':
+ case ']':
+ case '}':
+ tok->level--;
+ break;
+ }
+
+ /* Punctuation character */
+ *p_start = tok->start;
+ *p_end = tok->cur;
+ return PyToken_OneChar(c);
}
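
The indentation block near the top of tok_get drains queued indentation tokens one per call: comparing a fresh logical line's column against tok->indstack either pushes a level (pendin++) or pops levels (pendin--), and tok_get then keeps returning INDENT or DEDENT until pendin reaches zero. What follows is a minimal, self-contained sketch of that bookkeeping, not part of this patch; the stack and counter names mirror the tok_state fields, but the driver, the fixed stack size, and the omission of the E_DEDENT consistency check are simplifications.

/* Illustrative sketch -- not part of tokenizer.c or this patch. */
#include <stdio.h>

#define MAXINDENT 100

static int indstack[MAXINDENT] = {0};   /* column of each open block */
static int indent = 0;                  /* top of indstack */
static int pendin = 0;                  /* >0: INDENTs owed, <0: DEDENTs owed */

/* Compare the column of a new logical line against the stack
   (the E_DEDENT consistency check is omitted for brevity). */
static void set_column(int col)
{
    if (col > indstack[indent]) {
        indstack[++indent] = col;       /* deeper: queue one INDENT */
        pendin++;
    }
    else {
        while (indent > 0 && col < indstack[indent]) {
            indent--;                   /* shallower: queue one DEDENT each */
            pendin--;
        }
    }
}

int main(void)
{
    int cols[] = {0, 4, 8, 0};          /* e.g. def / if / body / top level */
    int i;
    for (i = 0; i < 4; i++) {
        set_column(cols[i]);
        while (pendin > 0) { puts("INDENT"); pendin--; }  /* drained one per */
        while (pendin < 0) { puts("DEDENT"); pendin++; }  /* tok_get call   */
    }
    return 0;
}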
int
PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
{
- int result = tok_get(tok, p_start, p_end);
- if (tok->decoding_erred) {
- result = ERRORTOKEN;
- tok->done = E_DECODE;
- }
- return result;
+ int result = tok_get(tok, p_start, p_end);
+ if (tok->decoding_erred) {
+ result = ERRORTOKEN;
+ tok->done = E_DECODE;
+ }
+ return result;
}
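
PyTokenizer_Get is the entry point callers use, wrapping tok_get and folding decoding errors into ERRORTOKEN/E_DECODE. A hedged driver-loop sketch follows, not part of this patch, modelled on PyTokenizer_FindEncoding below but running to ENDMARKER and printing each token; the helper name dump_file_tokens is hypothetical, while the tokenizer calls and constants are the ones already used in this file.

/* Illustrative sketch only -- a hypothetical debugging helper, assumed to
   be compiled inside the CPython tree where tokenizer.h, token.h and
   errcode.h are visible. */
static void
dump_file_tokens(FILE *fp)
{
    struct tok_state *tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
    if (tok == NULL)
        return;
    for (;;) {
        char *p_start = NULL, *p_end = NULL;
        int type = PyTokenizer_Get(tok, &p_start, &p_end);
        if (p_start != NULL && p_end != NULL)
            printf("%s(%.*s)\n", _PyParser_TokenNames[type],
                   (int)(p_end - p_start), p_start);
        else
            printf("%s\n", _PyParser_TokenNames[type]);
        if (type == ENDMARKER || type == ERRORTOKEN)
            break;
    }
    PyTokenizer_Free(tok);
}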
/* Get -*- encoding -*- from a Python file.
@@ -1686,34 +1686,34 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
char *
PyTokenizer_FindEncoding(int fd)
{
- struct tok_state *tok;
- FILE *fp;
- char *p_start =NULL , *p_end =NULL , *encoding = NULL;
-
- fd = dup(fd);
- if (fd < 0) {
- return NULL;
- }
- fp = fdopen(fd, "r");
- if (fp == NULL) {
- return NULL;
- }
- tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
- if (tok == NULL) {
- fclose(fp);
- return NULL;
- }
- while (tok->lineno < 2 && tok->done == E_OK) {
- PyTokenizer_Get(tok, &p_start, &p_end);
- }
- fclose(fp);
- if (tok->encoding) {
- encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
- if (encoding)
- strcpy(encoding, tok->encoding);
- }
- PyTokenizer_Free(tok);
- return encoding;
+ struct tok_state *tok;
+ FILE *fp;
+    char *p_start = NULL, *p_end = NULL, *encoding = NULL;
+
+ fd = dup(fd);
+ if (fd < 0) {
+ return NULL;
+ }
+ fp = fdopen(fd, "r");
+ if (fp == NULL) {
+ return NULL;
+ }
+ tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
+ if (tok == NULL) {
+ fclose(fp);
+ return NULL;
+ }
+ while (tok->lineno < 2 && tok->done == E_OK) {
+ PyTokenizer_Get(tok, &p_start, &p_end);
+ }
+ fclose(fp);
+ if (tok->encoding) {
+ encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
+ if (encoding)
+ strcpy(encoding, tok->encoding);
+ }
+ PyTokenizer_Free(tok);
+ return encoding;
}
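
One point worth noting for callers: the encoding string returned above is copied into a PyMem_MALLOC'ed buffer, so ownership passes to the caller, who is expected to release it with PyMem_FREE. A minimal hedged usage sketch, not part of this patch; the fd variable and surrounding error handling are assumed.

/* Illustrative caller sketch -- fd is assumed to be an open file
   descriptor positioned at the start of a Python source file. */
char *enc = PyTokenizer_FindEncoding(fd);
if (enc != NULL) {
    printf("source encoding: %s\n", enc);
    PyMem_FREE(enc);            /* caller owns the PyMem_MALLOC'ed copy */
}
else {
    /* NULL: no coding cookie in the first two lines, or an error occurred */
}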
#ifdef Py_DEBUG
@@ -1721,9 +1721,9 @@ PyTokenizer_FindEncoding(int fd)
void
tok_dump(int type, char *start, char *end)
{
- printf("%s", _PyParser_TokenNames[type]);
- if (type == NAME || type == NUMBER || type == STRING || type == OP)
- printf("(%.*s)", (int)(end - start), start);
+ printf("%s", _PyParser_TokenNames[type]);
+ if (type == NAME || type == NUMBER || type == STRING || type == OP)
+ printf("(%.*s)", (int)(end - start), start);
}
#endif
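
As a closing illustration of the string branch in tok_get above: the quote_size / end_quote_size pair distinguishes single- from triple-quoted literals and detects the closing delimiter by counting consecutive quote characters. Below is a standalone, hedged reduction of that loop, not part of this patch; scan_string and its API are hypothetical, and prefix handling and the tokenizer's error codes are omitted.

/* Illustrative sketch only. Returns 1 if s holds one complete string
   literal, 0 otherwise. */
#include <stdio.h>

static int scan_string(const char *s)
{
    int quote = *s++;
    int quote_size = 1;                 /* 1 or 3 */
    int end_quote_size = 0;

    if (quote != '\'' && quote != '"')
        return 0;
    if (*s == quote) {                  /* a second quote... */
        s++;
        if (*s == quote) {              /* ...and a third: triple quote */
            s++;
            quote_size = 3;
        }
        else
            end_quote_size = 1;         /* empty string '' or "" */
    }
    while (end_quote_size != quote_size) {
        int c = *s++;
        if (c == '\0')
            return 0;                   /* ran out of input (E_EOFS/E_EOLS) */
        if (quote_size == 1 && c == '\n')
            return 0;                   /* newline inside a short string */
        if (c == quote)
            end_quote_size += 1;
        else {
            end_quote_size = 0;
            if (c == '\\' && *s != '\0')
                s++;                    /* skip escaped char */
        }
    }
    return 1;
}

int main(void)
{
    printf("%d %d %d\n",
           scan_string("'abc'"),              /* 1 */
           scan_string("'''multi\nline'''"),  /* 1 */
           scan_string("'unterminated"));     /* 0 */
    return 0;
}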