summary refs log tree commit diff stats
path: root/Parser/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'Parser/tokenizer.h')
-rw-r--r-- Parser/tokenizer.h 23
1 files changed, 15 insertions, 8 deletions
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index f15e252..2be3bf2 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -12,6 +12,12 @@ extern "C" {
#define MAXINDENT 100 /* Max indentation level */
+enum decoding_state { /* type of tok_state.decoding_state (PEP 0263 handling); replaces the old int codes -1:decoding, 0:init, 1:raw */
+ STATE_INIT, /* presumably the state before any encoding decision is made -- confirm in tokenizer.c */
+ STATE_RAW, /* presumably input is consumed as-is, without a codec -- confirm in tokenizer.c */
+ STATE_NORMAL /* have a codec associated with input */
+};
+
/* Tokenizer state */
struct tok_state {
/* Input state; buf <= cur <= inp <= end */
@@ -34,35 +40,36 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
- const char *filename; /* For error messages */
+ const char *filename; /* encoded to the filesystem encoding */
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
int altindstack[MAXINDENT]; /* Stack of alternate indents */
/* Stuff for PEP 0263 */
- int decoding_state; /* -1:decoding, 0:init, 1:raw */
+ enum decoding_state decoding_state;
int decoding_erred; /* whether erred in decoding */
int read_coding_spec; /* whether 'coding:...' has been read */
- char *encoding;
+ char *encoding; /* Source encoding. */
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
#ifndef PGEN
- PyObject *decoding_readline; /* codecs.open(...).readline */
+ PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
#endif
- const char* enc;
+ const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
-extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
+extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
+ char *, char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
-#if defined(PGEN) || defined(Py_USING_UNICODE)
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
-#endif
+extern char * PyTokenizer_FindEncoding(int);
#ifdef __cplusplus
}