1 files changed, 15 insertions, 8 deletions
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index f15e252..2be3bf2 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -12,6 +12,12 @@ extern "C" {
 
 #define MAXINDENT 100   /* Max indentation level */
 
+enum decoding_state {   /* for tok_state.decoding_state (PEP 0263) */
+    STATE_INIT,         /* 0: initial state */
+    STATE_RAW,          /* 1: raw input; presumably no codec -- TODO confirm */
+    STATE_NORMAL        /* have a codec associated with input */
+};
+
 /* Tokenizer state */
 struct tok_state {
     /* Input state; buf <= cur <= inp <= end */
@@ -34,35 +40,36 @@ struct tok_state {
     int level;          /* () [] {} Parentheses nesting level */
             /* Used to allow free continuations inside them */
     /* Stuff for checking on different tab sizes */
-    const char *filename;       /* For error messages */
+    const char *filename;   /* encoded to the filesystem encoding */
     int altwarning;     /* Issue warning if alternate tabs don't match */
     int alterror;       /* Issue error if alternate tabs don't match */
     int alttabsize;     /* Alternate tab spacing */
     int altindstack[MAXINDENT];         /* Stack of alternate indents */
     /* Stuff for PEP 0263 */
-    int decoding_state;         /* -1:decoding, 0:init, 1:raw */
+    enum decoding_state decoding_state;
     int decoding_erred;         /* whether erred in decoding  */
     int read_coding_spec;       /* whether 'coding:...' has been read  */
-    char *encoding;
+    char *encoding;         /* Source encoding. */
     int cont_line;          /* whether we are in a continuation line. */
     const char* line_start;     /* pointer to start of current line */
 #ifndef PGEN
-    PyObject *decoding_readline; /* codecs.open(...).readline */
+    PyObject *decoding_readline; /* open(...).readline */
     PyObject *decoding_buffer;
 #endif
-    const char* enc;
+    const char* enc;        /* Encoding for the current str. */
     const char* str;
     const char* input; /* Tokenizer's newline translated copy of the string. */
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *, int);
-extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
+extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
+                                              char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
-#if defined(PGEN) || defined(Py_USING_UNICODE)
 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
                                           int len, int *offset);
-#endif
+extern char * PyTokenizer_FindEncoding(int); /* arg: fd? -- TODO confirm */
 
 #ifdef __cplusplus
 }