diff options
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8654356..28fcf3c 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1522,6 +1522,68 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) return result; } +/* This function is only called from parsetok. However, it cannot live + there, as it must be empty for PGEN, and we can check for PGEN only + in this file. */ + +#ifdef PGEN +char* +PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset) +{ + return NULL; +} +#else +static PyObject * +dec_utf8(const char *enc, const char *text, size_t len) { + PyObject *ret = NULL; + PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace"); + if (unicode_text) { + ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace"); + Py_DECREF(unicode_text); + } + if (!ret) { + PyErr_Print(); + } + return ret; +} + +char * +PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) +{ + char *text = NULL; + if (tok->encoding) { + /* convert source to original encondig */ + PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len); + if (lineobj != NULL) { + int linelen = PyString_Size(lineobj); + const char *line = PyString_AsString(lineobj); + text = PyObject_MALLOC(linelen + 1); + if (text != NULL && line != NULL) { + if (linelen) + strncpy(text, line, linelen); + text[linelen] = '\0'; + } + Py_DECREF(lineobj); + + /* adjust error offset */ + if (*offset > 1) { + PyObject *offsetobj = dec_utf8(tok->encoding, + tok->buf, *offset-1); + if (offsetobj) { + *offset = PyString_Size(offsetobj) + 1; + Py_DECREF(offsetobj); + } + } + + } + } + return text; + +} +#endif + + + #ifdef Py_DEBUG void |