summaryrefslogtreecommitdiffstats
path: root/Parser/tokenizer.c
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2002-08-07 12:33:18 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2002-08-07 12:33:18 (GMT)
commit019934b3ccd27463e1bed94b280acba65fb43ccf (patch)
treef7881b224647ba07887b3be252acf85184559f99 /Parser/tokenizer.c
parent63d5bead18d920be40053e93e807cd8e124d5d3f (diff)
downloadcpython-019934b3ccd27463e1bed94b280acba65fb43ccf.zip
cpython-019934b3ccd27463e1bed94b280acba65fb43ccf.tar.gz
cpython-019934b3ccd27463e1bed94b280acba65fb43ccf.tar.bz2
Fix PEP 263 code --without-unicode. Fixes #591943.
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r--Parser/tokenizer.c18
1 file changed, 18 insertions, 0 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3761a48..34fbfa6 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -256,11 +256,18 @@ check_coding_spec(const char* line, int size, struct tok_state *tok,
strcmp(cs, "iso-8859-1") == 0) {
tok->encoding = cs;
} else {
+#ifdef Py_USING_UNICODE
r = set_readline(tok, cs);
if (r) {
tok->encoding = cs;
tok->decoding_state = -1;
}
+#else
+ /* Without Unicode support, we cannot
+ process the coding spec. Since there
+ won't be any Unicode literals, that
+ won't matter. */
+#endif
}
} else { /* then, compare cs with BOM */
r = (strcmp(tok->encoding, cs) == 0);
@@ -317,6 +324,10 @@ check_bom(int get_char(struct tok_state *),
static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
+#ifndef Py_USING_UNICODE
+ /* In a non-Unicode build, this should never be called. */
+ abort();
+#else
PyObject* utf8;
PyObject* buf = tok->decoding_buffer;
if (buf == NULL) {
@@ -338,6 +349,7 @@ fp_readl(char *s, int size, struct tok_state *tok)
if (s[0] == '\0') return NULL; /* EOF */
return s;
}
+#endif
}
/* Set the readline function for TOK to a StreamReader's
@@ -487,6 +499,7 @@ static int buf_setreadl(struct tok_state *tok, const char* enc) {
/* Return a UTF-8 encoding Python string object from the
C byte string STR, which is encoded with ENC. */
+#ifdef Py_USING_UNICODE
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
PyObject *utf8;
@@ -497,6 +510,7 @@ translate_into_utf8(const char* str, const char* enc) {
Py_DECREF(buf);
return utf8;
}
+#endif
/* Decode a byte string STR for use as the buffer of TOK.
Look for encoding declarations inside STR, and record them
@@ -514,12 +528,14 @@ decode_str(const char *str, struct tok_state *tok)
return NULL;
str = tok->str; /* string after BOM if any */
assert(str);
+#ifdef Py_USING_UNICODE
if (tok->enc != NULL) {
utf8 = translate_into_utf8(str, tok->enc);
if (utf8 == NULL)
return NULL;
str = PyString_AsString(utf8);
}
+#endif
for (s = str;; s++) {
if (*s == '\0') break;
else if (*s == '\n') {
@@ -530,6 +546,7 @@ decode_str(const char *str, struct tok_state *tok)
tok->enc = NULL;
if (!check_coding_spec(str, s - str, tok, buf_setreadl))
return NULL;
+#ifdef Py_USING_UNICODE
if (tok->enc != NULL) {
assert(utf8 == NULL);
utf8 = translate_into_utf8(str, tok->enc);
@@ -537,6 +554,7 @@ decode_str(const char *str, struct tok_state *tok)
return NULL;
str = PyString_AsString(utf8);
}
+#endif
assert(tok->decoding_buffer == NULL);
tok->decoding_buffer = utf8; /* CAUTION */
return str;