summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2007-06-07 00:54:15 (GMT)
committerGuido van Rossum <guido@python.org>2007-06-07 00:54:15 (GMT)
commit9cbfffd1a63ee944dedda758ac1d1963d4cbf3b1 (patch)
tree80f78198ee6b9b7bd28d1124d62438f7b796c10d
parente7ba4956272a7105ea90dd505f70e5947aa27161 (diff)
downloadcpython-9cbfffd1a63ee944dedda758ac1d1963d4cbf3b1.zip
cpython-9cbfffd1a63ee944dedda758ac1d1963d4cbf3b1.tar.gz
cpython-9cbfffd1a63ee944dedda758ac1d1963d4cbf3b1.tar.bz2
tokenizer.c: make coding markup work again.
io.open() now takes all positional parameters (so we can conveniently call it from C code). test_tarfile.py no longer uses u"..." literals, but is otherwise still badly broken. This is a checkpoint; some more stuff now breaks.
-rw-r--r--Lib/io.py3
-rw-r--r--Lib/test/test_tarfile.py34
-rw-r--r--Parser/tokenizer.c30
3 files changed, 35 insertions, 32 deletions
diff --git a/Lib/io.py b/Lib/io.py
index df224e6..f1be881 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -49,7 +49,7 @@ class BlockingIOError(IOError):
self.characters_written = characters_written
-def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
+def open(file, mode="r", buffering=None, encoding=None, newline=None):
"""Replacement for the built-in open function.
Args:
@@ -59,7 +59,6 @@ def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
buffering: optional int >= 0 giving the buffer size; values
can be: 0 = unbuffered, 1 = line buffered,
larger = fully buffered.
- Keywords (for text modes only; *must* be given as keyword arguments):
encoding: optional string giving the text encoding.
newline: optional newlines specifier; must be None, '\n' or '\r\n';
specifies the line ending expected on input and written on
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 636a45e..39504e1 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -432,17 +432,17 @@ class PaxReadTest(LongnameTest):
tarinfo = tar.getmember("pax/regtype1")
self.assertEqual(tarinfo.uname, "foo")
self.assertEqual(tarinfo.gname, "bar")
- self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+ self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
tarinfo = tar.getmember("pax/regtype2")
self.assertEqual(tarinfo.uname, "")
self.assertEqual(tarinfo.gname, "bar")
- self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+ self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
tarinfo = tar.getmember("pax/regtype3")
self.assertEqual(tarinfo.uname, "tarfile")
self.assertEqual(tarinfo.gname, "tarfile")
- self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+ self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
def test_pax_number_fields(self):
# All following number fields are read from the pax header.
@@ -727,11 +727,11 @@ class PaxWriteTest(GNUWriteTest):
def test_pax_global_header(self):
pax_headers = {
- u"foo": u"bar",
- u"uid": u"0",
- u"mtime": u"1.23",
- u"test": u"äöü",
- u"äöü": u"test"}
+ "foo": "bar",
+ "uid": "0",
+ "mtime": "1.23",
+ "test": "äöü",
+ "äöü": "test"}
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
pax_headers=pax_headers)
@@ -756,11 +756,11 @@ class PaxWriteTest(GNUWriteTest):
def test_pax_extended_header(self):
# The fields from the pax header have priority over the
# TarInfo.
- pax_headers = {u"path": u"foo", u"uid": u"123"}
+ pax_headers = {"path": "foo", "uid": "123"}
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
t = tarfile.TarInfo()
- t.name = u"äöü" # non-ASCII
+ t.name = "äöü" # non-ASCII
t.uid = 8**8 # too large
t.pax_headers = pax_headers
tar.addfile(t)
@@ -808,11 +808,11 @@ class UstarUnicodeTest(unittest.TestCase):
else:
tar.addfile(tarinfo)
- tarinfo.name = u"äöü"
+ tarinfo.name = "äöü"
self.assertRaises(UnicodeError, tar.addfile, tarinfo)
tarinfo.name = "foo"
- tarinfo.uname = u"äöü"
+ tarinfo.uname = "äöü"
self.assertRaises(UnicodeError, tar.addfile, tarinfo)
def test_unicode_argument(self):
@@ -825,7 +825,7 @@ class UstarUnicodeTest(unittest.TestCase):
tar.close()
def test_uname_unicode(self):
- for name in (u"äöü", "äöü"):
+ for name in ("äöü", "äöü"):
t = tarfile.TarInfo("foo")
t.uname = name
t.gname = name
@@ -860,9 +860,9 @@ class PaxUnicodeTest(UstarUnicodeTest):
def test_error_handlers(self):
# Test if the unicode error handlers work correctly for characters
# that cannot be expressed in a given encoding.
- self._create_unicode_name(u"äöü")
+ self._create_unicode_name("äöü")
- for handler, name in (("utf-8", u"äöü".encode("utf8")),
+ for handler, name in (("utf-8", "äöü".encode("utf8")),
("replace", "???"), ("ignore", "")):
tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
errors=handler)
@@ -874,11 +874,11 @@ class PaxUnicodeTest(UstarUnicodeTest):
def test_error_handler_utf8(self):
# Create a pathname that has one component representable using
# iso8859-1 and the other only in iso8859-15.
- self._create_unicode_name(u"äöü/¤")
+ self._create_unicode_name("äöü/¤")
tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
errors="utf-8")
- self.assertEqual(tar.getnames()[0], "äöü/" + u"¤".encode("utf8"))
+ self.assertEqual(tar.getnames()[0], "äöü/" + "¤".encode("utf8"))
class AppendTest(unittest.TestCase):
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index a43094b..f3eeb2c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -396,25 +396,29 @@ fp_readl(char *s, int size, struct tok_state *tok)
static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
- PyObject *reader, *stream, *readline;
+ PyObject *readline = NULL, *stream = NULL, *io = NULL;
+ int ok = 0;
- /* XXX: constify filename argument. */
- stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
- if (stream == NULL)
- return 0;
+ io = PyImport_ImportModule("io");
+ if (io == NULL)
+ goto cleanup;
- reader = PyCodec_StreamReader(enc, stream, NULL);
- Py_DECREF(stream);
- if (reader == NULL)
- return 0;
+ stream = PyObject_CallMethod(io, "open", "ssis",
+ tok->filename, "r", -1, enc);
+ if (stream == NULL)
+ goto cleanup;
- readline = PyObject_GetAttrString(reader, "readline");
- Py_DECREF(reader);
+ readline = PyObject_GetAttrString(stream, "readline");
if (readline == NULL)
- return 0;
+ goto cleanup;
tok->decoding_readline = readline;
- return 1;
+ ok = 1;
+
+ cleanup:
+ Py_XDECREF(stream);
+ Py_XDECREF(io);
+ return ok;
}
/* Fetch the next byte from TOK. */