summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r--Lib/io.py288
-rw-r--r--Lib/test/test_memoryio.py10
-rw-r--r--Lib/test/test_minidom.py3
-rw-r--r--Lib/test/test_uu.py34
-rw-r--r--Lib/xml/dom/minidom.py7
5 files changed, 320 insertions, 22 deletions
diff --git a/Lib/io.py b/Lib/io.py
index 0c993b1..f543e20 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -1769,20 +1769,20 @@ class TextIOWrapper(TextIOBase):
def newlines(self):
return self._decoder.newlines if self._decoder else None
-class StringIO(TextIOWrapper):
- """An in-memory stream for text. The initial_value argument sets the
- value of object. The other arguments are like those of TextIOWrapper's
- constructor.
+class _StringIO(TextIOWrapper):
+ """Text I/O implementation using an in-memory buffer.
+
+ The initial_value argument sets the value of the object. The newline
+ argument is like the one of TextIOWrapper's constructor.
"""
# XXX This is really slow, but fully functional
- def __init__(self, initial_value="", encoding="utf-8",
- errors="strict", newline="\n"):
- super(StringIO, self).__init__(BytesIO(),
- encoding=encoding,
- errors=errors,
- newline=newline)
+ def __init__(self, initial_value="", newline="\n"):
+ super(_StringIO, self).__init__(BytesIO(),
+ encoding="utf-8",
+ errors="strict",
+ newline=newline)
if initial_value:
if not isinstance(initial_value, str):
initial_value = str(initial_value)
@@ -1792,3 +1792,271 @@ class StringIO(TextIOWrapper):
def getvalue(self):
self.flush()
return self.buffer.getvalue().decode(self._encoding, self._errors)
+
+try:
+ import _stringio
+
+ # This subclass is a reimplementation of the TextIOWrapper
+ # interface without any of its text decoding facilities. All the
+ # stored data is manipulated with the efficient
+ # _stringio._StringIO extension type. Also, the newline decoding
+ # mechanism of IncrementalNewlineDecoder is reimplemented here for
+ # efficiency. Doing otherwise would require us to implement a
+ # fake decoder which would add an additional and unnecessary layer
+ # on top of the _StringIO methods.
+
+ class StringIO(_stringio._StringIO, TextIOBase):
+ """Text I/O implementation using an in-memory buffer.
+
+ The initial_value argument sets the value of the object. The newline
+ argument is like the one of TextIOWrapper's constructor.
+ """
+
+ _CHUNK_SIZE = 4096
+
+ def __init__(self, initial_value="", newline="\n"):
+ if newline not in (None, "", "\n", "\r", "\r\n"):
+ raise ValueError("illegal newline value: %r" % (newline,))
+
+ self._readuniversal = not newline
+ self._readtranslate = newline is None
+ self._readnl = newline
+ self._writetranslate = newline != ""
+ self._writenl = newline or os.linesep
+ self._pending = ""
+ self._seennl = 0
+
+ # Reset the buffer first, in case __init__ is called
+ # multiple times.
+ self.truncate(0)
+ if initial_value is None:
+ initial_value = ""
+ self.write(initial_value)
+ self.seek(0)
+
+ @property
+ def buffer(self):
+ raise UnsupportedOperation("%s.buffer attribute is unsupported" %
+ self.__class__.__name__)
+
+        def _decode_newlines(self, input, final=False):
+            # Decode input, prepending any \r held back by a previous pass.
+            if self._pending:
+                input = self._pending + input
+
+            # retain last \r even when not translating data:
+            # then readline() is sure to get \r\n in one pass
+            if input.endswith("\r") and not final:
+                input = input[:-1]
+                self._pending = "\r"
+            else:
+                self._pending = ""
+
+            # Record which newlines are read
+            crlf = input.count('\r\n')
+            cr = input.count('\r') - crlf
+            lf = input.count('\n') - crlf
+            self._seennl |= (lf and self._LF) | (cr and self._CR) \
+                         | (crlf and self._CRLF)
+
+            # Start from the untranslated text so output is always bound,
+            # and chain the replacements so \r\n collapses before lone \r.
+            output = input
+            if self._readtranslate:
+                if crlf:
+                    output = output.replace("\r\n", "\n")
+                if cr:
+                    output = output.replace("\r", "\n")
+            return output
+
+ def writable(self):
+ return True
+
+ def readable(self):
+ return True
+
+ def seekable(self):
+ return True
+
+ _read = _stringio._StringIO.read
+ _write = _stringio._StringIO.write
+ _tell = _stringio._StringIO.tell
+ _seek = _stringio._StringIO.seek
+ _truncate = _stringio._StringIO.truncate
+ _getvalue = _stringio._StringIO.getvalue
+
+ def getvalue(self) -> str:
+ """Retrieve the entire contents of the object."""
+ if self.closed:
+ raise ValueError("read on closed file")
+ return self._getvalue()
+
+ def write(self, s: str) -> int:
+ """Write string s to file.
+
+ Returns the number of characters written.
+ """
+ if self.closed:
+ raise ValueError("write to closed file")
+ if not isinstance(s, str):
+ raise TypeError("can't write %s to text stream" %
+ s.__class__.__name__)
+ length = len(s)
+ if self._writetranslate and self._writenl != "\n":
+ s = s.replace("\n", self._writenl)
+ self._pending = ""
+ self._write(s)
+ return length
+
+        def read(self, n: int = None) -> str:
+            """Read at most n characters, returned as a string.
+
+            If the argument is negative or omitted, read until EOF
+            is reached. Return an empty string at EOF.
+            """
+            if self.closed:
+                raise ValueError("read to closed file")
+            if n is None:
+                n = -1
+            # _decode_newlines() already prepends self._pending, so the
+            # pending text must not be added to the result a second time.
+            if n < 0:
+                res = self._decode_newlines(self._read(), True)
+                self._pending = ""
+                return res
+            res = self._decode_newlines(self._read(n), True)
+            self._pending = res[n:]
+            return res[:n]
+
+ def tell(self) -> int:
+ """Tell the current file position."""
+ if self.closed:
+ raise ValueError("tell from closed file")
+ if self._pending:
+ return self._tell() - len(self._pending)
+ else:
+ return self._tell()
+
+ def seek(self, pos: int = None, whence: int = 0) -> int:
+ """Change stream position.
+
+ Seek to character offset pos relative to position indicated by whence:
+ 0 Start of stream (the default). pos should be >= 0;
+ 1 Current position - pos must be 0;
+ 2 End of stream - pos must be 0.
+ Returns the new absolute position.
+ """
+ if self.closed:
+ raise ValueError("seek from closed file")
+ self._pending = ""
+ return self._seek(pos, whence)
+
+ def truncate(self, pos: int = None) -> int:
+ """Truncate size to pos.
+
+ The pos argument defaults to the current file position, as
+ returned by tell(). Implies an absolute seek to pos.
+ Returns the new absolute position.
+ """
+ if self.closed:
+ raise ValueError("truncate from closed file")
+ self._pending = ""
+ return self._truncate(pos)
+
+ def readline(self, limit: int = None) -> str:
+ if self.closed:
+ raise ValueError("read from closed file")
+ if limit is None:
+ limit = -1
+ if limit >= 0:
+ # XXX: Hack to support limit argument, for backwards
+ # XXX compatibility
+ line = self.readline()
+ if len(line) <= limit:
+ return line
+ line, self._pending = line[:limit], line[limit:] + self._pending
+ return line
+
+ line = self._pending
+ self._pending = ""
+
+ start = 0
+ pos = endpos = None
+ while True:
+ if self._readtranslate:
+ # Newlines are already translated, only search for \n
+ pos = line.find('\n', start)
+ if pos >= 0:
+ endpos = pos + 1
+ break
+ else:
+ start = len(line)
+
+ elif self._readuniversal:
+ # Universal newline search. Find any of \r, \r\n, \n
+ # The decoder ensures that \r\n is not split into two pieces
+
+ # In C we'd look for these in parallel of course.
+ nlpos = line.find("\n", start)
+ crpos = line.find("\r", start)
+ if crpos == -1:
+ if nlpos == -1:
+ # Nothing found
+ start = len(line)
+ else:
+ # Found \n
+ endpos = nlpos + 1
+ break
+ elif nlpos == -1:
+ # Found lone \r
+ endpos = crpos + 1
+ break
+ elif nlpos < crpos:
+ # Found \n
+ endpos = nlpos + 1
+ break
+ elif nlpos == crpos + 1:
+ # Found \r\n
+ endpos = crpos + 2
+ break
+ else:
+ # Found \r
+ endpos = crpos + 1
+ break
+ else:
+ # non-universal
+ pos = line.find(self._readnl)
+ if pos >= 0:
+ endpos = pos + len(self._readnl)
+ break
+
+ # No line ending seen yet - get more data
+ more_line = self.read(self._CHUNK_SIZE)
+ if more_line:
+ line += more_line
+ else:
+ # end of file
+ return line
+
+ self._pending = line[endpos:]
+ return line[:endpos]
+
+ _LF = 1
+ _CR = 2
+ _CRLF = 4
+
+ @property
+ def newlines(self):
+ return (None,
+ "\n",
+ "\r",
+ ("\r", "\n"),
+ "\r\n",
+ ("\n", "\r\n"),
+ ("\r", "\r\n"),
+ ("\r", "\n", "\r\n")
+ )[self._seennl]
+
+
+except ImportError:
+ StringIO = _StringIO
diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py
index 2d91cbd..d1745bc 100644
--- a/Lib/test/test_memoryio.py
+++ b/Lib/test/test_memoryio.py
@@ -10,7 +10,7 @@ import io
import sys
try:
- import _bytesio
+ import _bytesio, _stringio
has_c_implementation = True
except ImportError:
has_c_implementation = False
@@ -373,7 +373,7 @@ class PyBytesIOTest(MemoryTestMixin, unittest.TestCase):
class PyStringIOTest(MemoryTestMixin, unittest.TestCase):
buftype = str
- ioclass = io.StringIO
+ ioclass = io._StringIO
EOF = ""
def test_relative_seek(self):
@@ -404,10 +404,14 @@ if has_c_implementation:
class CBytesIOTest(PyBytesIOTest):
ioclass = io.BytesIO
+ class CStringIOTest(PyStringIOTest):
+ ioclass = io.StringIO
+
+
def test_main():
tests = [PyBytesIOTest, PyStringIOTest]
if has_c_implementation:
- tests.extend([CBytesIOTest])
+ tests.extend([CBytesIOTest, CStringIOTest])
support.run_unittest(*tests)
if __name__ == '__main__':
diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py
index ca1f836..c4c568f 100644
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -3,7 +3,6 @@
import os
import sys
import pickle
-from io import StringIO
from test.support import verbose, run_unittest, TestSkipped
import unittest
@@ -80,7 +79,7 @@ class MinidomTest(unittest.TestCase):
self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t)))
def testParseFromFile(self):
- dom = parse(StringIO(open(tstfile).read()))
+ dom = parse(open(tstfile))
dom.unlink()
self.confirm(isinstance(dom, Document))
diff --git a/Lib/test/test_uu.py b/Lib/test/test_uu.py
index 02d0171..d2b6e73 100644
--- a/Lib/test/test_uu.py
+++ b/Lib/test/test_uu.py
@@ -17,6 +17,32 @@ encodedtext = b"""\
M5&AE('-M;V]T:\"US8V%L960@<'ET:&]N(&-R97!T(&]V97(@=&AE('-L965P
(:6YG(&1O9PH """
+# Stolen from io.py
+class FakeIO(io.TextIOWrapper):
+ """Text I/O implementation using an in-memory buffer.
+
+ Can be used as a drop-in replacement for sys.stdin and sys.stdout.
+ """
+
+ # XXX This is really slow, but fully functional
+
+ def __init__(self, initial_value="", encoding="utf-8",
+ errors="strict", newline="\n"):
+ super(FakeIO, self).__init__(io.BytesIO(),
+ encoding=encoding,
+ errors=errors,
+ newline=newline)
+ if initial_value:
+ if not isinstance(initial_value, str):
+ initial_value = str(initial_value)
+ self.write(initial_value)
+ self.seek(0)
+
+ def getvalue(self):
+ self.flush()
+ return self.buffer.getvalue().decode(self._encoding, self._errors)
+
+
def encodedtextwrapped(mode, filename):
return (bytes("begin %03o %s\n" % (mode, filename), "ascii") +
encodedtext + b"\n \nend\n")
@@ -76,15 +102,15 @@ class UUStdIOTest(unittest.TestCase):
sys.stdout = self.stdout
def test_encode(self):
- sys.stdin = io.StringIO(plaintext.decode("ascii"))
- sys.stdout = io.StringIO()
+ sys.stdin = FakeIO(plaintext.decode("ascii"))
+ sys.stdout = FakeIO()
uu.encode("-", "-", "t1", 0o666)
self.assertEqual(sys.stdout.getvalue(),
encodedtextwrapped(0o666, "t1").decode("ascii"))
def test_decode(self):
- sys.stdin = io.StringIO(encodedtextwrapped(0o666, "t1").decode("ascii"))
- sys.stdout = io.StringIO()
+ sys.stdin = FakeIO(encodedtextwrapped(0o666, "t1").decode("ascii"))
+ sys.stdout = FakeIO()
uu.decode("-", "-")
stdout = sys.stdout
sys.stdout = self.stdout
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index f229369..3025ed7 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,6 +14,7 @@ Todo:
* SAX 2 namespaces
"""
+import codecs
import io
import xml.dom
@@ -49,16 +50,16 @@ class Node(xml.dom.Node):
# indent = the indentation string to prepend, per level
# newl = the newline string to append
use_encoding = "utf-8" if encoding is None else encoding
- writer = io.StringIO(encoding=use_encoding)
+ writer = codecs.getwriter(use_encoding)(io.BytesIO())
if self.nodeType == Node.DOCUMENT_NODE:
# Can pass encoding only to document, to put it into XML header
self.writexml(writer, "", indent, newl, encoding)
else:
self.writexml(writer, "", indent, newl)
if encoding is None:
- return writer.getvalue()
+ return writer.stream.getvalue().decode(use_encoding)
else:
- return writer.buffer.getvalue()
+ return writer.stream.getvalue()
def hasChildNodes(self):
if self.childNodes: