summary refs log tree commit diff stats
path: root/Modules/_stringio.c
diff options
context:
space:
mode:
author Benjamin Peterson <benjamin@python.org> 2009-03-04 00:14:51 (GMT)
committer Benjamin Peterson <benjamin@python.org> 2009-03-04 00:14:51 (GMT)
commit 4fa88fa0ba35e25ad9be66ebbdaba9aca553dc8b (patch)
tree 8486abb218ed5bb1f7713416fb64104f7e2537b0 /Modules/_stringio.c
parent 03ad812435f51261ba78bc5920d254ea0e123b37 (diff)
download cpython-4fa88fa0ba35e25ad9be66ebbdaba9aca553dc8b.zip
cpython-4fa88fa0ba35e25ad9be66ebbdaba9aca553dc8b.tar.gz
cpython-4fa88fa0ba35e25ad9be66ebbdaba9aca553dc8b.tar.bz2
merge the io-c branch: C implementation of the io module
The main io module now uses the C implementation. The Python one still exists in Lib/_pyio.py for ease of testing new features and usefulness to other implementers. The rewrite was done by Antoine Pitrou and Amaury Forgeot d'Arc. I was slightly helpful at the end. :) Following are the log messages from the io-c branch: Merged revisions 68683-68685,68687-68689,68693,68704,68741-68743,68745,68747,68752-68754,68756,68758,68812,68816-68817,68820-68822,68824-68825,68828,68876-68877,69037,69044,69104,69115,69194,69626-69629,69636,69638,69641-69642,69644-69654,69656-69661,69671,69677,69812-69815,69817,69827-69830,69839,69841-69845,69848,69850,69852,69854,69860,69865-69866,69868,69872-69873,69885,69888,69891-69893,69911,69913-69916,69963,70033,70035,70038,70041-70048,70067-70070,70075,70112,70133,70135,70140 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/io-c ........ r68683 | antoine.pitrou | 2009-01-17 17:13:48 -0600 (Sat, 17 Jan 2009) | 3 lines Merge in changes from the io-c sandbox. Tests will follow in separate commits. ........ r68684 | antoine.pitrou | 2009-01-17 17:17:26 -0600 (Sat, 17 Jan 2009) | 3 lines Fixes and additions to test_io.py ........ r68685 | antoine.pitrou | 2009-01-17 17:22:04 -0600 (Sat, 17 Jan 2009) | 1 line Fix test_fileio ........ r68687 | antoine.pitrou | 2009-01-17 17:35:11 -0600 (Sat, 17 Jan 2009) | 3 lines Add dependency to _iomodule.h for the various C sources ........ r68688 | antoine.pitrou | 2009-01-17 17:38:18 -0600 (Sat, 17 Jan 2009) | 3 lines These precautions are not needed anymore! ........ r68689 | antoine.pitrou | 2009-01-17 17:41:48 -0600 (Sat, 17 Jan 2009) | 3 lines Fix another test ........ r68693 | antoine.pitrou | 2009-01-17 17:49:58 -0600 (Sat, 17 Jan 2009) | 3 lines Fix test_uu (which was using private attributes of TextIOWrapper) ........ 
r68704 | antoine.pitrou | 2009-01-17 18:45:29 -0600 (Sat, 17 Jan 2009) | 3 lines Most io sources are Py_ssize_t-clean (I don't know about bytesio and stringio) ........ r68741 | antoine.pitrou | 2009-01-18 15:20:30 -0600 (Sun, 18 Jan 2009) | 3 lines Check return type in TextIOWrapper.__next__ ........ r68742 | antoine.pitrou | 2009-01-18 15:28:48 -0600 (Sun, 18 Jan 2009) | 4 lines Make binary buffered readline and iteration much faster (8x as fast as the IOBase generic implementation) ........ r68743 | antoine.pitrou | 2009-01-18 15:47:47 -0600 (Sun, 18 Jan 2009) | 3 lines Reinsert test_io_after_close (was removed by mistake) ........ r68745 | antoine.pitrou | 2009-01-18 16:16:06 -0600 (Sun, 18 Jan 2009) | 3 lines Add read, read1 and write methods to BufferedIOBase ........ r68747 | antoine.pitrou | 2009-01-18 16:35:58 -0600 (Sun, 18 Jan 2009) | 3 lines Kill test failure ........ r68752 | amaury.forgeotdarc | 2009-01-18 17:05:43 -0600 (Sun, 18 Jan 2009) | 3 lines Fix a segfault when e.g a BufferedReader is created with a FileIO in read mode. ........ r68753 | antoine.pitrou | 2009-01-18 17:13:09 -0600 (Sun, 18 Jan 2009) | 3 lines Add truncate() to text IO objects ........ r68754 | antoine.pitrou | 2009-01-18 17:51:08 -0600 (Sun, 18 Jan 2009) | 3 lines Remove IOBase.__del__ and replace it with custom code with tp_dealloc ........ r68756 | antoine.pitrou | 2009-01-18 18:10:16 -0600 (Sun, 18 Jan 2009) | 3 lines Remove irrelevant comment. ........ r68758 | antoine.pitrou | 2009-01-18 18:36:16 -0600 (Sun, 18 Jan 2009) | 3 lines in importlib:_fileio._FileIO -> _io.FileIO ........ r68812 | antoine.pitrou | 2009-01-20 14:15:51 -0600 (Tue, 20 Jan 2009) | 3 lines Add garbage collection support to FileIO objects ........ r68816 | antoine.pitrou | 2009-01-20 14:56:28 -0600 (Tue, 20 Jan 2009) | 3 lines Add GC support to Buffered and Text IO objects ........ r68817 | antoine.pitrou | 2009-01-20 15:19:45 -0600 (Tue, 20 Jan 2009) | 3 lines Add some file headers ........ 
r68820 | antoine.pitrou | 2009-01-20 15:29:59 -0600 (Tue, 20 Jan 2009) | 3 lines Add class TextIOBase ........ r68821 | antoine.pitrou | 2009-01-20 15:36:16 -0600 (Tue, 20 Jan 2009) | 3 lines Add properties to TextIOBase ........ r68822 | antoine.pitrou | 2009-01-20 15:41:19 -0600 (Tue, 20 Jan 2009) | 3 lines Disable the pure Python TextIOBase class, and inject C the implementation instead ........ r68824 | antoine.pitrou | 2009-01-20 16:36:28 -0600 (Tue, 20 Jan 2009) | 3 lines Fix two leaks ........ r68825 | antoine.pitrou | 2009-01-20 16:38:29 -0600 (Tue, 20 Jan 2009) | 3 lines FileIO.name is just a plain attribute, we can set it directly ........ r68828 | antoine.pitrou | 2009-01-20 17:06:33 -0600 (Tue, 20 Jan 2009) | 3 lines Speed up closed checks on text IO objects. Good for a 25% speedup on small ops. ........ r68876 | antoine.pitrou | 2009-01-23 17:01:25 -0600 (Fri, 23 Jan 2009) | 3 lines Two typos ........ r68877 | antoine.pitrou | 2009-01-23 18:13:20 -0600 (Fri, 23 Jan 2009) | 3 lines Remove two unused functions ........ r69037 | amaury.forgeotdarc | 2009-01-27 17:10:25 -0600 (Tue, 27 Jan 2009) | 2 lines Update the win32 project files ........ r69044 | antoine.pitrou | 2009-01-27 18:51:07 -0600 (Tue, 27 Jan 2009) | 3 lines Improve heuristic in IncrementalNewlineDecoder + some micro-optimizations ........ r69104 | antoine.pitrou | 2009-01-29 15:23:42 -0600 (Thu, 29 Jan 2009) | 3 lines Fix some crashers found by Victor ........ r69115 | hirokazu.yamamoto | 2009-01-29 20:36:28 -0600 (Thu, 29 Jan 2009) | 1 line Updated VC6 project file. ........ r69194 | antoine.pitrou | 2009-02-01 16:57:18 -0600 (Sun, 01 Feb 2009) | 3 lines Fix downcasting warnings in 32-bit mode with 64-bit offsets (Windows) ........ r69626 | benjamin.peterson | 2009-02-14 17:33:34 -0600 (Sat, 14 Feb 2009) | 1 line only catch AttributeError and UnsupportedOperation ........ 
r69627 | benjamin.peterson | 2009-02-14 21:35:28 -0600 (Sat, 14 Feb 2009) | 1 line give the IO module its own state and store the os and locale modules in it ........ r69628 | benjamin.peterson | 2009-02-14 22:08:32 -0600 (Sat, 14 Feb 2009) | 1 line put interned strings in the module state structure ........ r69629 | benjamin.peterson | 2009-02-14 22:15:29 -0600 (Sat, 14 Feb 2009) | 1 line put UnsupportedOperation in the module state ........ r69636 | benjamin.peterson | 2009-02-15 08:31:42 -0600 (Sun, 15 Feb 2009) | 1 line dealloc unsupported_operation ........ r69638 | benjamin.peterson | 2009-02-15 09:24:45 -0600 (Sun, 15 Feb 2009) | 1 line actually test the C implementation ........ r69641 | benjamin.peterson | 2009-02-15 10:12:37 -0600 (Sun, 15 Feb 2009) | 5 lines make interned strings globals again ;( putting them in the module state was asking for trouble when the module was dealloced before the classes in it were ........ r69642 | benjamin.peterson | 2009-02-15 10:19:45 -0600 (Sun, 15 Feb 2009) | 1 line actually test the python implementations ........ r69644 | antoine.pitrou | 2009-02-15 11:59:30 -0600 (Sun, 15 Feb 2009) | 3 lines Fix memory leak in destructor when a Python class inherits from IOBase (or an IOBase-derived type) ........ r69645 | antoine.pitrou | 2009-02-15 12:23:26 -0600 (Sun, 15 Feb 2009) | 3 lines Add a warning about the embarassing state of IOBase finalization ........ r69646 | antoine.pitrou | 2009-02-15 13:14:42 -0600 (Sun, 15 Feb 2009) | 3 lines Fix opening of 8-bit filenames with FileIO ........ r69647 | antoine.pitrou | 2009-02-15 13:20:22 -0600 (Sun, 15 Feb 2009) | 3 lines Fix leak in FileIO constructor ........ r69648 | antoine.pitrou | 2009-02-15 13:58:16 -0600 (Sun, 15 Feb 2009) | 3 lines Fix some refleaks ........ r69649 | antoine.pitrou | 2009-02-15 14:05:13 -0600 (Sun, 15 Feb 2009) | 3 lines Fix a leak in IOBase.writelines ........ 
r69650 | antoine.pitrou | 2009-02-15 14:11:56 -0600 (Sun, 15 Feb 2009) | 3 lines Fix memory leak in BufferedWriter.truncate ........ r69651 | antoine.pitrou | 2009-02-15 14:25:34 -0600 (Sun, 15 Feb 2009) | 3 lines Fix a leak in TextIOWrapper.seek ........ r69652 | antoine.pitrou | 2009-02-15 14:26:28 -0600 (Sun, 15 Feb 2009) | 3 lines Unify implementations of truncate for buffered objects ........ r69653 | antoine.pitrou | 2009-02-15 15:15:15 -0600 (Sun, 15 Feb 2009) | 3 lines Fix more leaks in TextIOWrapper ........ r69654 | antoine.pitrou | 2009-02-15 15:21:57 -0600 (Sun, 15 Feb 2009) | 3 lines Smaller chunk size for a faster test ........ r69656 | benjamin.peterson | 2009-02-15 17:29:48 -0600 (Sun, 15 Feb 2009) | 1 line braces make this much clearer ........ r69657 | benjamin.peterson | 2009-02-15 17:46:07 -0600 (Sun, 15 Feb 2009) | 1 line use the correct macro ........ r69658 | antoine.pitrou | 2009-02-15 19:38:59 -0600 (Sun, 15 Feb 2009) | 5 lines Fix crash in test_urllib2_localnet in debug mode. It was due to an HTTPResponse object being revived when calling its close() method in IOBase's tp_dealloc. _PyIOBase_finalize() starts looking scary... ........ r69659 | benjamin.peterson | 2009-02-15 20:55:48 -0600 (Sun, 15 Feb 2009) | 1 line fix segfault on initialization failing ........ r69660 | benjamin.peterson | 2009-02-15 21:09:31 -0600 (Sun, 15 Feb 2009) | 1 line apparently locale.getprefferedencoding() can raise a ImportError, too ........ r69661 | benjamin.peterson | 2009-02-15 21:54:15 -0600 (Sun, 15 Feb 2009) | 1 line it's amazing this worked at all; I was using the wrong structs! ........ r69671 | benjamin.peterson | 2009-02-16 08:38:27 -0600 (Mon, 16 Feb 2009) | 1 line add garbage collection support to bytesio ........ r69677 | benjamin.peterson | 2009-02-16 10:31:03 -0600 (Mon, 16 Feb 2009) | 5 lines reduce ImportError catching code duplication I'm not sure this makes the code clearer with its new gotos, but at least I added a big fat comment ........ 
r69812 | antoine.pitrou | 2009-02-20 13:50:16 -0600 (Fri, 20 Feb 2009) | 3 lines _StringIO now belongs to the _io modules, rather to its own _stringio module ........ r69813 | antoine.pitrou | 2009-02-20 13:58:22 -0600 (Fri, 20 Feb 2009) | 3 lines Add a test for StringIO properties ........ r69814 | antoine.pitrou | 2009-02-20 14:06:03 -0600 (Fri, 20 Feb 2009) | 3 lines Reimplement a few trivial StringIO functions and properties in C ........ r69815 | antoine.pitrou | 2009-02-20 14:13:11 -0600 (Fri, 20 Feb 2009) | 3 lines Add the line_buffering property to TextIOWrapper, and test for it ........ r69817 | antoine.pitrou | 2009-02-20 14:45:50 -0600 (Fri, 20 Feb 2009) | 4 lines Allow IncrementalNewlineDecoder to take unicode objects as decoding input if the decoder parameter is None This will help rewriting StringIO to C ........ r69827 | antoine.pitrou | 2009-02-20 19:00:30 -0600 (Fri, 20 Feb 2009) | 3 lines Rewrite most of StringIO in C. Some almost empty stubs remain to be converted. ........ r69828 | antoine.pitrou | 2009-02-20 19:09:25 -0600 (Fri, 20 Feb 2009) | 3 lines Plug a leak, and remove an unused string ........ r69829 | benjamin.peterson | 2009-02-20 20:02:28 -0600 (Fri, 20 Feb 2009) | 1 line this assertions makes more sense here ........ r69830 | benjamin.peterson | 2009-02-20 20:03:04 -0600 (Fri, 20 Feb 2009) | 1 line PyModule_AddObject can fail; simplify this code with a macro ........ r69839 | antoine.pitrou | 2009-02-21 12:54:01 -0600 (Sat, 21 Feb 2009) | 3 lines StringIO is now written entirely in C (and blazingly fast) ........ r69841 | benjamin.peterson | 2009-02-21 14:05:40 -0600 (Sat, 21 Feb 2009) | 1 line split the Python implementation of io into another module and rewrite the tests to test both implementations ........ r69842 | benjamin.peterson | 2009-02-21 14:10:00 -0600 (Sat, 21 Feb 2009) | 1 line closed is not a function ........ r69843 | benjamin.peterson | 2009-02-21 14:13:04 -0600 (Sat, 21 Feb 2009) | 1 line fix __all__ test ........ 
r69844 | benjamin.peterson | 2009-02-21 14:21:24 -0600 (Sat, 21 Feb 2009) | 1 line fix the rest of the Misc tests ........ r69845 | benjamin.peterson | 2009-02-21 14:26:59 -0600 (Sat, 21 Feb 2009) | 1 line RawIOBase is better for FileIO ........ r69848 | benjamin.peterson | 2009-02-21 15:33:53 -0600 (Sat, 21 Feb 2009) | 1 line fix some more tests broken by bag argument validation ........ r69850 | benjamin.peterson | 2009-02-21 16:16:42 -0600 (Sat, 21 Feb 2009) | 1 line make the python IncrementalNewineDecoder support a None decoder ........ r69852 | benjamin.peterson | 2009-02-21 16:36:09 -0600 (Sat, 21 Feb 2009) | 1 line fix a BlockingIOError.characters_written bug ........ r69854 | benjamin.peterson | 2009-02-21 16:49:02 -0600 (Sat, 21 Feb 2009) | 1 line check whence ........ r69860 | benjamin.peterson | 2009-02-21 17:42:50 -0600 (Sat, 21 Feb 2009) | 1 line fix some of these Misbehaving io tests ........ r69865 | benjamin.peterson | 2009-02-21 18:59:52 -0600 (Sat, 21 Feb 2009) | 1 line don't use super here() ........ r69866 | benjamin.peterson | 2009-02-21 19:05:28 -0600 (Sat, 21 Feb 2009) | 1 line use implementation specific classes ........ r69868 | benjamin.peterson | 2009-02-21 22:12:05 -0600 (Sat, 21 Feb 2009) | 1 line use a more DRY friendly approach to injecting module contents into test classes ........ r69872 | antoine.pitrou | 2009-02-22 13:39:45 -0600 (Sun, 22 Feb 2009) | 3 lines Sanitize destructor behaviour of IOBase. Now Python-defined attributes can be accessed from close(). ........ r69873 | antoine.pitrou | 2009-02-22 13:50:14 -0600 (Sun, 22 Feb 2009) | 4 lines Only set the internal fd after it has been checked to be valid (otherwise, the destructor will attempt to close it) ........ r69885 | benjamin.peterson | 2009-02-22 15:30:14 -0600 (Sun, 22 Feb 2009) | 1 line convert some other tests to use both io implementations ........ 
r69888 | antoine.pitrou | 2009-02-22 17:03:16 -0600 (Sun, 22 Feb 2009) | 3 lines Silence all exceptions when finalizing ........ r69891 | benjamin.peterson | 2009-02-22 17:27:24 -0600 (Sun, 22 Feb 2009) | 1 line convert another test to test both io implementations ........ r69892 | benjamin.peterson | 2009-02-22 17:32:15 -0600 (Sun, 22 Feb 2009) | 1 line help poor people like me to find their io tests (did I miss any?) ........ r69893 | benjamin.peterson | 2009-02-22 17:37:56 -0600 (Sun, 22 Feb 2009) | 1 line put a big note in the test telling people to write tests for both implementations now ........ r69911 | antoine.pitrou | 2009-02-23 13:57:18 -0600 (Mon, 23 Feb 2009) | 3 lines expose DEFAULT_BUFFER_SIZE again (fixes a bunch of test failures) ........ r69913 | antoine.pitrou | 2009-02-23 14:10:30 -0600 (Mon, 23 Feb 2009) | 4 lines Do the cyclic garbage collection tests only on the C version. The Python version is helpless as it uses __del__. ........ r69914 | antoine.pitrou | 2009-02-23 14:21:41 -0600 (Mon, 23 Feb 2009) | 3 lines Adapt test_largefile to test both implementations ........ r69915 | antoine.pitrou | 2009-02-23 14:25:14 -0600 (Mon, 23 Feb 2009) | 3 lines One small failure ........ r69916 | antoine.pitrou | 2009-02-23 14:28:33 -0600 (Mon, 23 Feb 2009) | 3 lines Add a comment, at BP's request ........ r69963 | antoine.pitrou | 2009-02-25 09:42:59 -0600 (Wed, 25 Feb 2009) | 3 lines Add a test of ABC inheritance ........ r70033 | antoine.pitrou | 2009-02-27 15:49:50 -0600 (Fri, 27 Feb 2009) | 3 lines The base classes now are ABCs. ........ r70035 | benjamin.peterson | 2009-02-27 15:57:41 -0600 (Fri, 27 Feb 2009) | 1 line good house keeping ........ r70038 | antoine.pitrou | 2009-02-27 17:05:23 -0600 (Fri, 27 Feb 2009) | 4 lines Make the buffer allocation overflow tests specific to the C implementation, since the Python implementation resizes its buffers when needed rather than allocating them up front. ........ 
r70041 | benjamin.peterson | 2009-02-27 18:26:12 -0600 (Fri, 27 Feb 2009) | 1 line kill java naming for sanity ........ r70042 | benjamin.peterson | 2009-02-27 18:28:53 -0600 (Fri, 27 Feb 2009) | 2 lines timingTest is superseded by iobench ........ r70043 | antoine.pitrou | 2009-02-27 19:13:50 -0600 (Fri, 27 Feb 2009) | 3 lines Remove the last traces of java naming in test_io ........ r70044 | antoine.pitrou | 2009-02-27 19:18:34 -0600 (Fri, 27 Feb 2009) | 3 lines Better resource cleanup ........ r70045 | antoine.pitrou | 2009-02-27 19:29:00 -0600 (Fri, 27 Feb 2009) | 3 lines Remove dubious uses of super(), and fix one test ........ r70046 | antoine.pitrou | 2009-02-27 19:31:00 -0600 (Fri, 27 Feb 2009) | 3 lines Bump up CHUNK_SIZE (no need to make the Python version look slower than it is) ........ r70047 | benjamin.peterson | 2009-02-27 20:03:26 -0600 (Fri, 27 Feb 2009) | 1 line fix typo ........ r70048 | benjamin.peterson | 2009-02-27 21:35:11 -0600 (Fri, 27 Feb 2009) | 1 line move code to a better place ........ r70067 | benjamin.peterson | 2009-02-28 10:43:20 -0600 (Sat, 28 Feb 2009) | 4 lines 1. make sure to undo buffered read aheads in BufferedRandom.seek() 2. refill the buffer if have <= 0 3. fix the last failing test_io test! ........ r70068 | benjamin.peterson | 2009-02-28 10:57:50 -0600 (Sat, 28 Feb 2009) | 1 line define read1() on the python implementation's BufferedIOBase ........ r70069 | benjamin.peterson | 2009-02-28 11:01:17 -0600 (Sat, 28 Feb 2009) | 1 line document read1() in BufferedIOBase ........ r70070 | benjamin.peterson | 2009-02-28 11:06:42 -0600 (Sat, 28 Feb 2009) | 1 line give credit where credit is due ........ r70075 | antoine.pitrou | 2009-02-28 13:34:59 -0600 (Sat, 28 Feb 2009) | 3 lines Amaury's name ........ r70112 | antoine.pitrou | 2009-03-02 17:11:55 -0600 (Mon, 02 Mar 2009) | 4 lines Looks like this is necessary in order to build cleanly under Windows (someone correct this if it's wrong, I'm no Windows user) ........ 
r70133 | benjamin.peterson | 2009-03-03 15:23:32 -0600 (Tue, 03 Mar 2009) | 1 line fix test_newline_property on _pyio.StringIO ........ r70135 | benjamin.peterson | 2009-03-03 15:47:30 -0600 (Tue, 03 Mar 2009) | 1 line fix typos and inconsistencies. thanks to Daniel Diniz ........ r70140 | benjamin.peterson | 2009-03-03 16:21:10 -0600 (Tue, 03 Mar 2009) | 1 line add the test from #5266 ........
Diffstat (limited to 'Modules/_stringio.c')
-rw-r--r-- Modules/_stringio.c 538
1 files changed, 462 insertions, 76 deletions
diff --git a/Modules/_stringio.c b/Modules/_stringio.c
index e627258..a88fcb7 100644
--- a/Modules/_stringio.c
+++ b/Modules/_stringio.c
@@ -1,8 +1,11 @@
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
+#include "structmember.h"
+#include "_iomodule.h"
-/* This module is a stripped down version of _bytesio.c with a Py_UNICODE
- buffer. Most of the functionality is provided by subclassing _StringIO. */
-
+/* Implementation note: the buffer is always at least one character longer
+ than the enclosed string, for proper functioning of _PyIO_find_line_ending.
+*/
typedef struct {
PyObject_HEAD
@@ -10,8 +13,39 @@ typedef struct {
Py_ssize_t pos;
Py_ssize_t string_size;
size_t buf_size;
+
+ char ok; /* initialized? */
+ char closed;
+ char readuniversal;
+ char readtranslate;
+ PyObject *decoder;
+ PyObject *readnl;
+ PyObject *writenl;
+
+ PyObject *dict;
+ PyObject *weakreflist;
} StringIOObject;
+#define CHECK_INITIALIZED(self) \
+ if (self->ok <= 0) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on uninitialized object"); \
+ return NULL; \
+ }
+
+#define CHECK_CLOSED(self) \
+ if (self->closed) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on closed file"); \
+ return NULL; \
+ }
+
+PyDoc_STRVAR(stringio_doc,
+ "Text I/O implementation using an in-memory buffer.\n"
+ "\n"
+ "The initial_value argument sets the value of object. The newline\n"
+ "argument is like the one of TextIOWrapper's constructor.");
+
/* Internal routine for changing the size, in terms of characters, of the
buffer of StringIO objects. The caller should ensure that the 'size'
@@ -26,6 +60,8 @@ resize_buffer(StringIOObject *self, size_t size)
assert(self->buf != NULL);
+ /* Reserve one more char for line ending detection. */
+ size = size + 1;
/* For simplicity, stay in the range of the signed type. Anyway, Python
doesn't allow strings to be longer than this. */
if (size > PY_SSIZE_T_MAX)
@@ -67,13 +103,38 @@ resize_buffer(StringIOObject *self, size_t size)
return -1;
}
-/* Internal routine for writing a string of characters to the buffer of a
- StringIO object. Returns the number of bytes wrote, or -1 on error. */
+/* Internal routine for writing a whole PyUnicode object to the buffer of a
+ StringIO object. Returns 0 on success, or -1 on error. */
static Py_ssize_t
-write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
+write_str(StringIOObject *self, PyObject *obj)
{
+ Py_UNICODE *str;
+ Py_ssize_t len;
+ PyObject *decoded = NULL;
assert(self->buf != NULL);
assert(self->pos >= 0);
+
+ if (self->decoder != NULL) {
+ decoded = _PyIncrementalNewlineDecoder_decode(
+ self->decoder, obj, 1 /* always final */);
+ }
+ else {
+ decoded = obj;
+ Py_INCREF(decoded);
+ }
+ if (self->writenl) {
+ PyObject *translated = PyUnicode_Replace(
+ decoded, _PyIO_str_nl, self->writenl, -1);
+ Py_DECREF(decoded);
+ decoded = translated;
+ }
+ if (decoded == NULL)
+ return -1;
+
+ assert(PyUnicode_Check(decoded));
+ str = PyUnicode_AS_UNICODE(decoded);
+ len = PyUnicode_GET_SIZE(decoded);
+
assert(len >= 0);
/* This overflow check is not strictly necessary. However, it avoids us to
@@ -82,11 +143,11 @@ write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
if (self->pos > PY_SSIZE_T_MAX - len) {
PyErr_SetString(PyExc_OverflowError,
"new position too large");
- return -1;
+ goto fail;
}
if (self->pos + len > self->string_size) {
if (resize_buffer(self, self->pos + len) < 0)
- return -1;
+ goto fail;
}
if (self->pos > self->string_size) {
@@ -108,26 +169,47 @@ write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
self->pos += len;
- /* Set the new length of the internal string if it has changed */
+ /* Set the new length of the internal string if it has changed. */
if (self->string_size < self->pos) {
self->string_size = self->pos;
}
- return len;
+ Py_DECREF(decoded);
+ return 0;
+
+fail:
+ Py_XDECREF(decoded);
+ return -1;
}
+PyDoc_STRVAR(stringio_getvalue_doc,
+ "Retrieve the entire contents of the object.");
+
static PyObject *
stringio_getvalue(StringIOObject *self)
{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyUnicode_FromUnicode(self->buf, self->string_size);
}
+PyDoc_STRVAR(stringio_tell_doc,
+ "Tell the current file position.");
+
static PyObject *
stringio_tell(StringIOObject *self)
{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyLong_FromSsize_t(self->pos);
}
+PyDoc_STRVAR(stringio_read_doc,
+ "Read at most n characters, returned as a string.\n"
+ "\n"
+ "If the argument is negative or omitted, read until EOF\n"
+ "is reached. Return an empty string at EOF.\n");
+
static PyObject *
stringio_read(StringIOObject *self, PyObject *args)
{
@@ -135,11 +217,13 @@ stringio_read(StringIOObject *self, PyObject *args)
Py_UNICODE *output;
PyObject *arg = Py_None;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|O:read", &arg))
return NULL;
+ CHECK_CLOSED(self);
- if (PyLong_Check(arg)) {
- size = PyLong_AsSsize_t(arg);
+ if (PyNumber_Check(arg)) {
+ size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
if (size == -1 && PyErr_Occurred())
return NULL;
}
@@ -161,24 +245,127 @@ stringio_read(StringIOObject *self, PyObject *args)
size = 0;
}
- assert(self->buf != NULL);
output = self->buf + self->pos;
self->pos += size;
-
return PyUnicode_FromUnicode(output, size);
}
+/* Internal helper, used by stringio_readline and stringio_iternext */
+static PyObject *
+_stringio_readline(StringIOObject *self, Py_ssize_t limit)
+{
+ Py_UNICODE *start, *end, old_char;
+ Py_ssize_t len, consumed;
+
+ /* In case of overseek, return the empty string */
+ if (self->pos >= self->string_size)
+ return PyUnicode_FromString("");
+
+ start = self->buf + self->pos;
+ if (limit < 0 || limit > self->string_size - self->pos)
+ limit = self->string_size - self->pos;
+
+ end = start + limit;
+ old_char = *end;
+ *end = '\0';
+ len = _PyIO_find_line_ending(
+ self->readtranslate, self->readuniversal, self->readnl,
+ start, end, &consumed);
+ *end = old_char;
+ /* If we haven't found any line ending, we just return everything
+ (`consumed` is ignored). */
+ if (len < 0)
+ len = limit;
+ self->pos += len;
+ return PyUnicode_FromUnicode(start, len);
+}
+
+PyDoc_STRVAR(stringio_readline_doc,
+ "Read until newline or EOF.\n"
+ "\n"
+ "Returns an empty string if EOF is hit immediately.\n");
+
+static PyObject *
+stringio_readline(StringIOObject *self, PyObject *args)
+{
+ PyObject *arg = Py_None;
+ Py_ssize_t limit = -1;
+
+ CHECK_INITIALIZED(self);
+ if (!PyArg_ParseTuple(args, "|O:readline", &arg))
+ return NULL;
+ CHECK_CLOSED(self);
+
+ if (PyNumber_Check(arg)) {
+ limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
+ if (limit == -1 && PyErr_Occurred())
+ return NULL;
+ }
+ else if (arg != Py_None) {
+ PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
+ Py_TYPE(arg)->tp_name);
+ return NULL;
+ }
+ return _stringio_readline(self, limit);
+}
+
+static PyObject *
+stringio_iternext(StringIOObject *self)
+{
+ PyObject *line;
+
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+
+ if (Py_TYPE(self) == &PyStringIO_Type) {
+ /* Skip method call overhead for speed */
+ line = _stringio_readline(self, -1);
+ }
+ else {
+ /* XXX is subclassing StringIO really supported? */
+ line = PyObject_CallMethodObjArgs((PyObject *)self,
+ _PyIO_str_readline, NULL);
+ if (line && !PyUnicode_Check(line)) {
+ PyErr_Format(PyExc_IOError,
+ "readline() should have returned an str object, "
+ "not '%.200s'", Py_TYPE(line)->tp_name);
+ Py_DECREF(line);
+ return NULL;
+ }
+ }
+
+ if (line == NULL)
+ return NULL;
+
+ if (PyUnicode_GET_SIZE(line) == 0) {
+ /* Reached EOF */
+ Py_DECREF(line);
+ return NULL;
+ }
+
+ return line;
+}
+
+PyDoc_STRVAR(stringio_truncate_doc,
+ "Truncate size to pos.\n"
+ "\n"
+ "The pos argument defaults to the current file position, as\n"
+ "returned by tell(). Imply an absolute seek to pos.\n"
+ "Returns the new absolute position.\n");
+
static PyObject *
stringio_truncate(StringIOObject *self, PyObject *args)
{
Py_ssize_t size;
PyObject *arg = Py_None;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
return NULL;
+ CHECK_CLOSED(self);
- if (PyLong_Check(arg)) {
- size = PyLong_AsSsize_t(arg);
+ if (PyNumber_Check(arg)) {
+ size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
if (size == -1 && PyErr_Occurred())
return NULL;
}
@@ -199,23 +386,34 @@ stringio_truncate(StringIOObject *self, PyObject *args)
}
if (size < self->string_size) {
- self->string_size = size;
if (resize_buffer(self, size) < 0)
return NULL;
+ self->string_size = size;
}
self->pos = size;
return PyLong_FromSsize_t(size);
}
+PyDoc_STRVAR(stringio_seek_doc,
+ "Change stream position.\n"
+ "\n"
+ "Seek to character offset pos relative to position indicated by whence:\n"
+ " 0 Start of stream (the default). pos should be >= 0;\n"
+ " 1 Current position - pos must be 0;\n"
+ " 2 End of stream - pos must be 0.\n"
+ "Returns the new absolute position.\n");
+
static PyObject *
stringio_seek(StringIOObject *self, PyObject *args)
{
Py_ssize_t pos;
int mode = 0;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
return NULL;
+ CHECK_CLOSED(self);
if (mode != 0 && mode != 1 && mode != 2) {
PyErr_Format(PyExc_ValueError,
@@ -248,36 +446,76 @@ stringio_seek(StringIOObject *self, PyObject *args)
return PyLong_FromSsize_t(self->pos);
}
+PyDoc_STRVAR(stringio_write_doc,
+ "Write string to file.\n"
+ "\n"
+ "Returns the number of characters written, which is always equal to\n"
+ "the length of the string.\n");
+
static PyObject *
stringio_write(StringIOObject *self, PyObject *obj)
{
- const Py_UNICODE *str;
Py_ssize_t size;
- Py_ssize_t n = 0;
- if (PyUnicode_Check(obj)) {
- str = PyUnicode_AsUnicode(obj);
- size = PyUnicode_GetSize(obj);
- }
- else {
+ CHECK_INITIALIZED(self);
+ if (!PyUnicode_Check(obj)) {
PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
Py_TYPE(obj)->tp_name);
return NULL;
}
+ CHECK_CLOSED(self);
+ size = PyUnicode_GET_SIZE(obj);
- if (size != 0) {
- n = write_str(self, str, size);
- if (n < 0)
- return NULL;
- }
+ if (size > 0 && write_str(self, obj) < 0)
+ return NULL;
+
+ return PyLong_FromSsize_t(size);
+}
+
+PyDoc_STRVAR(stringio_close_doc,
+ "Close the IO object. Attempting any further operation after the\n"
+ "object is closed will raise a ValueError.\n"
+ "\n"
+ "This method has no effect if the file is already closed.\n");
+
+static PyObject *
+stringio_close(StringIOObject *self)
+{
+ self->closed = 1;
+ /* Free up some memory */
+ if (resize_buffer(self, 0) < 0)
+ return NULL;
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ Py_RETURN_NONE;
+}
+
+static int
+stringio_traverse(StringIOObject *self, visitproc visit, void *arg)
+{
+ Py_VISIT(self->dict);
+ return 0;
+}
- return PyLong_FromSsize_t(n);
+static int
+stringio_clear(StringIOObject *self)
+{
+ Py_CLEAR(self->dict);
+ return 0;
}
static void
stringio_dealloc(StringIOObject *self)
{
- PyMem_Free(self->buf);
+ _PyObject_GC_UNTRACK(self);
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ if (self->buf)
+ PyMem_Free(self->buf);
+ if (self->weakreflist != NULL)
+ PyObject_ClearWeakRefs((PyObject *) self);
Py_TYPE(self)->tp_free(self);
}
@@ -303,19 +541,194 @@ stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return (PyObject *)self;
}
+/* Initializer: set the object up from the optional `initial_value` (str or
+   None) and `newline` (None, "", "\n", "\r" or "\r\n") arguments.
+
+   Returns 0 on success and -1 with an exception set on failure.  Once the
+   arguments have been validated, self->ok is dropped to 0 and only raised
+   back to 1 after every later step has succeeded.
+
+   NOTE(review): a non-str initial_value is reported as ValueError here;
+   TypeError may be the better fit -- confirm against callers before
+   changing it. */
+static int
+stringio_init(StringIOObject *self, PyObject *args, PyObject *kwds)
+{
+    char *kwlist[] = {"initial_value", "newline", NULL};
+    PyObject *value = NULL;
+    char *newline = "\n";
+    int has_value;
+    Py_ssize_t initial_size;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oz:__init__", kwlist,
+                                     &value, &newline))
+        return -1;
+
+    /* A non-empty newline must be exactly "\n", "\r" or "\r\n". */
+    if (newline != NULL && newline[0] != '\0') {
+        int valid = 0;
+
+        if (newline[0] == '\n')
+            valid = (newline[1] == '\0');
+        else if (newline[0] == '\r')
+            valid = (newline[1] == '\0'
+                     || (newline[1] == '\n' && newline[2] == '\0'));
+
+        if (!valid) {
+            PyErr_Format(PyExc_ValueError,
+                         "illegal newline value: %s", newline);
+            return -1;
+        }
+    }
+
+    /* None counts the same as an omitted initial value. */
+    has_value = (value != NULL && value != Py_None);
+    if (has_value && !PyUnicode_Check(value)) {
+        PyErr_Format(PyExc_ValueError,
+                     "initial_value must be str or None, not %.200s",
+                     Py_TYPE(value)->tp_name);
+        return -1;
+    }
+
+    /* From here on the object is in flux until the very end. */
+    self->ok = 0;
+
+    Py_CLEAR(self->readnl);
+    Py_CLEAR(self->writenl);
+    Py_CLEAR(self->decoder);
+
+    if (newline != NULL) {
+        self->readnl = PyUnicode_FromString(newline);
+        if (self->readnl == NULL)
+            return -1;
+    }
+    self->readuniversal = (newline == NULL || newline[0] == '\0');
+    self->readtranslate = (newline == NULL);
+
+    /* If newline == "", we don't translate anything.
+       If newline == "\n" or newline == None, we translate to "\n", which
+       is a no-op.
+       (For newline == None, TextIOWrapper translates to os.linesep, but
+       that is pointless for StringIO.)
+       So a write-side newline is only recorded for "\r" and "\r\n". */
+    if (newline != NULL && newline[0] == '\r') {
+        Py_INCREF(self->readnl);
+        self->writenl = self->readnl;
+    }
+
+    if (self->readuniversal) {
+        self->decoder = PyObject_CallFunction(
+            (PyObject *)&PyIncrementalNewlineDecoder_Type,
+            "Oi", Py_None, (int) self->readtranslate);
+        if (self->decoder == NULL)
+            return -1;
+    }
+
+    /* Now everything is set up: size the buffer for the initial value and
+       copy it in.  The size is only a heuristic, since newline translation
+       might change the string length. */
+    self->string_size = 0;
+    initial_size = has_value ? PyUnicode_GetSize(value) : 0;
+    if (resize_buffer(self, initial_size) < 0)
+        return -1;
+    if (has_value) {
+        self->pos = 0;
+        if (write_str(self, value) < 0)
+            return -1;
+    }
+
+    /* Rewind after the initial write. */
+    self->pos = 0;
+
+    self->closed = 0;
+    self->ok = 1;
+    return 0;
+}
+
+/* Properties and pseudo-properties */
+/* seekable(): returns True once CHECK_INITIALIZED has let the call through.
+   `args` is unused. */
+static PyObject *
+stringio_seekable(StringIOObject *self, PyObject *args)
+{
+    CHECK_INITIALIZED(self);
+
+    Py_INCREF(Py_True);
+    return Py_True;
+}
+
+/* readable(): returns True once CHECK_INITIALIZED has let the call through.
+   `args` is unused. */
+static PyObject *
+stringio_readable(StringIOObject *self, PyObject *args)
+{
+    CHECK_INITIALIZED(self);
+
+    Py_INCREF(Py_True);
+    return Py_True;
+}
+
+/* writable(): returns True once CHECK_INITIALIZED has let the call through.
+   `args` is unused. */
+static PyObject *
+stringio_writable(StringIOObject *self, PyObject *args)
+{
+    CHECK_INITIALIZED(self);
+
+    Py_INCREF(Py_True);
+    return Py_True;
+}
+
+/* Getter for the `buffer` attribute: always fails, raising the module's
+   unsupported-operation exception.  `self` and `context` are unused. */
+static PyObject *
+stringio_buffer(StringIOObject *self, void *context)
+{
+    PyObject *exc_type = IO_STATE->unsupported_operation;
+
+    PyErr_SetString(exc_type,
+                    "buffer attribute is unsupported on type StringIO");
+    return NULL;
+}
+
+/* Getter for the `closed` attribute: the closed flag as a Python bool.
+   Unlike most other getters it performs no CHECK_CLOSED, so it can be read
+   on a closed object. */
+static PyObject *
+stringio_closed(StringIOObject *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+
+    if (self->closed) {
+        Py_RETURN_TRUE;
+    }
+    Py_RETURN_FALSE;
+}
+
+/* Getter for the `encoding` pseudo-attribute: a new str "utf-8", after the
+   initialized and closed checks. */
+static PyObject *
+stringio_encoding(StringIOObject *self, void *context)
+{
+    const char *encoding_name = "utf-8";
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    return PyUnicode_FromString(encoding_name);
+}
+
+/* Getter for the `errors` pseudo-attribute: a new str "strict", after the
+   initialized and closed checks. */
+static PyObject *
+stringio_errors(StringIOObject *self, void *context)
+{
+    const char *errors_name = "strict";
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    return PyUnicode_FromString(errors_name);
+}
+
+/* Getter for the `line_buffering` pseudo-attribute: always False, after the
+   initialized and closed checks. */
+static PyObject *
+stringio_line_buffering(StringIOObject *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    Py_INCREF(Py_False);
+    return Py_False;
+}
+
+/* Getter for the `newlines` attribute: forwards to the decoder's attribute
+   named by _PyIO_str_newlines when a decoder exists, otherwise None. */
+static PyObject *
+stringio_newlines(StringIOObject *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    if (self->decoder != NULL) {
+        return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
+    }
+    Py_RETURN_NONE;
+}
+
+/* Method table: Python-visible name, C implementation, calling convention
+   and docstring for each StringIO method. */
static struct PyMethodDef stringio_methods[] = {
- {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
- {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL},
- {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL},
- {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
- {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL},
- {"write", (PyCFunction)stringio_write, METH_O, NULL},
+    {"close",    (PyCFunction)stringio_close,    METH_NOARGS,  stringio_close_doc},
+    {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc},
+    {"read",     (PyCFunction)stringio_read,     METH_VARARGS, stringio_read_doc},
+    {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
+    {"tell",     (PyCFunction)stringio_tell,     METH_NOARGS,  stringio_tell_doc},
+    {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
+    {"seek",     (PyCFunction)stringio_seek,     METH_VARARGS, stringio_seek_doc},
+    {"write",    (PyCFunction)stringio_write,    METH_O,       stringio_write_doc},
+
+    /* Capability queries; these carry no docstring. */
+    {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS,  NULL},
+    {"readable", (PyCFunction)stringio_readable, METH_NOARGS,  NULL},
+    {"writable", (PyCFunction)stringio_writable, METH_NOARGS,  NULL},
{NULL, NULL} /* sentinel */
};
-static PyTypeObject StringIO_Type = {
+/* Attribute table: every entry is read-only (no setter) and has no
+   docstring or closure. */
+static PyGetSetDef stringio_getset[] = {
+    {"closed",         (getter)stringio_closed,         NULL, NULL, NULL},
+    {"newlines",       (getter)stringio_newlines,       NULL, NULL, NULL},
+    /* (The following comment is taken straight from the original Python
+       wrapper:)
+       XXX Cruft to support the TextIOWrapper API. This would only
+       be meaningful if StringIO supported the buffer attribute.
+       Hopefully, a better solution than adding these pseudo-attributes
+       will be found.
+    */
+    {"buffer",         (getter)stringio_buffer,         NULL, NULL, NULL},
+    {"encoding",       (getter)stringio_encoding,       NULL, NULL, NULL},
+    {"errors",         (getter)stringio_errors,         NULL, NULL, NULL},
+    {"line_buffering", (getter)stringio_line_buffering, NULL, NULL, NULL},
+    {NULL, NULL, NULL, NULL, NULL}  /* sentinel */
+};
+
+PyTypeObject PyStringIO_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
- "_stringio._StringIO", /*tp_name*/
+ "_io.StringIO", /*tp_name*/
sizeof(StringIOObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)stringio_dealloc, /*tp_dealloc*/
@@ -333,51 +746,24 @@ static PyTypeObject StringIO_Type = {
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
- 0, /*tp_doc*/
- 0, /*tp_traverse*/
- 0, /*tp_clear*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
+ | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ stringio_doc, /*tp_doc*/
+ (traverseproc)stringio_traverse, /*tp_traverse*/
+ (inquiry)stringio_clear, /*tp_clear*/
0, /*tp_richcompare*/
- 0, /*tp_weaklistoffset*/
+ offsetof(StringIOObject, weakreflist), /*tp_weaklistoffset*/
0, /*tp_iter*/
- 0, /*tp_iternext*/
+ (iternextfunc)stringio_iternext, /*tp_iternext*/
stringio_methods, /*tp_methods*/
0, /*tp_members*/
- 0, /*tp_getset*/
+ stringio_getset, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
- 0, /*tp_dictoffset*/
- 0, /*tp_init*/
+ offsetof(StringIOObject, dict), /*tp_dictoffset*/
+ (initproc)stringio_init, /*tp_init*/
0, /*tp_alloc*/
stringio_new, /*tp_new*/
};
-
-static struct PyModuleDef _stringiomodule = {
- PyModuleDef_HEAD_INIT,
- "_stringio",
- NULL,
- -1,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
-};
-
-PyMODINIT_FUNC
-PyInit__stringio(void)
-{
- PyObject *m;
-
- if (PyType_Ready(&StringIO_Type) < 0)
- return NULL;
- m = PyModule_Create(&_stringiomodule);
- if (m == NULL)
- return NULL;
- Py_INCREF(&StringIO_Type);
- if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0)
- return NULL;
- return m;
-}