From 60d241f135f10312f5a638846659d7e471f6cac9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 16 Oct 2007 06:31:30 +0000 Subject: For PEP3137: Adds missing methods to the mutable PyBytes object (soon to be called a buffer). Shares code with stringobject when possible. Adds unit tests with common code that should be usable to test the PEPs mutable buffer() and immutable bytes() types. http://bugs.python.org/issue1261 --- Include/bytes_methods.h | 84 ++++ Lib/test/buffer_tests.py | 206 ++++++++ Lib/test/test_bytes.py | 48 +- Makefile.pre.in | 13 + Objects/bytes_methods.c | 610 +++++++++++++++++++++++ Objects/bytesobject.c | 205 +++++--- Objects/stringlib/README.txt | 9 + Objects/stringlib/ctype.h | 110 ++++ Objects/stringlib/find.h | 2 +- Objects/stringlib/transmogrify.h | 362 ++++++++++++++ Objects/stringlib/unicodedefs.h | 2 + Objects/stringobject.c | 1023 ++------------------------------------ 12 files changed, 1595 insertions(+), 1079 deletions(-) create mode 100644 Include/bytes_methods.h create mode 100644 Lib/test/buffer_tests.py create mode 100644 Objects/bytes_methods.c create mode 100644 Objects/stringlib/ctype.h create mode 100644 Objects/stringlib/transmogrify.h diff --git a/Include/bytes_methods.h b/Include/bytes_methods.h new file mode 100644 index 0000000..59873f2 --- /dev/null +++ b/Include/bytes_methods.h @@ -0,0 +1,84 @@ +#ifndef Py_BYTES_CTYPE_H +#define Py_BYTES_CTYPE_H + +/* + * The internal implementation behind PyString (bytes) and PyBytes (buffer) + * methods of the given names, they operate on ASCII byte strings. + */ +extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len); + +/* These store their len sized answer in the given preallocated *result arg. */ +extern void _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len); +extern void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len); +extern void _Py_bytes_title(char *result, char *s, Py_ssize_t len); +extern void _Py_bytes_capitalize(char *result, char *s, Py_ssize_t len); +extern void _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len); + +/* Shared __doc__ strings. */ +extern const char _Py_isspace__doc__[]; +extern const char _Py_isalpha__doc__[]; +extern const char _Py_isalnum__doc__[]; +extern const char _Py_isdigit__doc__[]; +extern const char _Py_islower__doc__[]; +extern const char _Py_isupper__doc__[]; +extern const char _Py_istitle__doc__[]; +extern const char _Py_lower__doc__[]; +extern const char _Py_upper__doc__[]; +extern const char _Py_title__doc__[]; +extern const char _Py_capitalize__doc__[]; +extern const char _Py_swapcase__doc__[]; + +#define FLAG_LOWER 0x01 +#define FLAG_UPPER 0x02 +#define FLAG_ALPHA (FLAG_LOWER|FLAG_UPPER) +#define FLAG_DIGIT 0x04 +#define FLAG_ALNUM (FLAG_ALPHA|FLAG_DIGIT) +#define FLAG_SPACE 0x08 +#define FLAG_XDIGIT 0x10 + +extern const unsigned int _Py_ctype_table[256]; + +#define ISLOWER(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_LOWER) +#define ISUPPER(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_UPPER) +#define ISALPHA(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_ALPHA) +#define ISDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_DIGIT) +#define ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_XDIGIT) +#define ISALNUM(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_ALNUM) +#define ISSPACE(c) (_Py_ctype_table[Py_CHARMASK(c)] & FLAG_SPACE) + +#undef islower +#define islower(c) undefined_islower(c) +#undef isupper +#define isupper(c) undefined_isupper(c) +#undef isalpha +#define isalpha(c) undefined_isalpha(c) +#undef isdigit +#define isdigit(c) undefined_isdigit(c) +#undef isxdigit +#define isxdigit(c) undefined_isxdigit(c) +#undef isalnum +#define isalnum(c) undefined_isalnum(c) +#undef isspace +#define isspace(c) undefined_isspace(c) + +extern const unsigned char _Py_ctype_tolower[256]; +extern const unsigned char _Py_ctype_toupper[256]; + +#define TOLOWER(c) (_Py_ctype_tolower[Py_CHARMASK(c)]) +#define TOUPPER(c) (_Py_ctype_toupper[Py_CHARMASK(c)]) + +#undef tolower +#define tolower(c) undefined_tolower(c) +#undef toupper +#define toupper(c) undefined_toupper(c) + +/* this is needed because some docs are shared from the .o, not static */ +#define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str) + +#endif /* !Py_BYTES_CTYPE_H */ diff --git a/Lib/test/buffer_tests.py b/Lib/test/buffer_tests.py new file mode 100644 index 0000000..01ac3c5 --- /dev/null +++ b/Lib/test/buffer_tests.py @@ -0,0 +1,206 @@ +# Tests that work for both str8 (bytes) and bytes (buffer) objects. +# See PEP 3137. + +import struct +import sys + +class MixinBytesBufferCommonTests(object): + """Tests that work for both str8 (bytes) and bytes (buffer) objects. + See PEP 3137. + """ + + def marshal(self, x): + """Convert x into the appropriate type for these tests.""" + raise RuntimeError('test class must provide a marshal method') + + def test_islower(self): + self.assertFalse(self.marshal(b'').islower()) + self.assert_(self.marshal(b'a').islower()) + self.assertFalse(self.marshal(b'A').islower()) + self.assertFalse(self.marshal(b'\n').islower()) + self.assert_(self.marshal(b'abc').islower()) + self.assertFalse(self.marshal(b'aBc').islower()) + self.assert_(self.marshal(b'abc\n').islower()) + self.assertRaises(TypeError, self.marshal(b'abc').islower, 42) + + def test_isupper(self): + self.assertFalse(self.marshal(b'').isupper()) + self.assertFalse(self.marshal(b'a').isupper()) + self.assert_(self.marshal(b'A').isupper()) + self.assertFalse(self.marshal(b'\n').isupper()) + self.assert_(self.marshal(b'ABC').isupper()) + self.assertFalse(self.marshal(b'AbC').isupper()) + self.assert_(self.marshal(b'ABC\n').isupper()) + self.assertRaises(TypeError, self.marshal(b'abc').isupper, 42) + + def test_istitle(self): + self.assertFalse(self.marshal(b'').istitle()) + self.assertFalse(self.marshal(b'a').istitle()) + self.assert_(self.marshal(b'A').istitle()) + self.assertFalse(self.marshal(b'\n').istitle()) + self.assert_(self.marshal(b'A Titlecased Line').istitle()) + self.assert_(self.marshal(b'A\nTitlecased Line').istitle()) + self.assert_(self.marshal(b'A Titlecased, Line').istitle()) + self.assertFalse(self.marshal(b'Not a capitalized String').istitle()) + self.assertFalse(self.marshal(b'Not\ta Titlecase String').istitle()) + self.assertFalse(self.marshal(b'Not--a Titlecase String').istitle()) + self.assertFalse(self.marshal(b'NOT').istitle()) + self.assertRaises(TypeError, self.marshal(b'abc').istitle, 42) + + def test_isspace(self): + self.assertFalse(self.marshal(b'').isspace()) + self.assertFalse(self.marshal(b'a').isspace()) + self.assert_(self.marshal(b' ').isspace()) + self.assert_(self.marshal(b'\t').isspace()) + self.assert_(self.marshal(b'\r').isspace()) + self.assert_(self.marshal(b'\n').isspace()) + self.assert_(self.marshal(b' \t\r\n').isspace()) + self.assertFalse(self.marshal(b' \t\r\na').isspace()) + self.assertRaises(TypeError, self.marshal(b'abc').isspace, 42) + + def test_isalpha(self): + self.assertFalse(self.marshal(b'').isalpha()) + self.assert_(self.marshal(b'a').isalpha()) + self.assert_(self.marshal(b'A').isalpha()) + self.assertFalse(self.marshal(b'\n').isalpha()) + self.assert_(self.marshal(b'abc').isalpha()) + self.assertFalse(self.marshal(b'aBc123').isalpha()) + self.assertFalse(self.marshal(b'abc\n').isalpha()) + self.assertRaises(TypeError, self.marshal(b'abc').isalpha, 42) + + def test_isalnum(self): + self.assertFalse(self.marshal(b'').isalnum()) + self.assert_(self.marshal(b'a').isalnum()) + self.assert_(self.marshal(b'A').isalnum()) + self.assertFalse(self.marshal(b'\n').isalnum()) + self.assert_(self.marshal(b'123abc456').isalnum()) + self.assert_(self.marshal(b'a1b3c').isalnum()) + self.assertFalse(self.marshal(b'aBc000 ').isalnum()) + self.assertFalse(self.marshal(b'abc\n').isalnum()) + self.assertRaises(TypeError, self.marshal(b'abc').isalnum, 42) + + def test_isdigit(self): + self.assertFalse(self.marshal(b'').isdigit()) + self.assertFalse(self.marshal(b'a').isdigit()) + self.assert_(self.marshal(b'0').isdigit()) + self.assert_(self.marshal(b'0123456789').isdigit()) + self.assertFalse(self.marshal(b'0123456789a').isdigit()) + + self.assertRaises(TypeError, self.marshal(b'abc').isdigit, 42) + + def test_lower(self): + self.assertEqual(b'hello', self.marshal(b'HeLLo').lower()) + self.assertEqual(b'hello', self.marshal(b'hello').lower()) + self.assertRaises(TypeError, self.marshal(b'hello').lower, 42) + + def test_upper(self): + self.assertEqual(b'HELLO', self.marshal(b'HeLLo').upper()) + self.assertEqual(b'HELLO', self.marshal(b'HELLO').upper()) + self.assertRaises(TypeError, self.marshal(b'hello').upper, 42) + + def test_capitalize(self): + self.assertEqual(b' hello ', self.marshal(b' hello ').capitalize()) + self.assertEqual(b'Hello ', self.marshal(b'Hello ').capitalize()) + self.assertEqual(b'Hello ', self.marshal(b'hello ').capitalize()) + self.assertEqual(b'Aaaa', self.marshal(b'aaaa').capitalize()) + self.assertEqual(b'Aaaa', self.marshal(b'AaAa').capitalize()) + + self.assertRaises(TypeError, self.marshal(b'hello').capitalize, 42) + + def test_ljust(self): + self.assertEqual(b'abc ', self.marshal(b'abc').ljust(10)) + self.assertEqual(b'abc ', self.marshal(b'abc').ljust(6)) + self.assertEqual(b'abc', self.marshal(b'abc').ljust(3)) + self.assertEqual(b'abc', self.marshal(b'abc').ljust(2)) + self.assertEqual(b'abc*******', self.marshal(b'abc').ljust(10, '*')) + self.assertRaises(TypeError, self.marshal(b'abc').ljust) + + def test_rjust(self): + self.assertEqual(b' abc', self.marshal(b'abc').rjust(10)) + self.assertEqual(b' abc', self.marshal(b'abc').rjust(6)) + self.assertEqual(b'abc', self.marshal(b'abc').rjust(3)) + self.assertEqual(b'abc', self.marshal(b'abc').rjust(2)) + self.assertEqual(b'*******abc', self.marshal(b'abc').rjust(10, '*')) + self.assertRaises(TypeError, self.marshal(b'abc').rjust) + + def test_center(self): + self.assertEqual(b' abc ', self.marshal(b'abc').center(10)) + self.assertEqual(b' abc ', self.marshal(b'abc').center(6)) + self.assertEqual(b'abc', self.marshal(b'abc').center(3)) + self.assertEqual(b'abc', self.marshal(b'abc').center(2)) + self.assertEqual(b'***abc****', self.marshal(b'abc').center(10, '*')) + self.assertRaises(TypeError, self.marshal(b'abc').center) + + def test_swapcase(self): + self.assertEqual(b'hEllO CoMPuTErS', + self.marshal(b'HeLLo cOmpUteRs').swapcase()) + + self.assertRaises(TypeError, self.marshal(b'hello').swapcase, 42) + + def test_zfill(self): + self.assertEqual(b'123', self.marshal(b'123').zfill(2)) + self.assertEqual(b'123', self.marshal(b'123').zfill(3)) + self.assertEqual(b'0123', self.marshal(b'123').zfill(4)) + self.assertEqual(b'+123', self.marshal(b'+123').zfill(3)) + self.assertEqual(b'+123', self.marshal(b'+123').zfill(4)) + self.assertEqual(b'+0123', self.marshal(b'+123').zfill(5)) + self.assertEqual(b'-123', self.marshal(b'-123').zfill(3)) + self.assertEqual(b'-123', self.marshal(b'-123').zfill(4)) + self.assertEqual(b'-0123', self.marshal(b'-123').zfill(5)) + self.assertEqual(b'000', self.marshal(b'').zfill(3)) + self.assertEqual(b'34', self.marshal(b'34').zfill(1)) + self.assertEqual(b'0034', self.marshal(b'34').zfill(4)) + + self.assertRaises(TypeError, self.marshal(b'123').zfill) + + def test_expandtabs(self): + self.assertEqual(b'abc\rab def\ng hi', + self.marshal(b'abc\rab\tdef\ng\thi').expandtabs()) + self.assertEqual(b'abc\rab def\ng hi', + self.marshal(b'abc\rab\tdef\ng\thi').expandtabs(8)) + self.assertEqual(b'abc\rab def\ng hi', + self.marshal(b'abc\rab\tdef\ng\thi').expandtabs(4)) + self.assertEqual(b'abc\r\nab def\ng hi', + self.marshal(b'abc\r\nab\tdef\ng\thi').expandtabs(4)) + self.assertEqual(b'abc\rab def\ng hi', + self.marshal(b'abc\rab\tdef\ng\thi').expandtabs()) + self.assertEqual(b'abc\rab def\ng hi', + self.marshal(b'abc\rab\tdef\ng\thi').expandtabs(8)) + self.assertEqual(b'abc\r\nab\r\ndef\ng\r\nhi', + self.marshal(b'abc\r\nab\r\ndef\ng\r\nhi').expandtabs(4)) + self.assertEqual(b' a\n b', self.marshal(b' \ta\n\tb').expandtabs(1)) + + self.assertRaises(TypeError, self.marshal(b'hello').expandtabs, 42, 42) + # This test is only valid when sizeof(int) == sizeof(void*) == 4. + if sys.maxint < (1 << 32) and struct.calcsize('P') == 4: + self.assertRaises(OverflowError, + self.marshal(b'\ta\n\tb').expandtabs, sys.maxint) + + def test_title(self): + self.assertEqual(b' Hello ', self.marshal(b' hello ').title()) + self.assertEqual(b'Hello ', self.marshal(b'hello ').title()) + self.assertEqual(b'Hello ', self.marshal(b'Hello ').title()) + self.assertEqual(b'Format This As Title String', + self.marshal(b'fOrMaT thIs aS titLe String').title()) + self.assertEqual(b'Format,This-As*Title;String', + self.marshal(b'fOrMaT,thIs-aS*titLe;String').title()) + self.assertEqual(b'Getint', self.marshal(b'getInt').title()) + self.assertRaises(TypeError, self.marshal(b'hello').title, 42) + + def test_splitlines(self): + self.assertEqual([b'abc', b'def', b'', b'ghi'], + self.marshal(b'abc\ndef\n\rghi').splitlines()) + self.assertEqual([b'abc', b'def', b'', b'ghi'], + self.marshal(b'abc\ndef\n\r\nghi').splitlines()) + self.assertEqual([b'abc', b'def', b'ghi'], + self.marshal(b'abc\ndef\r\nghi').splitlines()) + self.assertEqual([b'abc', b'def', b'ghi'], + self.marshal(b'abc\ndef\r\nghi\n').splitlines()) + self.assertEqual([b'abc', b'def', b'ghi', b''], + self.marshal(b'abc\ndef\r\nghi\n\r').splitlines()) + self.assertEqual([b'', b'abc', b'def', b'ghi', b''], + self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines()) + self.assertEqual([b'\n', b'abc\n', b'def\r\n', b'ghi\n', b'\r'], + self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(1)) + + self.assertRaises(TypeError, self.marshal(b'abc').splitlines, 42, 42) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index c51a320..391a660 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -8,6 +8,7 @@ import tempfile import unittest import test.test_support import test.string_tests +import test.buffer_tests class BytesTest(unittest.TestCase): @@ -454,17 +455,18 @@ class BytesTest(unittest.TestCase): def test_fromhex(self): self.assertRaises(TypeError, bytes.fromhex) self.assertRaises(TypeError, bytes.fromhex, 1) - self.assertEquals(bytes.fromhex(''), bytes()) + self.assertEquals(bytes.fromhex(b''), bytes()) b = bytes([0x1a, 0x2b, 0x30]) - self.assertEquals(bytes.fromhex('1a2B30'), b) - self.assertEquals(bytes.fromhex(' 1A 2B 30 '), b) + self.assertEquals(bytes.fromhex(b'1a2B30'), b) + self.assertEquals(bytes.fromhex(b' 1A 2B 30 '), b) self.assertEquals(bytes.fromhex(memoryview(b'')), bytes()) self.assertEquals(bytes.fromhex(memoryview(b'0000')), bytes([0, 0])) - self.assertRaises(ValueError, bytes.fromhex, 'a') - self.assertRaises(ValueError, bytes.fromhex, 'rt') - self.assertRaises(ValueError, bytes.fromhex, '1a b cd') - self.assertRaises(ValueError, bytes.fromhex, '\x00') - self.assertRaises(ValueError, bytes.fromhex, '12 \x00 34') + self.assertRaises(TypeError, bytes.fromhex, '1B') + self.assertRaises(ValueError, bytes.fromhex, b'a') + self.assertRaises(ValueError, bytes.fromhex, b'rt') + self.assertRaises(ValueError, bytes.fromhex, b'1a b cd') + self.assertRaises(ValueError, bytes.fromhex, b'\x00') + self.assertRaises(ValueError, bytes.fromhex, b'12 \x00 34') def test_join(self): self.assertEqual(b"".join([]), bytes()) @@ -504,11 +506,12 @@ class BytesTest(unittest.TestCase): self.assertEqual(b, b'heo') self.assertRaises(ValueError, lambda: b.remove(ord('l'))) self.assertRaises(ValueError, lambda: b.remove(400)) - self.assertRaises(ValueError, lambda: b.remove('e')) + self.assertRaises(TypeError, lambda: b.remove('e')) # remove first and last b.remove(ord('o')) b.remove(ord('h')) self.assertEqual(b, b'e') + self.assertRaises(TypeError, lambda: b.remove(b'e')) def test_pop(self): b = b'world' @@ -542,6 +545,7 @@ class BytesTest(unittest.TestCase): b = bytes() b.append(ord('A')) self.assertEqual(len(b), 1) + self.assertRaises(TypeError, lambda: b.append(b'o')) def test_insert(self): b = b'msssspp' @@ -550,6 +554,7 @@ class BytesTest(unittest.TestCase): b.insert(-2, ord('i')) b.insert(1000, ord('i')) self.assertEqual(b, b'mississippi') + self.assertRaises(TypeError, lambda: b.insert(0, b'1')) def test_startswith(self): b = b'hello' @@ -734,6 +739,29 @@ class BytesTest(unittest.TestCase): # Unfortunately they are all bundled with tests that # are not appropriate for bytes + # I've started porting some of those into buffer_tests.py, we should port + # the rest that make sense (the code can be cleaned up to use modern + # unittest methods at the same time). + +class BufferPEP3137Test(unittest.TestCase, + test.buffer_tests.MixinBytesBufferCommonTests): + def marshal(self, x): + return bytes(x) + # TODO this should become: + #return buffer(x) + # once the bytes -> buffer and str8 -> bytes rename happens + + def test_returns_new_copy(self): + val = self.marshal(b'1234') + # On immutable types these MAY return a reference to themselves + # but on mutable types like buffer they MUST return a new copy. + for methname in ('zfill', 'rjust', 'ljust', 'center'): + method = getattr(val, methname) + newval = method(3) + self.assertEqual(val, newval) + self.assertTrue(val is not newval, + methname+' returned self on a mutable object') + class BytesAsStringTest(test.string_tests.BaseTest): type2test = bytes @@ -759,7 +787,7 @@ class BytesAsStringTest(test.string_tests.BaseTest): def test_main(): test.test_support.run_unittest(BytesTest) test.test_support.run_unittest(BytesAsStringTest) - + test.test_support.run_unittest(BufferPEP3137Test) if __name__ == "__main__": ##test_main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 45f771a..7a0851e 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -285,6 +285,7 @@ PYTHON_OBJS= \ OBJECT_OBJS= \ Objects/abstract.o \ Objects/boolobject.o \ + Objects/bytes_methods.o \ Objects/bytesobject.o \ Objects/cellobject.o \ Objects/classobject.o \ @@ -507,6 +508,18 @@ Python/importdl.o: $(srcdir)/Python/importdl.c Objects/unicodectype.o: $(srcdir)/Objects/unicodectype.c \ $(srcdir)/Objects/unicodetype_db.h +BYTESTR_DEPS = Include/bytes_methods.h \ + $(srcdir)/Objects/stringlib/fastsearch.h \ + $(srcdir)/Objects/stringlib/count.h \ + $(srcdir)/Objects/stringlib/find.h \ + $(srcdir)/Objects/stringlib/partition.h \ + $(srcdir)/Objects/stringlib/ctype.h \ + $(srcdir)/Objects/stringlib/transmogrify.h + +Objects/stringobject.o: $(srcdir)/Objects/stringobject.c $(BYTESTR_DEPS) + +Objects/bytesobject.o: $(srcdir)/Objects/bytesobject.c $(BYTESTR_DEPS) + Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \ $(srcdir)/Objects/stringlib/string_format.h \ $(srcdir)/Objects/stringlib/unicodedefs.h \ diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c new file mode 100644 index 0000000..de87905 --- /dev/null +++ b/Objects/bytes_methods.c @@ -0,0 +1,610 @@ +#include "Python.h" +#include "bytes_methods.h" + +/* Our own locale-independent ctype.h-like macros */ + +const unsigned int _Py_ctype_table[256] = { + 0, /* 0x0 '\x00' */ + 0, /* 0x1 '\x01' */ + 0, /* 0x2 '\x02' */ + 0, /* 0x3 '\x03' */ + 0, /* 0x4 '\x04' */ + 0, /* 0x5 '\x05' */ + 0, /* 0x6 '\x06' */ + 0, /* 0x7 '\x07' */ + 0, /* 0x8 '\x08' */ + FLAG_SPACE, /* 0x9 '\t' */ + FLAG_SPACE, /* 0xa '\n' */ + FLAG_SPACE, /* 0xb '\v' */ + FLAG_SPACE, /* 0xc '\f' */ + FLAG_SPACE, /* 0xd '\r' */ + 0, /* 0xe '\x0e' */ + 0, /* 0xf '\x0f' */ + 0, /* 0x10 '\x10' */ + 0, /* 0x11 '\x11' */ + 0, /* 0x12 '\x12' */ + 0, /* 0x13 '\x13' */ + 0, /* 0x14 '\x14' */ + 0, /* 0x15 '\x15' */ + 0, /* 0x16 '\x16' */ + 0, /* 0x17 '\x17' */ + 0, /* 0x18 '\x18' */ + 0, /* 0x19 '\x19' */ + 0, /* 0x1a '\x1a' */ + 0, /* 0x1b '\x1b' */ + 0, /* 0x1c '\x1c' */ + 0, /* 0x1d '\x1d' */ + 0, /* 0x1e '\x1e' */ + 0, /* 0x1f '\x1f' */ + FLAG_SPACE, /* 0x20 ' ' */ + 0, /* 0x21 '!' */ + 0, /* 0x22 '"' */ + 0, /* 0x23 '#' */ + 0, /* 0x24 '$' */ + 0, /* 0x25 '%' */ + 0, /* 0x26 '&' */ + 0, /* 0x27 "'" */ + 0, /* 0x28 '(' */ + 0, /* 0x29 ')' */ + 0, /* 0x2a '*' */ + 0, /* 0x2b '+' */ + 0, /* 0x2c ',' */ + 0, /* 0x2d '-' */ + 0, /* 0x2e '.' */ + 0, /* 0x2f '/' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x30 '0' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x31 '1' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x32 '2' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x33 '3' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x34 '4' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x35 '5' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x36 '6' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x37 '7' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x38 '8' */ + FLAG_DIGIT|FLAG_XDIGIT, /* 0x39 '9' */ + 0, /* 0x3a ':' */ + 0, /* 0x3b ';' */ + 0, /* 0x3c '<' */ + 0, /* 0x3d '=' */ + 0, /* 0x3e '>' */ + 0, /* 0x3f '?' */ + 0, /* 0x40 '@' */ + FLAG_UPPER|FLAG_XDIGIT, /* 0x41 'A' */ + FLAG_UPPER|FLAG_XDIGIT, /* 0x42 'B' */ + FLAG_UPPER|FLAG_XDIGIT, /* 0x43 'C' */ + FLAG_UPPER|FLAG_XDIGIT, /* 0x44 'D' */ + FLAG_UPPER|FLAG_XDIGIT, /* 0x45 'E' */ + FLAG_UPPER|FLAG_XDIGIT, /* 0x46 'F' */ + FLAG_UPPER, /* 0x47 'G' */ + FLAG_UPPER, /* 0x48 'H' */ + FLAG_UPPER, /* 0x49 'I' */ + FLAG_UPPER, /* 0x4a 'J' */ + FLAG_UPPER, /* 0x4b 'K' */ + FLAG_UPPER, /* 0x4c 'L' */ + FLAG_UPPER, /* 0x4d 'M' */ + FLAG_UPPER, /* 0x4e 'N' */ + FLAG_UPPER, /* 0x4f 'O' */ + FLAG_UPPER, /* 0x50 'P' */ + FLAG_UPPER, /* 0x51 'Q' */ + FLAG_UPPER, /* 0x52 'R' */ + FLAG_UPPER, /* 0x53 'S' */ + FLAG_UPPER, /* 0x54 'T' */ + FLAG_UPPER, /* 0x55 'U' */ + FLAG_UPPER, /* 0x56 'V' */ + FLAG_UPPER, /* 0x57 'W' */ + FLAG_UPPER, /* 0x58 'X' */ + FLAG_UPPER, /* 0x59 'Y' */ + FLAG_UPPER, /* 0x5a 'Z' */ + 0, /* 0x5b '[' */ + 0, /* 0x5c '\\' */ + 0, /* 0x5d ']' */ + 0, /* 0x5e '^' */ + 0, /* 0x5f '_' */ + 0, /* 0x60 '`' */ + FLAG_LOWER|FLAG_XDIGIT, /* 0x61 'a' */ + FLAG_LOWER|FLAG_XDIGIT, /* 0x62 'b' */ + FLAG_LOWER|FLAG_XDIGIT, /* 0x63 'c' */ + FLAG_LOWER|FLAG_XDIGIT, /* 0x64 'd' */ + FLAG_LOWER|FLAG_XDIGIT, /* 0x65 'e' */ + FLAG_LOWER|FLAG_XDIGIT, /* 0x66 'f' */ + FLAG_LOWER, /* 0x67 'g' */ + FLAG_LOWER, /* 0x68 'h' */ + FLAG_LOWER, /* 0x69 'i' */ + FLAG_LOWER, /* 0x6a 'j' */ + FLAG_LOWER, /* 0x6b 'k' */ + FLAG_LOWER, /* 0x6c 'l' */ + FLAG_LOWER, /* 0x6d 'm' */ + FLAG_LOWER, /* 0x6e 'n' */ + FLAG_LOWER, /* 0x6f 'o' */ + FLAG_LOWER, /* 0x70 'p' */ + FLAG_LOWER, /* 0x71 'q' */ + FLAG_LOWER, /* 0x72 'r' */ + FLAG_LOWER, /* 0x73 's' */ + FLAG_LOWER, /* 0x74 't' */ + FLAG_LOWER, /* 0x75 'u' */ + FLAG_LOWER, /* 0x76 'v' */ + FLAG_LOWER, /* 0x77 'w' */ + FLAG_LOWER, /* 0x78 'x' */ + FLAG_LOWER, /* 0x79 'y' */ + FLAG_LOWER, /* 0x7a 'z' */ + 0, /* 0x7b '{' */ + 0, /* 0x7c '|' */ + 0, /* 0x7d '}' */ + 0, /* 0x7e '~' */ + 0, /* 0x7f '\x7f' */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +const unsigned char _Py_ctype_tolower[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +}; + +const unsigned char _Py_ctype_toupper[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +}; + + +PyDoc_STRVAR_shared(_Py_isspace__doc__, +"B.isspace() -> bool\n\ +\n\ +Return True if all characters in B are whitespace\n\ +and there is at least one character in B, False otherwise."); + +PyObject* +_Py_bytes_isspace(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + + /* Shortcut for single character strings */ + if (len == 1 && ISSPACE(*p)) + Py_RETURN_TRUE; + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + for (; p < e; p++) { + if (!ISSPACE(*p)) + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; +} + + +PyDoc_STRVAR_shared(_Py_isalpha__doc__, +"B.isalpha() -> bool\n\ +\n\ +Return True if all characters in B are alphabetic\n\ +and there is at least one character in B, False otherwise."); + +PyObject* +_Py_bytes_isalpha(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + + /* Shortcut for single character strings */ + if (len == 1 && ISALPHA(*p)) + Py_RETURN_TRUE; + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + for (; p < e; p++) { + if (!ISALPHA(*p)) + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; +} + + +PyDoc_STRVAR_shared(_Py_isalnum__doc__, +"B.isalnum() -> bool\n\ +\n\ +Return True if all characters in B are alphanumeric\n\ +and there is at least one character in B, False otherwise."); + +PyObject* +_Py_bytes_isalnum(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + + /* Shortcut for single character strings */ + if (len == 1 && ISALNUM(*p)) + Py_RETURN_TRUE; + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + for (; p < e; p++) { + if (!ISALNUM(*p)) + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; +} + + +PyDoc_STRVAR_shared(_Py_isdigit__doc__, +"B.isdigit() -> bool\n\ +\n\ +Return True if all characters in B are digits\n\ +and there is at least one character in B, False otherwise."); + +PyObject* +_Py_bytes_isdigit(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + + /* Shortcut for single character strings */ + if (len == 1 && ISDIGIT(*p)) + Py_RETURN_TRUE; + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + for (; p < e; p++) { + if (!ISDIGIT(*p)) + Py_RETURN_FALSE; + } + Py_RETURN_TRUE; +} + + +PyDoc_STRVAR_shared(_Py_islower__doc__, +"B.islower() -> bool\n\ +\n\ +Return True if all cased characters in B are lowercase and there is\n\ +at least one cased character in B, False otherwise."); + +PyObject* +_Py_bytes_islower(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + int cased; + + /* Shortcut for single character strings */ + if (len == 1) + return PyBool_FromLong(ISLOWER(*p)); + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + cased = 0; + for (; p < e; p++) { + if (ISUPPER(*p)) + Py_RETURN_FALSE; + else if (!cased && ISLOWER(*p)) + cased = 1; + } + return PyBool_FromLong(cased); +} + + +PyDoc_STRVAR_shared(_Py_isupper__doc__, +"B.isupper() -> bool\n\ +\n\ +Return True if all cased characters in B are uppercase and there is\n\ +at least one cased character in B, False otherwise."); + +PyObject* +_Py_bytes_isupper(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + int cased; + + /* Shortcut for single character strings */ + if (len == 1) + return PyBool_FromLong(ISUPPER(*p)); + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + cased = 0; + for (; p < e; p++) { + if (ISLOWER(*p)) + Py_RETURN_FALSE; + else if (!cased && ISUPPER(*p)) + cased = 1; + } + return PyBool_FromLong(cased); +} + + +PyDoc_STRVAR_shared(_Py_istitle__doc__, +"B.istitle() -> bool\n\ +\n\ +Return True if B is a titlecased string and there is at least one\n\ +character in B, i.e. uppercase characters may only follow uncased\n\ +characters and lowercase characters only cased ones. Return False\n\ +otherwise."); + +PyObject* +_Py_bytes_istitle(const char *cptr, Py_ssize_t len) +{ + register const unsigned char *p + = (unsigned char *) cptr; + register const unsigned char *e; + int cased, previous_is_cased; + + /* Shortcut for single character strings */ + if (len == 1) + return PyBool_FromLong(ISUPPER(*p)); + + /* Special case for empty strings */ + if (len == 0) + Py_RETURN_FALSE; + + e = p + len; + cased = 0; + previous_is_cased = 0; + for (; p < e; p++) { + register const unsigned char ch = *p; + + if (ISUPPER(ch)) { + if (previous_is_cased) + Py_RETURN_FALSE; + previous_is_cased = 1; + cased = 1; + } + else if (ISLOWER(ch)) { + if (!previous_is_cased) + Py_RETURN_FALSE; + previous_is_cased = 1; + cased = 1; + } + else + previous_is_cased = 0; + } + return PyBool_FromLong(cased); +} + + +PyDoc_STRVAR_shared(_Py_lower__doc__, +"B.lower() -> copy of B\n\ +\n\ +Return a copy of B with all ASCII characters converted to lowercase."); + +void +_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len) +{ + Py_ssize_t i; + + /* + newobj = PyString_FromStringAndSize(NULL, len); + if (!newobj) + return NULL; + + s = PyString_AS_STRING(newobj); + */ + + Py_MEMCPY(result, cptr, len); + + for (i = 0; i < len; i++) { + int c = Py_CHARMASK(result[i]); + if (ISUPPER(c)) + result[i] = TOLOWER(c); + } +} + + +PyDoc_STRVAR_shared(_Py_upper__doc__, +"B.upper() -> copy of B\n\ +\n\ +Return a copy of B with all ASCII characters converted to uppercase."); + +void +_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len) +{ + Py_ssize_t i; + + /* + newobj = PyString_FromStringAndSize(NULL, len); + if (!newobj) + return NULL; + + s = PyString_AS_STRING(newobj); + */ + + Py_MEMCPY(result, cptr, len); + + for (i = 0; i < len; i++) { + int c = Py_CHARMASK(result[i]); + if (ISLOWER(c)) + result[i] = TOUPPER(c); + } +} + + +PyDoc_STRVAR_shared(_Py_title__doc__, +"B.title() -> copy of B\n\ +\n\ +Return a titlecased version of B, i.e. ASCII words start with uppercase\n\ +characters, all remaining cased characters have lowercase."); + +void +_Py_bytes_title(char *result, char *s, Py_ssize_t len) +{ + Py_ssize_t i; + int previous_is_cased = 0; + + /* + newobj = PyString_FromStringAndSize(NULL, len); + if (newobj == NULL) + return NULL; + s_new = PyString_AsString(newobj); + */ + for (i = 0; i < len; i++) { + int c = Py_CHARMASK(*s++); + if (ISLOWER(c)) { + if (!previous_is_cased) + c = TOUPPER(c); + previous_is_cased = 1; + } else if (ISUPPER(c)) { + if (previous_is_cased) + c = TOLOWER(c); + previous_is_cased = 1; + } else + previous_is_cased = 0; + *result++ = c; + } +} + + +PyDoc_STRVAR_shared(_Py_capitalize__doc__, +"B.capitalize() -> copy of B\n\ +\n\ +Return a copy of B with only its first character capitalized (ASCII)."); + +void +_Py_bytes_capitalize(char *result, char *s, Py_ssize_t len) +{ + Py_ssize_t i; + + /* + newobj = PyString_FromStringAndSize(NULL, len); + if (newobj == NULL) + return NULL; + s_new = PyString_AsString(newobj); + */ + if (0 < len) { + int c = Py_CHARMASK(*s++); + if (ISLOWER(c)) + *result = TOUPPER(c); + else + *result = c; + result++; + } + for (i = 1; i < len; i++) { + int c = Py_CHARMASK(*s++); + if (ISUPPER(c)) + *result = TOLOWER(c); + else + *result = c; + result++; + } +} + + +PyDoc_STRVAR_shared(_Py_swapcase__doc__, +"B.swapcase() -> copy of B\n\ +\n\ +Return a copy of B with uppercase ASCII characters converted\n\ +to lowercase ASCII and vice versa."); + +void +_Py_bytes_swapcase(char *result, char *s, Py_ssize_t len) +{ + Py_ssize_t i; + + /* + newobj = PyString_FromStringAndSize(NULL, len); + if (newobj == NULL) + return NULL; + s_new = PyString_AsString(newobj); + */ + for (i = 0; i < len; i++) { + int c = Py_CHARMASK(*s++); + if (ISLOWER(c)) { + *result = TOUPPER(c); + } + else if (ISUPPER(c)) { + *result = TOLOWER(c); + } + else + *result = c; + result++; + } +} + diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index da2e23f..18d0f57 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -5,11 +5,8 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "structmember.h" +#include "bytes_methods.h" -/* The nullbytes are used by the stringlib during partition. - * If partition is removed from bytes, nullbytes and its helper - * Init/Fini should also be removed. - */ static PyBytesObject *nullbytes = NULL; void @@ -37,15 +34,20 @@ PyBytes_Init(void) static int _getbytevalue(PyObject* arg, int *value) { - PyObject *intarg = PyNumber_Int(arg); - if (! intarg) - return 0; - *value = PyInt_AsLong(intarg); - Py_DECREF(intarg); - if (*value < 0 || *value >= 256) { - PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + long face_value; + + if (PyInt_Check(arg)) { + face_value = PyInt_AsLong(arg); + if (face_value < 0 || face_value >= 256) { + PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + return 0; + } + } else { + PyErr_Format(PyExc_TypeError, "an integer is required"); return 0; } + + *value = face_value; return 1; } @@ -80,9 +82,7 @@ _getbuffer(PyObject *obj, Py_buffer *view) { PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer; - if (buffer == NULL || - PyUnicode_Check(obj) || - buffer->bf_getbuffer == NULL) + if (buffer == NULL || buffer->bf_getbuffer == NULL) { PyErr_Format(PyExc_TypeError, "Type %.100s doesn't support the buffer API", @@ -1035,13 +1035,18 @@ bytes_dealloc(PyBytesObject *self) #define STRINGLIB_CHAR char #define STRINGLIB_CMP memcmp #define STRINGLIB_LEN PyBytes_GET_SIZE +#define STRINGLIB_STR PyBytes_AS_STRING #define STRINGLIB_NEW PyBytes_FromStringAndSize #define STRINGLIB_EMPTY nullbytes +#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact +#define STRINGLIB_MUTABLE 1 #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/partition.h" +#include "stringlib/ctype.h" +#include "stringlib/transmogrify.h" /* The following Py_LOCAL_INLINE and Py_LOCAL functions @@ -1088,7 +1093,6 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) return res; } - PyDoc_STRVAR(find__doc__, "B.find(sub [,start [,end]]) -> int\n\ \n\ @@ -1118,27 +1122,25 @@ static PyObject * bytes_count(PyBytesObject *self, PyObject *args) { PyObject *sub_obj; - const char *str = PyBytes_AS_STRING(self), *sub; - Py_ssize_t sub_len; + const char *str = PyBytes_AS_STRING(self); Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; + Py_buffer vsub; + PyObject *count_obj; if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - if (PyBytes_Check(sub_obj)) { - sub = PyBytes_AS_STRING(sub_obj); - sub_len = PyBytes_GET_SIZE(sub_obj); - } - /* XXX --> use the modern buffer interface */ - else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) + if (_getbuffer(sub_obj, &vsub) < 0) return NULL; _adjust_indices(&start, &end, PyBytes_GET_SIZE(self)); - return PyInt_FromSsize_t( - stringlib_count(str + start, end - start, sub, sub_len) + count_obj = PyInt_FromSsize_t( + stringlib_count(str + start, end - start, vsub.buf, vsub.len) ); + PyObject_ReleaseBuffer(sub_obj, &vsub); + return count_obj; } @@ -1210,36 +1212,39 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start, Py_ssize_t end, int direction) { Py_ssize_t len = PyBytes_GET_SIZE(self); - Py_ssize_t slen; - const char* sub; const char* str; + Py_buffer vsubstr; + int rv; - if (PyBytes_Check(substr)) { - sub = PyBytes_AS_STRING(substr); - slen = PyBytes_GET_SIZE(substr); - } - /* XXX --> Use the modern buffer interface */ - else if (PyObject_AsCharBuffer(substr, &sub, &slen)) - return -1; str = PyBytes_AS_STRING(self); + if (_getbuffer(substr, &vsubstr) < 0) + return -1; + _adjust_indices(&start, &end, len); if (direction < 0) { /* startswith */ - if (start+slen > len) - return 0; + if (start+vsubstr.len > len) { + rv = 0; + goto done; + } } else { /* endswith */ - if (end-start < slen || start > len) - return 0; + if (end-start < vsubstr.len || start > len) { + rv = 0; + goto done; + } - if (end-slen > start) - start = end - slen; + if (end-vsubstr.len > start) + start = end - vsubstr.len; } - if (end-start >= slen) - return ! memcmp(str+start, sub, slen); - return 0; + if (end-start >= vsubstr.len) + rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len); + +done: + PyObject_ReleaseBuffer(substr, &vsubstr); + return rv; } @@ -1324,7 +1329,6 @@ bytes_endswith(PyBytesObject *self, PyObject *args) } - PyDoc_STRVAR(translate__doc__, "B.translate(table [,deletechars]) -> bytes\n\ \n\ @@ -1340,53 +1344,47 @@ bytes_translate(PyBytesObject *self, PyObject *args) register const char *table; register Py_ssize_t i, c, changed = 0; PyObject *input_obj = (PyObject*)self; - const char *table1, *output_start, *del_table=NULL; - Py_ssize_t inlen, tablen, dellen = 0; + const char *output_start; + Py_ssize_t inlen; PyObject *result; int trans_table[256]; PyObject *tableobj, *delobj = NULL; + Py_buffer vtable, vdel; if (!PyArg_UnpackTuple(args, "translate", 1, 2, &tableobj, &delobj)) return NULL; - if (PyBytes_Check(tableobj)) { - table1 = PyBytes_AS_STRING(tableobj); - tablen = PyBytes_GET_SIZE(tableobj); - } - /* XXX -> Use the modern buffer interface */ - else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen)) + if (_getbuffer(tableobj, &vtable) < 0) return NULL; - if (tablen != 256) { + if (vtable.len != 256) { PyErr_SetString(PyExc_ValueError, "translation table must be 256 characters long"); - return NULL; + result = NULL; + goto done; } if (delobj != NULL) { - if (PyBytes_Check(delobj)) { - del_table = PyBytes_AS_STRING(delobj); - dellen = PyBytes_GET_SIZE(delobj); + if (_getbuffer(delobj, &vdel) < 0) { + result = NULL; + goto done; } - /* XXX -> use the modern buffer interface */ - else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) - return NULL; } else { - del_table = NULL; - dellen = 0; + vdel.buf = NULL; + vdel.len = 0; } - table = table1; + table = (const char *)vtable.buf; inlen = PyBytes_GET_SIZE(input_obj); result = PyBytes_FromStringAndSize((char *)NULL, inlen); if (result == NULL) - return NULL; + goto done; output_start = output = PyBytes_AsString(result); input = PyBytes_AS_STRING(input_obj); - if (dellen == 0) { + if (vdel.len == 0) { /* If no deletions are required, use faster code */ for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); @@ -1394,17 +1392,18 @@ bytes_translate(PyBytesObject *self, PyObject *args) changed = 1; } if (changed || !PyBytes_CheckExact(input_obj)) - return result; + goto done; Py_DECREF(result); Py_INCREF(input_obj); - return input_obj; + result = input_obj; + goto done; } for (i = 0; i < 256; i++) trans_table[i] = Py_CHARMASK(table[i]); - for (i = 0; i < dellen; i++) - trans_table[(int) Py_CHARMASK(del_table[i])] = -1; + for (i = 0; i < vdel.len; i++) + trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1; for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); @@ -1416,11 +1415,17 @@ bytes_translate(PyBytesObject *self, PyObject *args) if (!changed && PyBytes_CheckExact(input_obj)) { Py_DECREF(result); Py_INCREF(input_obj); - return input_obj; + result = input_obj; + goto done; } /* Fix the size of the resulting string */ if (inlen > 0) PyBytes_Resize(result, output - output_start); + +done: + PyObject_ReleaseBuffer(tableobj, &vtable); + if (delobj != NULL) + PyObject_ReleaseBuffer(delobj, &vdel); return result; } @@ -2021,6 +2026,7 @@ replace(PyBytesObject *self, } } + PyDoc_STRVAR(replace__doc__, "B.replace (old, new[, count]) -> bytes\n\ \n\ @@ -2133,7 +2139,6 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) return NULL; } -#define ISSPACE(c) (isspace(Py_CHARMASK(c)) && ((c) & 0x80) == 0) Py_LOCAL_INLINE(PyObject *) split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) @@ -2459,6 +2464,10 @@ end of the bytes."); static PyObject * bytes_extend(PyBytesObject *self, PyObject *arg) { + /* XXX(gps): The docstring says any iterable int will do but the + * bytes_setslice code only accepts something supporting PEP 3118. + * A list or tuple of 0 <= int <= 255 is supposed to work. */ + /* bug being tracked on: http://bugs.python.org/issue1283 */ if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1) return NULL; Py_RETURN_NONE; @@ -2852,11 +2861,11 @@ bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30])."); static int hex_digit_to_int(int c) { - if (isdigit(c)) + if (ISDIGIT(c)) return c - '0'; else { - if (isupper(c)) - c = tolower(c); + if (ISUPPER(c)) + c = TOLOWER(c); if (c >= 'a' && c <= 'f') return c - 'a' + 10; } @@ -2866,26 +2875,34 @@ hex_digit_to_int(int c) static PyObject * bytes_fromhex(PyObject *cls, PyObject *args) { - PyObject *newbytes; - char *hex, *buf; - Py_ssize_t len, byteslen, i, j; + PyObject *newbytes, *hexobj; + char *buf; + unsigned char *hex; + Py_ssize_t byteslen, i, j; int top, bot; + Py_buffer vhex; + + if (!PyArg_ParseTuple(args, "O:fromhex", &hexobj)) + return NULL; - if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len)) + if (_getbuffer(hexobj, &vhex) < 0) return NULL; - byteslen = len / 2; /* max length if there are no spaces */ + byteslen = vhex.len / 2; /* max length if there are no spaces */ + hex = vhex.buf; newbytes = PyBytes_FromStringAndSize(NULL, byteslen); - if (!newbytes) + if (!newbytes) { + PyObject_ReleaseBuffer(hexobj, &vhex); return NULL; + } buf = PyBytes_AS_STRING(newbytes); - for (i = j = 0; i < len; i += 2) { + for (i = j = 0; i < vhex.len; i += 2) { /* skip over spaces in the input */ while (Py_CHARMASK(hex[i]) == ' ') i++; - if (i >= len) + if (i >= vhex.len) break; top = hex_digit_to_int(Py_CHARMASK(hex[i])); bot = hex_digit_to_int(Py_CHARMASK(hex[i+1])); @@ -2900,10 +2917,12 @@ bytes_fromhex(PyObject *cls, PyObject *args) } if (PyBytes_Resize(newbytes, j) < 0) goto error; + PyObject_ReleaseBuffer(hexobj, &vhex); return newbytes; error: Py_DECREF(newbytes); + PyObject_ReleaseBuffer(hexobj, &vhex); return NULL; } @@ -2955,6 +2974,19 @@ bytes_methods[] = { {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__}, {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, startswith__doc__}, + {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, + {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, + {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS, + _Py_capitalize__doc__}, + {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, + _Py_swapcase__doc__}, + {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,_Py_islower__doc__}, + {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,_Py_isupper__doc__}, + {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,_Py_isspace__doc__}, + {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,_Py_isdigit__doc__}, + {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,_Py_istitle__doc__}, + {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,_Py_isalpha__doc__}, + {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,_Py_isalnum__doc__}, {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__}, {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__}, {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__}, @@ -2975,6 +3007,15 @@ bytes_methods[] = { {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, fromhex_doc}, {"join", (PyCFunction)bytes_join, METH_O, join_doc}, + {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, + {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, + {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, + {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, + {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, + {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, + expandtabs__doc__}, + {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS, + splitlines__doc__}, {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc}, {NULL} }; diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt index 82a8774..aec3441 100644 --- a/Objects/stringlib/README.txt +++ b/Objects/stringlib/README.txt @@ -32,3 +32,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) returns the pointer to the character data for the given string object (which must be of the right type) + +int STRINGLIB_CHECK_EXACT(PyObject *) + + returns true if the object is an instance of our type, not a subclass. + +STRINGLIB_MUTABLE + + Must be 0 or 1 to tell the cpp macros in stringlib code if the object + being operated on is mutable or not. diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h new file mode 100644 index 0000000..8951276 --- /dev/null +++ b/Objects/stringlib/ctype.h @@ -0,0 +1,110 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. */ +/* Do not use it with Unicode. */ + +#include "bytes_methods.h" + +static PyObject* +stringlib_isspace(PyObject *self) +{ + return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isalpha(PyObject *self) +{ + return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isalnum(PyObject *self) +{ + return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isdigit(PyObject *self) +{ + return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_islower(PyObject *self) +{ + return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isupper(PyObject *self) +{ + return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_istitle(PyObject *self) +{ + return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + + +/* functions that return a new object partially translated by ctype funcs: */ + +static PyObject* +stringlib_lower(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_lower(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_upper(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_upper(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_title(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_title(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_capitalize(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_capitalize(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_swapcase(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_swapcase(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h index 4cdbb09..3b924b6 100644 --- a/Objects/stringlib/find.h +++ b/Objects/stringlib/find.h @@ -90,7 +90,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, return stringlib_rfind(str + start, end - start, sub, sub_len, start); } -#ifdef STRINGLIB_STR +#ifdef STRINGLIB_WANT_CONTAINS_OBJ Py_LOCAL_INLINE(int) stringlib_contains_obj(PyObject* str, PyObject* sub) diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h new file mode 100644 index 0000000..1ee8e75 --- /dev/null +++ b/Objects/stringlib/transmogrify.h @@ -0,0 +1,362 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. */ +/* Do not use it with Unicode. */ + +#include "bytes_methods.h" + +#ifndef STRINGLIB_MUTABLE +#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0" +#define STRINGLIB_MUTABLE 0 +#endif + +/* the more complicated methods. parts of these should be pulled out into the + shared code in bytes_methods.c to cut down on duplicate code bloat. */ + +PyDoc_STRVAR(expandtabs__doc__, +"B.expandtabs([tabsize]) -> modified copy of B\n\ +\n\ +Return a copy of B where all tab characters are expanded using spaces.\n\ +If tabsize is not given, a tab size of 8 characters is assumed."); + +static PyObject* +stringlib_expandtabs(PyObject *self, PyObject *args) +{ + const char *e, *p; + char *q; + Py_ssize_t i, j, old_j; + PyObject *u; + int tabsize = 8; + + if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + return NULL; + + /* First pass: determine size of output string */ + i = j = old_j = 0; + e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); + for (p = STRINGLIB_STR(self); p < e; p++) + if (*p == '\t') { + if (tabsize > 0) { + j += tabsize - (j % tabsize); + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + if (old_j > j) { + PyErr_SetString(PyExc_OverflowError, + "result is too long"); + return NULL; + } + old_j = j; + } + } + else { + j++; + if (*p == '\n' || *p == '\r') { + i += j; + old_j = j = 0; + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + if (i < 0) { + PyErr_SetString(PyExc_OverflowError, + "result is too long"); + return NULL; + } + } + } + + if ((i + j) < 0) { + /* XXX: this depends on a signed integer overflow to < 0 */ + /* C compilers, including gcc, do -NOT- guarantee this. */ + PyErr_SetString(PyExc_OverflowError, "result is too long"); + return NULL; + } + + /* Second pass: create output string and fill it */ + u = STRINGLIB_NEW(NULL, i + j); + if (!u) + return NULL; + + j = 0; + q = STRINGLIB_STR(u); + + for (p = STRINGLIB_STR(self); p < e; p++) + if (*p == '\t') { + if (tabsize > 0) { + i = tabsize - (j % tabsize); + j += i; + while (i--) + *q++ = ' '; + } + } + else { + j++; + *q++ = *p; + if (*p == '\n' || *p == '\r') + j = 0; + } + + return u; +} + +Py_LOCAL_INLINE(PyObject *) +pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) +{ + PyObject *u; + + if (left < 0) + left = 0; + if (right < 0) + right = 0; + + if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject *)self; +#endif /* STRINGLIB_MUTABLE */ + } + + u = STRINGLIB_NEW(NULL, + left + STRINGLIB_LEN(self) + right); + if (u) { + if (left) + memset(STRINGLIB_STR(u), fill, left); + Py_MEMCPY(STRINGLIB_STR(u) + left, + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + if (right) + memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), + fill, right); + } + + return u; +} + +PyDoc_STRVAR(ljust__doc__, +"B.ljust(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B left justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +static PyObject * +stringlib_ljust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); +} + + +PyDoc_STRVAR(rjust__doc__, +"B.rjust(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B right justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_rjust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); +} + + +PyDoc_STRVAR(center__doc__, +"B.center(width[, fillchar]) -> modified copy of B\n" +"\n" +"Return B centered in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_center(PyObject *self, PyObject *args) +{ + Py_ssize_t marg, left; + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + marg = width - STRINGLIB_LEN(self); + left = marg / 2 + (marg & width & 1); + + return pad(self, left, marg - left, fillchar); +} + +PyDoc_STRVAR(zfill__doc__, +"B.zfill(width) -> modified copy of B\n" +"\n" +"Pad a numeric string B with zeros on the left, to fill a field\n" +"of the specified width. B is never truncated."); + +static PyObject * +stringlib_zfill(PyObject *self, PyObject *args) +{ + Py_ssize_t fill; + PyObject *s; + char *p; + Py_ssize_t width; + + if (!PyArg_ParseTuple(args, "n:zfill", &width)) + return NULL; + + if (STRINGLIB_LEN(self) >= width) { + if (STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + else + return STRINGLIB_NEW( + STRINGLIB_STR(self), + STRINGLIB_LEN(self) + ); + } + + fill = width - STRINGLIB_LEN(self); + + s = pad(self, fill, 0, '0'); + + if (s == NULL) + return NULL; + + p = STRINGLIB_STR(s); + if (p[fill] == '+' || p[fill] == '-') { + /* move sign to beginning of string */ + p[0] = p[fill]; + p[fill] = '0'; + } + + return (PyObject*) s; +} + + +#define _STRINGLIB_SPLIT_APPEND(data, left, right) \ + str = STRINGLIB_NEW((data) + (left), \ + (right) - (left)); \ + if (str == NULL) \ + goto onError; \ + if (PyList_Append(list, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); + +PyDoc_STRVAR(splitlines__doc__, +"B.splitlines([keepends]) -> list of lines\n\ +\n\ +Return a list of the lines in B, breaking at line boundaries.\n\ +Line breaks are not included in the resulting list unless keepends\n\ +is given and true."); + +static PyObject* +stringlib_splitlines(PyObject *self, PyObject *args) +{ + register Py_ssize_t i; + register Py_ssize_t j; + Py_ssize_t len; + int keepends = 0; + PyObject *list; + PyObject *str; + char *data; + + if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) + return NULL; + + data = STRINGLIB_STR(self); + len = STRINGLIB_LEN(self); + + /* This does not use the preallocated list because splitlines is + usually run with hundreds of newlines. The overhead of + switching between PyList_SET_ITEM and append causes about a + 2-3% slowdown for that common case. A smarter implementation + could move the if check out, so the SET_ITEMs are done first + and the appends only done when the prealloc buffer is full. + That's too much work for little gain.*/ + + list = PyList_New(0); + if (!list) + goto onError; + + for (i = j = 0; i < len; ) { + Py_ssize_t eol; + + /* Find a line and append it */ + while (i < len && data[i] != '\n' && data[i] != '\r') + i++; + + /* Skip the line break reading CRLF as one line break */ + eol = i; + if (i < len) { + if (data[i] == '\r' && i + 1 < len && + data[i+1] == '\n') + i += 2; + else + i++; + if (keepends) + eol = i; + } + _STRINGLIB_SPLIT_APPEND(data, j, eol); + j = i; + } + if (j < len) { + _STRINGLIB_SPLIT_APPEND(data, j, len); + } + + return list; + + onError: + Py_XDECREF(list); + return NULL; +} + +#undef _STRINGLIB_SPLIT_APPEND + diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index 25c1d4f..fa6140f 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -22,6 +22,8 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_TOSTR PyObject_Unicode +#define STRINGLIB_WANT_CONTAINS_OBJ 1 + /* STRINGLIB_CMP was defined as: Py_LOCAL_INLINE(int) diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 3b4ddaf..83e3cfa 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -4,256 +4,7 @@ #include "Python.h" -/* Our own locale-independent ctype.h-like macros */ -/* XXX Move into a header file? */ - -#define FLAG_LOWER 0x01 -#define FLAG_UPPER 0x02 -#define FLAG_ALPHA (FLAG_LOWER|FLAG_UPPER) -#define FLAG_DIGIT 0x04 -#define FLAG_ALNUM (FLAG_ALPHA|FLAG_DIGIT) -#define FLAG_SPACE 0x08 -#define FLAG_XDIGIT 0x10 - -static unsigned int ctype_table[256] = { - 0, /* 0x0 '\x00' */ - 0, /* 0x1 '\x01' */ - 0, /* 0x2 '\x02' */ - 0, /* 0x3 '\x03' */ - 0, /* 0x4 '\x04' */ - 0, /* 0x5 '\x05' */ - 0, /* 0x6 '\x06' */ - 0, /* 0x7 '\x07' */ - 0, /* 0x8 '\x08' */ - FLAG_SPACE, /* 0x9 '\t' */ - FLAG_SPACE, /* 0xa '\n' */ - FLAG_SPACE, /* 0xb '\v' */ - FLAG_SPACE, /* 0xc '\f' */ - FLAG_SPACE, /* 0xd '\r' */ - 0, /* 0xe '\x0e' */ - 0, /* 0xf '\x0f' */ - 0, /* 0x10 '\x10' */ - 0, /* 0x11 '\x11' */ - 0, /* 0x12 '\x12' */ - 0, /* 0x13 '\x13' */ - 0, /* 0x14 '\x14' */ - 0, /* 0x15 '\x15' */ - 0, /* 0x16 '\x16' */ - 0, /* 0x17 '\x17' */ - 0, /* 0x18 '\x18' */ - 0, /* 0x19 '\x19' */ - 0, /* 0x1a '\x1a' */ - 0, /* 0x1b '\x1b' */ - 0, /* 0x1c '\x1c' */ - 0, /* 0x1d '\x1d' */ - 0, /* 0x1e '\x1e' */ - 0, /* 0x1f '\x1f' */ - FLAG_SPACE, /* 0x20 ' ' */ - 0, /* 0x21 '!' */ - 0, /* 0x22 '"' */ - 0, /* 0x23 '#' */ - 0, /* 0x24 '$' */ - 0, /* 0x25 '%' */ - 0, /* 0x26 '&' */ - 0, /* 0x27 "'" */ - 0, /* 0x28 '(' */ - 0, /* 0x29 ')' */ - 0, /* 0x2a '*' */ - 0, /* 0x2b '+' */ - 0, /* 0x2c ',' */ - 0, /* 0x2d '-' */ - 0, /* 0x2e '.' */ - 0, /* 0x2f '/' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x30 '0' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x31 '1' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x32 '2' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x33 '3' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x34 '4' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x35 '5' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x36 '6' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x37 '7' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x38 '8' */ - FLAG_DIGIT|FLAG_XDIGIT, /* 0x39 '9' */ - 0, /* 0x3a ':' */ - 0, /* 0x3b ';' */ - 0, /* 0x3c '<' */ - 0, /* 0x3d '=' */ - 0, /* 0x3e '>' */ - 0, /* 0x3f '?' */ - 0, /* 0x40 '@' */ - FLAG_UPPER|FLAG_XDIGIT, /* 0x41 'A' */ - FLAG_UPPER|FLAG_XDIGIT, /* 0x42 'B' */ - FLAG_UPPER|FLAG_XDIGIT, /* 0x43 'C' */ - FLAG_UPPER|FLAG_XDIGIT, /* 0x44 'D' */ - FLAG_UPPER|FLAG_XDIGIT, /* 0x45 'E' */ - FLAG_UPPER|FLAG_XDIGIT, /* 0x46 'F' */ - FLAG_UPPER, /* 0x47 'G' */ - FLAG_UPPER, /* 0x48 'H' */ - FLAG_UPPER, /* 0x49 'I' */ - FLAG_UPPER, /* 0x4a 'J' */ - FLAG_UPPER, /* 0x4b 'K' */ - FLAG_UPPER, /* 0x4c 'L' */ - FLAG_UPPER, /* 0x4d 'M' */ - FLAG_UPPER, /* 0x4e 'N' */ - FLAG_UPPER, /* 0x4f 'O' */ - FLAG_UPPER, /* 0x50 'P' */ - FLAG_UPPER, /* 0x51 'Q' */ - FLAG_UPPER, /* 0x52 'R' */ - FLAG_UPPER, /* 0x53 'S' */ - FLAG_UPPER, /* 0x54 'T' */ - FLAG_UPPER, /* 0x55 'U' */ - FLAG_UPPER, /* 0x56 'V' */ - FLAG_UPPER, /* 0x57 'W' */ - FLAG_UPPER, /* 0x58 'X' */ - FLAG_UPPER, /* 0x59 'Y' */ - FLAG_UPPER, /* 0x5a 'Z' */ - 0, /* 0x5b '[' */ - 0, /* 0x5c '\\' */ - 0, /* 0x5d ']' */ - 0, /* 0x5e '^' */ - 0, /* 0x5f '_' */ - 0, /* 0x60 '`' */ - FLAG_LOWER|FLAG_XDIGIT, /* 0x61 'a' */ - FLAG_LOWER|FLAG_XDIGIT, /* 0x62 'b' */ - FLAG_LOWER|FLAG_XDIGIT, /* 0x63 'c' */ - FLAG_LOWER|FLAG_XDIGIT, /* 0x64 'd' */ - FLAG_LOWER|FLAG_XDIGIT, /* 0x65 'e' */ - FLAG_LOWER|FLAG_XDIGIT, /* 0x66 'f' */ - FLAG_LOWER, /* 0x67 'g' */ - FLAG_LOWER, /* 0x68 'h' */ - FLAG_LOWER, /* 0x69 'i' */ - FLAG_LOWER, /* 0x6a 'j' */ - FLAG_LOWER, /* 0x6b 'k' */ - FLAG_LOWER, /* 0x6c 'l' */ - FLAG_LOWER, /* 0x6d 'm' */ - FLAG_LOWER, /* 0x6e 'n' */ - FLAG_LOWER, /* 0x6f 'o' */ - FLAG_LOWER, /* 0x70 'p' */ - FLAG_LOWER, /* 0x71 'q' */ - FLAG_LOWER, /* 0x72 'r' */ - FLAG_LOWER, /* 0x73 's' */ - FLAG_LOWER, /* 0x74 't' */ - FLAG_LOWER, /* 0x75 'u' */ - FLAG_LOWER, /* 0x76 'v' */ - FLAG_LOWER, /* 0x77 'w' */ - FLAG_LOWER, /* 0x78 'x' */ - FLAG_LOWER, /* 0x79 'y' */ - FLAG_LOWER, /* 0x7a 'z' */ - 0, /* 0x7b '{' */ - 0, /* 0x7c '|' */ - 0, /* 0x7d '}' */ - 0, /* 0x7e '~' */ - 0, /* 0x7f '\x7f' */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -#define ISLOWER(c) (ctype_table[Py_CHARMASK(c)] & FLAG_LOWER) -#define ISUPPER(c) (ctype_table[Py_CHARMASK(c)] & FLAG_UPPER) -#define ISALPHA(c) (ctype_table[Py_CHARMASK(c)] & FLAG_ALPHA) -#define ISDIGIT(c) (ctype_table[Py_CHARMASK(c)] & FLAG_DIGIT) -#define ISXDIGIT(c) (ctype_table[Py_CHARMASK(c)] & FLAG_XDIGIT) -#define ISALNUM(c) (ctype_table[Py_CHARMASK(c)] & FLAG_ALNUM) -#define ISSPACE(c) (ctype_table[Py_CHARMASK(c)] & FLAG_SPACE) - -#undef islower -#define islower(c) undefined_islower(c) -#undef isupper -#define isupper(c) undefined_isupper(c) -#undef isalpha -#define isalpha(c) undefined_isalpha(c) -#undef isdigit -#define isdigit(c) undefined_isdigit(c) -#undef isxdigit -#define isxdigit(c) undefined_isxdigit(c) -#undef isalnum -#define isalnum(c) undefined_isalnum(c) -#undef isspace -#define isspace(c) undefined_isspace(c) - -static unsigned char ctype_tolower[256] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}; - -static unsigned char ctype_toupper[256] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}; - -#define TOLOWER(c) (ctype_tolower[Py_CHARMASK(c)]) -#define TOUPPER(c) (ctype_toupper[Py_CHARMASK(c)]) - -#undef tolower -#define tolower(c) undefined_tolower(c) -#undef toupper -#define toupper(c) undefined_toupper(c) +#include "bytes_methods.h" #ifdef COUNT_ALLOCS int null_strings, one_strings; @@ -1002,14 +753,19 @@ PyString_AsStringAndSize(register PyObject *obj, #define STRINGLIB_LEN PyString_GET_SIZE #define STRINGLIB_NEW PyString_FromStringAndSize #define STRINGLIB_STR PyString_AS_STRING +#define STRINGLIB_WANT_CONTAINS_OBJ 1 #define STRINGLIB_EMPTY nullstring +#define STRINGLIB_CHECK_EXACT PyString_CheckExact +#define STRINGLIB_MUTABLE 0 #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/partition.h" +#include "stringlib/ctype.h" +#include "stringlib/transmogrify.h" PyObject * @@ -1466,18 +1222,6 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define PREALLOC_SIZE(maxsplit) \ (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) -#define SPLIT_APPEND(data, left, right) \ - str = PyString_FromStringAndSize((data) + (left), \ - (right) - (left)); \ - if (str == NULL) \ - goto onError; \ - if (PyList_Append(list, str)) { \ - Py_DECREF(str); \ - goto onError; \ - } \ - else \ - Py_DECREF(str); - #define SPLIT_ADD(data, left, right) { \ str = PyString_FromStringAndSize((data) + (left), \ (right) - (left)); \ @@ -1839,6 +1583,10 @@ onError: return NULL; } +#undef SPLIT_ADD +#undef MAX_PREALLOC +#undef PREALLOC_SIZE + PyDoc_STRVAR(join__doc__, "S.join(sequence) -> string\n\ @@ -2222,136 +1970,6 @@ string_rstrip(PyStringObject *self, PyObject *args) } -PyDoc_STRVAR(lower__doc__, -"S.lower() -> string\n\ -\n\ -Return a copy of the string S converted to lowercase."); - -static PyObject * -string_lower(PyStringObject *self) -{ - char *s; - Py_ssize_t i, n = PyString_GET_SIZE(self); - PyObject *newobj; - - newobj = PyString_FromStringAndSize(NULL, n); - if (!newobj) - return NULL; - - s = PyString_AS_STRING(newobj); - - Py_MEMCPY(s, PyString_AS_STRING(self), n); - - for (i = 0; i < n; i++) { - int c = Py_CHARMASK(s[i]); - if (ISUPPER(c)) - s[i] = TOLOWER(c); - } - - return newobj; -} - -PyDoc_STRVAR(upper__doc__, -"S.upper() -> string\n\ -\n\ -Return a copy of the string S converted to uppercase."); - -static PyObject * -string_upper(PyStringObject *self) -{ - char *s; - Py_ssize_t i, n = PyString_GET_SIZE(self); - PyObject *newobj; - - newobj = PyString_FromStringAndSize(NULL, n); - if (!newobj) - return NULL; - - s = PyString_AS_STRING(newobj); - - Py_MEMCPY(s, PyString_AS_STRING(self), n); - - for (i = 0; i < n; i++) { - int c = Py_CHARMASK(s[i]); - if (ISLOWER(c)) - s[i] = TOUPPER(c); - } - - return newobj; -} - -PyDoc_STRVAR(title__doc__, -"S.title() -> string\n\ -\n\ -Return a titlecased version of S, i.e. words start with uppercase\n\ -characters, all remaining cased characters have lowercase."); - -static PyObject* -string_title(PyStringObject *self) -{ - char *s = PyString_AS_STRING(self), *s_new; - Py_ssize_t i, n = PyString_GET_SIZE(self); - int previous_is_cased = 0; - PyObject *newobj; - - newobj = PyString_FromStringAndSize(NULL, n); - if (newobj == NULL) - return NULL; - s_new = PyString_AsString(newobj); - for (i = 0; i < n; i++) { - int c = Py_CHARMASK(*s++); - if (ISLOWER(c)) { - if (!previous_is_cased) - c = TOUPPER(c); - previous_is_cased = 1; - } else if (ISUPPER(c)) { - if (previous_is_cased) - c = TOLOWER(c); - previous_is_cased = 1; - } else - previous_is_cased = 0; - *s_new++ = c; - } - return newobj; -} - -PyDoc_STRVAR(capitalize__doc__, -"S.capitalize() -> string\n\ -\n\ -Return a copy of the string S with only its first character\n\ -capitalized."); - -static PyObject * -string_capitalize(PyStringObject *self) -{ - char *s = PyString_AS_STRING(self), *s_new; - Py_ssize_t i, n = PyString_GET_SIZE(self); - PyObject *newobj; - - newobj = PyString_FromStringAndSize(NULL, n); - if (newobj == NULL) - return NULL; - s_new = PyString_AsString(newobj); - if (0 < n) { - int c = Py_CHARMASK(*s++); - if (ISLOWER(c)) - *s_new = TOUPPER(c); - else - *s_new = c; - s_new++; - } - for (i = 1; i < n; i++) { - int c = Py_CHARMASK(*s++); - if (ISUPPER(c)) - *s_new = TOLOWER(c); - else - *s_new = c; - s_new++; - } - return newobj; -} - - PyDoc_STRVAR(count__doc__, "S.count(sub[, start[, end]]) -> int\n\ \n\ @@ -2393,38 +2011,6 @@ string_count(PyStringObject *self, PyObject *args) ); } -PyDoc_STRVAR(swapcase__doc__, -"S.swapcase() -> string\n\ -\n\ -Return a copy of the string S with uppercase characters\n\ -converted to lowercase and vice versa."); - -static PyObject * -string_swapcase(PyStringObject *self) -{ - char *s = PyString_AS_STRING(self), *s_new; - Py_ssize_t i, n = PyString_GET_SIZE(self); - PyObject *newobj; - - newobj = PyString_FromStringAndSize(NULL, n); - if (newobj == NULL) - return NULL; - s_new = PyString_AsString(newobj); - for (i = 0; i < n; i++) { - int c = Py_CHARMASK(*s++); - if (ISLOWER(c)) { - *s_new = TOUPPER(c); - } - else if (ISUPPER(c)) { - *s_new = TOLOWER(c); - } - else - *s_new = c; - s_new++; - } - return newobj; -} - PyDoc_STRVAR(translate__doc__, "S.translate(table [,deletechars]) -> string\n\ @@ -3380,548 +2966,6 @@ string_decode(PyStringObject *self, PyObject *args) } -PyDoc_STRVAR(expandtabs__doc__, -"S.expandtabs([tabsize]) -> string\n\ -\n\ -Return a copy of S where all tab characters are expanded using spaces.\n\ -If tabsize is not given, a tab size of 8 characters is assumed."); - -static PyObject* -string_expandtabs(PyStringObject *self, PyObject *args) -{ - const char *e, *p; - char *q; - Py_ssize_t i, j, old_j; - PyObject *u; - int tabsize = 8; - - if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) - return NULL; - - /* First pass: determine size of output string */ - i = j = old_j = 0; - e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); - for (p = PyString_AS_STRING(self); p < e; p++) - if (*p == '\t') { - if (tabsize > 0) { - j += tabsize - (j % tabsize); - if (old_j > j) { - PyErr_SetString(PyExc_OverflowError, - "new string is too long"); - return NULL; - } - old_j = j; - } - } - else { - j++; - if (*p == '\n' || *p == '\r') { - i += j; - old_j = j = 0; - if (i < 0) { - PyErr_SetString(PyExc_OverflowError, - "new string is too long"); - return NULL; - } - } - } - - if ((i + j) < 0) { - PyErr_SetString(PyExc_OverflowError, "new string is too long"); - return NULL; - } - - /* Second pass: create output string and fill it */ - u = PyString_FromStringAndSize(NULL, i + j); - if (!u) - return NULL; - - j = 0; - q = PyString_AS_STRING(u); - - for (p = PyString_AS_STRING(self); p < e; p++) - if (*p == '\t') { - if (tabsize > 0) { - i = tabsize - (j % tabsize); - j += i; - while (i--) - *q++ = ' '; - } - } - else { - j++; - *q++ = *p; - if (*p == '\n' || *p == '\r') - j = 0; - } - - return u; -} - -Py_LOCAL_INLINE(PyObject *) -pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill) -{ - PyObject *u; - - if (left < 0) - left = 0; - if (right < 0) - right = 0; - - if (left == 0 && right == 0 && PyString_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; - } - - u = PyString_FromStringAndSize(NULL, - left + PyString_GET_SIZE(self) + right); - if (u) { - if (left) - memset(PyString_AS_STRING(u), fill, left); - Py_MEMCPY(PyString_AS_STRING(u) + left, - PyString_AS_STRING(self), - PyString_GET_SIZE(self)); - if (right) - memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), - fill, right); - } - - return u; -} - -PyDoc_STRVAR(ljust__doc__, -"S.ljust(width[, fillchar]) -> string\n" -"\n" -"Return S left justified in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)."); - -static PyObject * -string_ljust(PyStringObject *self, PyObject *args) -{ - Py_ssize_t width; - char fillchar = ' '; - - if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) - return NULL; - - if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*) self; - } - - return pad(self, 0, width - PyString_GET_SIZE(self), fillchar); -} - - -PyDoc_STRVAR(rjust__doc__, -"S.rjust(width[, fillchar]) -> string\n" -"\n" -"Return S right justified in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)"); - -static PyObject * -string_rjust(PyStringObject *self, PyObject *args) -{ - Py_ssize_t width; - char fillchar = ' '; - - if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) - return NULL; - - if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*) self; - } - - return pad(self, width - PyString_GET_SIZE(self), 0, fillchar); -} - - -PyDoc_STRVAR(center__doc__, -"S.center(width[, fillchar]) -> string\n" -"\n" -"Return S centered in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)"); - -static PyObject * -string_center(PyStringObject *self, PyObject *args) -{ - Py_ssize_t marg, left; - Py_ssize_t width; - char fillchar = ' '; - - if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) - return NULL; - - if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*) self; - } - - marg = width - PyString_GET_SIZE(self); - left = marg / 2 + (marg & width & 1); - - return pad(self, left, marg - left, fillchar); -} - -PyDoc_STRVAR(zfill__doc__, -"S.zfill(width) -> string\n" -"\n" -"Pad a numeric string S with zeros on the left, to fill a field\n" -"of the specified width. The string S is never truncated."); - -static PyObject * -string_zfill(PyStringObject *self, PyObject *args) -{ - Py_ssize_t fill; - PyObject *s; - char *p; - Py_ssize_t width; - - if (!PyArg_ParseTuple(args, "n:zfill", &width)) - return NULL; - - if (PyString_GET_SIZE(self) >= width) { - if (PyString_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*) self; - } - else - return PyString_FromStringAndSize( - PyString_AS_STRING(self), - PyString_GET_SIZE(self) - ); - } - - fill = width - PyString_GET_SIZE(self); - - s = pad(self, fill, 0, '0'); - - if (s == NULL) - return NULL; - - p = PyString_AS_STRING(s); - if (p[fill] == '+' || p[fill] == '-') { - /* move sign to beginning of string */ - p[0] = p[fill]; - p[fill] = '0'; - } - - return (PyObject*) s; -} - -PyDoc_STRVAR(isspace__doc__, -"S.isspace() -> bool\n\ -\n\ -Return True if all characters in S are whitespace\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -string_isspace(PyStringObject *self) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && - ISSPACE(*p)) - return PyBool_FromLong(1); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - for (; p < e; p++) { - if (!ISSPACE(*p)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - - -PyDoc_STRVAR(isalpha__doc__, -"S.isalpha() -> bool\n\ -\n\ -Return True if all characters in S are alphabetic\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -string_isalpha(PyStringObject *self) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && - ISALPHA(*p)) - return PyBool_FromLong(1); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - for (; p < e; p++) { - if (!ISALPHA(*p)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - - -PyDoc_STRVAR(isalnum__doc__, -"S.isalnum() -> bool\n\ -\n\ -Return True if all characters in S are alphanumeric\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -string_isalnum(PyStringObject *self) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && ISALNUM(*p)) - return PyBool_FromLong(1); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - for (; p < e; p++) { - if (!ISALNUM(*p)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - - -PyDoc_STRVAR(isdigit__doc__, -"S.isdigit() -> bool\n\ -\n\ -Return True if all characters in S are digits\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -string_isdigit(PyStringObject *self) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && ISDIGIT(*p)) - return PyBool_FromLong(1); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - for (; p < e; p++) { - if (!ISDIGIT(*p)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - - -PyDoc_STRVAR(islower__doc__, -"S.islower() -> bool\n\ -\n\ -Return True if all cased characters in S are lowercase and there is\n\ -at least one cased character in S, False otherwise."); - -static PyObject* -string_islower(PyStringObject *self) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - int cased; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1) - return PyBool_FromLong(ISLOWER(*p)); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - cased = 0; - for (; p < e; p++) { - if (ISUPPER(*p)) - return PyBool_FromLong(0); - else if (!cased && ISLOWER(*p)) - cased = 1; - } - return PyBool_FromLong(cased); -} - - -PyDoc_STRVAR(isupper__doc__, -"S.isupper() -> bool\n\ -\n\ -Return True if all cased characters in S are uppercase and there is\n\ -at least one cased character in S, False otherwise."); - -static PyObject* -string_isupper(PyStringObject *self) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - int cased; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1) - return PyBool_FromLong(ISUPPER(*p)); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - cased = 0; - for (; p < e; p++) { - if (ISLOWER(*p)) - return PyBool_FromLong(0); - else if (!cased && ISUPPER(*p)) - cased = 1; - } - return PyBool_FromLong(cased); -} - - -PyDoc_STRVAR(istitle__doc__, -"S.istitle() -> bool\n\ -\n\ -Return True if S is a titlecased string and there is at least one\n\ -character in S, i.e. uppercase characters may only follow uncased\n\ -characters and lowercase characters only cased ones. Return False\n\ -otherwise."); - -static PyObject* -string_istitle(PyStringObject *self, PyObject *uncased) -{ - register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); - register const unsigned char *e; - int cased, previous_is_cased; - - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1) - return PyBool_FromLong(ISUPPER(*p)); - - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - - e = p + PyString_GET_SIZE(self); - cased = 0; - previous_is_cased = 0; - for (; p < e; p++) { - register const unsigned char ch = *p; - - if (ISUPPER(ch)) { - if (previous_is_cased) - return PyBool_FromLong(0); - previous_is_cased = 1; - cased = 1; - } - else if (ISLOWER(ch)) { - if (!previous_is_cased) - return PyBool_FromLong(0); - previous_is_cased = 1; - cased = 1; - } - else - previous_is_cased = 0; - } - return PyBool_FromLong(cased); -} - - -PyDoc_STRVAR(splitlines__doc__, -"S.splitlines([keepends]) -> list of strings\n\ -\n\ -Return a list of the lines in S, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); - -static PyObject* -string_splitlines(PyStringObject *self, PyObject *args) -{ - register Py_ssize_t i; - register Py_ssize_t j; - Py_ssize_t len; - int keepends = 0; - PyObject *list; - PyObject *str; - char *data; - - if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) - return NULL; - - data = PyString_AS_STRING(self); - len = PyString_GET_SIZE(self); - - /* This does not use the preallocated list because splitlines is - usually run with hundreds of newlines. The overhead of - switching between PyList_SET_ITEM and append causes about a - 2-3% slowdown for that common case. A smarter implementation - could move the if check out, so the SET_ITEMs are done first - and the appends only done when the prealloc buffer is full. - That's too much work for little gain.*/ - - list = PyList_New(0); - if (!list) - goto onError; - - for (i = j = 0; i < len; ) { - Py_ssize_t eol; - - /* Find a line and append it */ - while (i < len && data[i] != '\n' && data[i] != '\r') - i++; - - /* Skip the line break reading CRLF as one line break */ - eol = i; - if (i < len) { - if (data[i] == '\r' && i + 1 < len && - data[i+1] == '\n') - i += 2; - else - i++; - if (keepends) - eol = i; - } - SPLIT_APPEND(data, j, eol); - j = i; - } - if (j < len) { - SPLIT_APPEND(data, j, len); - } - - return list; - - onError: - Py_XDECREF(list); - return NULL; -} - -#undef SPLIT_APPEND -#undef SPLIT_ADD -#undef MAX_PREALLOC -#undef PREALLOC_SIZE - static PyObject * string_getnewargs(PyStringObject *v) { @@ -3934,17 +2978,24 @@ string_methods[] = { {"join", (PyCFunction)string_join, METH_O, join__doc__}, {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, - {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, - {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, - {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, - {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__}, - {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__}, - {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__}, - {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__}, - {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__}, - {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__}, - {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, - capitalize__doc__}, + {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, + {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, + {"islower", (PyCFunction)stringlib_islower, METH_NOARGS, + _Py_islower__doc__}, + {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, + _Py_isupper__doc__}, + {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS, + _Py_isspace__doc__}, + {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS, + _Py_isdigit__doc__}, + {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS, + _Py_istitle__doc__}, + {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS, + _Py_isalpha__doc__}, + {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, + _Py_isalnum__doc__}, + {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS, + _Py_capitalize__doc__}, {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__}, @@ -3961,20 +3012,20 @@ string_methods[] = { {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__}, {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, - {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, - swapcase__doc__}, + {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, + _Py_swapcase__doc__}, {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__}, - {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__}, - {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__}, - {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, - {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, - {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, + {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, + {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, + {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, + {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, + {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__}, {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__}, - {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, + {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS, expandtabs__doc__}, - {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, + {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS, splitlines__doc__}, {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, {NULL, NULL} /* sentinel */ -- cgit v0.12