From 75a902db7859a4751743e98530c5d96a672641be Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 19 Oct 2007 22:06:24 +0000 Subject: Patch 1280, by Alexandre Vassalotti. Make PyString's indexing and iteration return integers. (I changed a few of Alexandre's decisions -- GvR.) --- Lib/dis.py | 19 ++++++++----------- Lib/encodings/__init__.py | 2 ++ Lib/modulefinder.py | 2 +- Lib/sre_parse.py | 10 ++++++++-- Lib/test/string_tests.py | 4 ++++ Lib/test/test_bytes.py | 2 +- Lib/test/test_set.py | 16 ++++++++-------- Lib/test/test_struct.py | 4 ++-- Objects/stringobject.c | 35 +++++++++-------------------------- 9 files changed, 43 insertions(+), 51 deletions(-) diff --git a/Lib/dis.py b/Lib/dis.py index 200dee2..4cf452a 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -117,8 +117,7 @@ def disassemble(co, lasti=-1): extended_arg = 0 free = None while i < n: - c = code[i] - op = ord(c) + op = code[i] if i in linestarts: if i > 0: print() @@ -134,7 +133,7 @@ def disassemble(co, lasti=-1): print(opname[op].ljust(20), end=' ') i = i+1 if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg + oparg = code[i] + code[i+1]*256 + extended_arg extended_arg = 0 i = i+2 if op == EXTENDED_ARG: @@ -162,8 +161,7 @@ def disassemble_string(code, lasti=-1, varnames=None, names=None, n = len(code) i = 0 while i < n: - c = code[i] - op = ord(c) + op = code[i] if i == lasti: print('-->', end=' ') else: print(' ', end=' ') if i in labels: print('>>', end=' ') @@ -172,7 +170,7 @@ def disassemble_string(code, lasti=-1, varnames=None, names=None, print(opname[op].ljust(15), end=' ') i = i+1 if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + oparg = code[i] + code[i+1]*256 i = i+2 print(repr(oparg).rjust(5), end=' ') if op in hasconst: @@ -208,11 +206,10 @@ def findlabels(code): n = len(code) i = 0 while i < n: - c = code[i] - op = ord(c) + op = code[i] i = i+1 if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + oparg = code[i] + 
code[i+1]*256 i = i+2 label = -1 if op in hasjrel: @@ -230,8 +227,8 @@ def findlinestarts(code): Generate pairs (offset, lineno) as described in Python/compile.c. """ - byte_increments = [ord(c) for c in code.co_lnotab[0::2]] - line_increments = [ord(c) for c in code.co_lnotab[1::2]] + byte_increments = list(code.co_lnotab[0::2]) + line_increments = list(code.co_lnotab[1::2]) lastlineno = None lineno = code.co_firstlineno diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index ed25e91..87e5745 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -52,6 +52,8 @@ def normalize_encoding(encoding): non-ASCII characters, these must be Latin-1 compatible. """ + if isinstance(encoding, str8): + encoding = str(encoding, "ascii") chars = [] punct = False for c in encoding: diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index a57911c..1dbc5bb 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -367,7 +367,7 @@ class ModuleFinder: consts = co.co_consts LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME while code: - c = code[0] + c = chr(code[0]) if c in STORE_OPS: oparg, = unpack('= len(self.string): self.next = None return - char = self.string[self.index] - if char[0] == "\\": + char = self.string[self.index:self.index+1] + # Special case for the str8, since indexing returns an integer + # XXX This is only needed for test_bug_926075 in test_re.py + if isinstance(self.string, str8): + char = chr(char) + if char == "\\": try: c = self.string[self.index + 1] except IndexError: raise error("bogus escape (end of line)") + if isinstance(self.string, str8): + char = chr(c) char = char + c self.index = self.index + len(char) self.next = char diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index cb8900d..9da062e 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -558,6 +558,10 @@ class CommonTest(BaseTest): a = self.type2test('DNSSEC') b = self.type2test('') for c in a: + # Special 
case for the str8, since indexing returns an integer + # XXX Maybe it would be a good idea to separate str8's tests... + if self.type2test == str8: + c = chr(c) b += c hash(b) self.assertEqual(hash(a), hash(b)) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 391a660..c7c6bd3 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -348,7 +348,7 @@ class BytesTest(unittest.TestCase): sample = str8("Hello world\n\x80\x81\xfe\xff") buf = memoryview(sample) b = bytes(buf) - self.assertEqual(b, bytes(map(ord, sample))) + self.assertEqual(b, bytes(sample)) def test_to_str(self): sample = "Hello world\n\x80\x81\xfe\xff" diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 3b8e747..86a5636 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -72,7 +72,7 @@ class TestJointOps(unittest.TestCase): self.assertEqual(type(u), self.thetype) self.assertRaises(PassThru, self.s.union, check_pass_thru()) self.assertRaises(TypeError, self.s.union, [[]]) - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: self.assertEqual(self.thetype('abcba').union(C('cdc')), set('abcd')) self.assertEqual(self.thetype('abcba').union(C('efgfe')), set('abcefg')) self.assertEqual(self.thetype('abcba').union(C('ccb')), set('abc')) @@ -96,7 +96,7 @@ class TestJointOps(unittest.TestCase): self.assertEqual(self.s, self.thetype(self.word)) self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.intersection, check_pass_thru()) - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: self.assertEqual(self.thetype('abcba').intersection(C('cdc')), set('cc')) self.assertEqual(self.thetype('abcba').intersection(C('efgfe')), set('')) self.assertEqual(self.thetype('abcba').intersection(C('ccb')), set('bc')) @@ -121,7 +121,7 @@ class TestJointOps(unittest.TestCase): self.assertEqual(type(i), self.thetype) 
self.assertRaises(PassThru, self.s.difference, check_pass_thru()) self.assertRaises(TypeError, self.s.difference, [[]]) - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: self.assertEqual(self.thetype('abcba').difference(C('cdc')), set('ab')) self.assertEqual(self.thetype('abcba').difference(C('efgfe')), set('abc')) self.assertEqual(self.thetype('abcba').difference(C('ccb')), set('a')) @@ -146,7 +146,7 @@ class TestJointOps(unittest.TestCase): self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru()) self.assertRaises(TypeError, self.s.symmetric_difference, [[]]) - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: self.assertEqual(self.thetype('abcba').symmetric_difference(C('cdc')), set('abd')) self.assertEqual(self.thetype('abcba').symmetric_difference(C('efgfe')), set('abcefg')) self.assertEqual(self.thetype('abcba').symmetric_difference(C('ccb')), set('a')) @@ -390,7 +390,7 @@ class TestSet(TestJointOps): self.assertRaises(PassThru, self.s.update, check_pass_thru()) self.assertRaises(TypeError, self.s.update, [[]]) for p, q in (('cdc', 'abcd'), ('efgfe', 'abcefg'), ('ccb', 'abc'), ('ef', 'abcef')): - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: s = self.thetype('abcba') self.assertEqual(s.update(C(p)), None) self.assertEqual(s, set(q)) @@ -411,7 +411,7 @@ class TestSet(TestJointOps): self.assertRaises(PassThru, self.s.intersection_update, check_pass_thru()) self.assertRaises(TypeError, self.s.intersection_update, [[]]) for p, q in (('cdc', 'c'), ('efgfe', ''), ('ccb', 'bc'), ('ef', '')): - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: s = self.thetype('abcba') 
self.assertEqual(s.intersection_update(C(p)), None) self.assertEqual(s, set(q)) @@ -436,7 +436,7 @@ class TestSet(TestJointOps): self.assertRaises(TypeError, self.s.difference_update, [[]]) self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]]) for p, q in (('cdc', 'ab'), ('efgfe', 'abc'), ('ccb', 'a'), ('ef', 'abc')): - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: s = self.thetype('abcba') self.assertEqual(s.difference_update(C(p)), None) self.assertEqual(s, set(q)) @@ -460,7 +460,7 @@ class TestSet(TestJointOps): self.assertRaises(PassThru, self.s.symmetric_difference_update, check_pass_thru()) self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]]) for p, q in (('cdc', 'abd'), ('efgfe', 'abcefg'), ('ccb', 'a'), ('ef', 'abcef')): - for C in set, frozenset, dict.fromkeys, str, str8, list, tuple: + for C in set, frozenset, dict.fromkeys, str, list, tuple: s = self.thetype('abcba') self.assertEqual(s.symmetric_difference_update(C(p)), None) self.assertEqual(s, set(q)) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 83d7efb..cc6db32 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -674,8 +674,8 @@ def test_bool(): elif not prefix and verbose: print('size of bool in native format is %i' % (len(packed))) - for c in str8('\x01\x7f\xff\x0f\xf0'): - if struct.unpack('>t', c)[0] is not True: + for c in b'\x01\x7f\xff\x0f\xf0': + if struct.unpack('>t', bytes([c]))[0] is not True: raise TestFailed('%c did not unpack as True' % c) test_bool() diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 699ae27..6371aa3 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -986,28 +986,6 @@ string_contains(PyObject *str_obj, PyObject *sub_obj) return stringlib_contains_obj(str_obj, sub_obj); } -static PyObject * -string_item(PyStringObject *a, register Py_ssize_t i) -{ - char pchar; - PyObject *v; - if (i < 
0 || i >= Py_Size(a)) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return NULL; - } - pchar = a->ob_sval[i]; - v = (PyObject *)characters[pchar & UCHAR_MAX]; - if (v == NULL) - v = PyString_FromStringAndSize(&pchar, 1); - else { -#ifdef COUNT_ALLOCS - one_strings++; -#endif - Py_INCREF(v); - } - return v; -} - static PyObject* string_richcompare(PyStringObject *a, PyStringObject *b, int op) { @@ -1110,7 +1088,12 @@ string_subscript(PyStringObject* self, PyObject* item) return NULL; if (i < 0) i += PyString_GET_SIZE(self); - return string_item(self, i); + if (i < 0 || i >= PyString_GET_SIZE(self)) { + PyErr_SetString(PyExc_IndexError, + "string index out of range"); + return NULL; + } + return PyInt_FromLong((unsigned char)self->ob_sval[i]); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, step, slicelength, cur, i; @@ -1173,7 +1156,7 @@ static PySequenceMethods string_as_sequence = { (lenfunc)string_length, /*sq_length*/ (binaryfunc)string_concat, /*sq_concat*/ (ssizeargfunc)string_repeat, /*sq_repeat*/ - (ssizeargfunc)string_item, /*sq_item*/ + 0, /*sq_item*/ 0, /*sq_slice*/ 0, /*sq_ass_item*/ 0, /*sq_ass_slice*/ @@ -4147,8 +4130,8 @@ striter_next(striterobject *it) assert(PyString_Check(seq)); if (it->it_index < PyString_GET_SIZE(seq)) { - item = PyString_FromStringAndSize( - PyString_AS_STRING(seq)+it->it_index, 1); + item = PyInt_FromLong( + (unsigned char)seq->ob_sval[it->it_index]); if (item != NULL) ++it->it_index; return item; -- cgit v0.12