diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2001-08-17 18:39:25 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2001-08-17 18:39:25 (GMT) |
commit | 339d0f720e86dc34837547c90d3003a4a68d7d46 (patch) | |
tree | 2059e5d02f490540e759800b127d50f3fcd8c2b5 | |
parent | f75976617bb36c892ee8a0f6a6fd3caddbd38cea (diff) | |
download | cpython-339d0f720e86dc34837547c90d3003a4a68d7d46.zip cpython-339d0f720e86dc34837547c90d3003a4a68d7d46.tar.gz cpython-339d0f720e86dc34837547c90d3003a4a68d7d46.tar.bz2 |
Patch #445762: Support --disable-unicode
- Do not compile unicodeobject, unicodectype, and unicodedata if Unicode is disabled
- check for Py_USING_UNICODE in all places that use Unicode functions
- disable Unicode literals and the Unicode-related builtin functions
- add the types.StringTypes list
- remove Unicode literals from most tests.
42 files changed, 465 insertions, 185 deletions
diff --git a/Include/intobject.h b/Include/intobject.h index a22d38a..128d0d3 100644 --- a/Include/intobject.h +++ b/Include/intobject.h @@ -30,7 +30,9 @@ extern DL_IMPORT(PyTypeObject) PyInt_Type; #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type) extern DL_IMPORT(PyObject *) PyInt_FromString(char*, char**, int); +#ifdef Py_USING_UNICODE extern DL_IMPORT(PyObject *) PyInt_FromUnicode(Py_UNICODE*, int, int); +#endif extern DL_IMPORT(PyObject *) PyInt_FromLong(long); extern DL_IMPORT(long) PyInt_AsLong(PyObject *); extern DL_IMPORT(long) PyInt_GetMax(void); diff --git a/Include/longobject.h b/Include/longobject.h index 3c6fde0..8efa35f 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -42,7 +42,9 @@ extern DL_IMPORT(unsigned LONG_LONG) PyLong_AsUnsignedLongLong(PyObject *); #endif /* HAVE_LONG_LONG */ DL_IMPORT(PyObject *) PyLong_FromString(char *, char **, int); +#ifdef Py_USING_UNICODE DL_IMPORT(PyObject *) PyLong_FromUnicode(Py_UNICODE*, int, int); +#endif /* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in base 256, and return a Python long with the same numeric value. 
diff --git a/Include/object.h b/Include/object.h index 8da524b..f832717 100644 --- a/Include/object.h +++ b/Include/object.h @@ -320,7 +320,9 @@ extern DL_IMPORT(int) PyObject_Print(PyObject *, FILE *, int); extern DL_IMPORT(void) _PyObject_Dump(PyObject *); extern DL_IMPORT(PyObject *) PyObject_Repr(PyObject *); extern DL_IMPORT(PyObject *) PyObject_Str(PyObject *); +#ifdef Py_USING_UNICODE extern DL_IMPORT(PyObject *) PyObject_Unicode(PyObject *); +#endif extern DL_IMPORT(int) PyObject_Compare(PyObject *, PyObject *); extern DL_IMPORT(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); extern DL_IMPORT(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index a7e50c3..025c8b7 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -58,6 +58,12 @@ Copyright (c) Corporation for National Research Initiatives. /* --- Internal Unicode Format -------------------------------------------- */ +#ifndef Py_USING_UNICODE + +#define PyUnicode_Check(op) 0 + +#else + /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is properly set, but the default rules below doesn't set it. I'll sort this out some other day -- fredrik@pythonware.com */ @@ -1087,4 +1093,5 @@ extern DL_IMPORT(int) _PyUnicode_IsAlpha( #ifdef __cplusplus } #endif +#endif /* Py_USING_UNICODE */ #endif /* !Py_UNICODEOBJECT_H */ diff --git a/Lib/ConfigParser.py b/Lib/ConfigParser.py index 7be8ffd..fec847c 100644 --- a/Lib/ConfigParser.py +++ b/Lib/ConfigParser.py @@ -82,7 +82,7 @@ ConfigParser -- responsible for for parsing a list of write the configuration state in .ini format """ -import string +import string, types import re __all__ = ["NoSectionError","DuplicateSectionError","NoOptionError", @@ -222,7 +222,7 @@ class ConfigParser: configuration files in the list will be read. A single filename may also be given. 
""" - if type(filenames) in [type(''), type(u'')]: + if type(filenames) in types.StringTypes: filenames = [filenames] for filename in filenames: try: diff --git a/Lib/copy.py b/Lib/copy.py index 123162c..c8cc880 100644 --- a/Lib/copy.py +++ b/Lib/copy.py @@ -91,7 +91,10 @@ d[types.IntType] = _copy_atomic d[types.LongType] = _copy_atomic d[types.FloatType] = _copy_atomic d[types.StringType] = _copy_atomic -d[types.UnicodeType] = _copy_atomic +try: + d[types.UnicodeType] = _copy_atomic +except AttributeError: + pass try: d[types.CodeType] = _copy_atomic except AttributeError: @@ -170,7 +173,10 @@ d[types.IntType] = _deepcopy_atomic d[types.LongType] = _deepcopy_atomic d[types.FloatType] = _deepcopy_atomic d[types.StringType] = _deepcopy_atomic -d[types.UnicodeType] = _deepcopy_atomic +try: + d[types.UnicodeType] = _deepcopy_atomic +except AttributeError: + pass d[types.CodeType] = _deepcopy_atomic d[types.TypeType] = _deepcopy_atomic d[types.XRangeType] = _deepcopy_atomic diff --git a/Lib/site.py b/Lib/site.py index eaa08a7..dfe658a 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -305,7 +305,8 @@ if 0: encoding = "undefined" if encoding != "ascii": - sys.setdefaultencoding(encoding) + # On Non-Unicode builds this will raise an AttributeError... + sys.setdefaultencoding(encoding) # Needs Python Unicode build ! # # Run custom site specific code, if available. diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index edef37c..fa3fb89 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1,6 +1,6 @@ # test_pickle and test_cpickle both use this. 
-from test_support import TestFailed +from test_support import TestFailed, have_unicode import sys # break into multiple strings to please font-lock-mode @@ -191,7 +191,11 @@ def dotest(pickle): print "accepted insecure string: %s" % repr(buf) # Test some Unicode end cases - endcases = [u'', u'<\\u>', u'<\\\u1234>', u'<\n>', u'<\\>'] + if have_unicode: + endcases = [unicode(''), unicode('<\\u>'), unicode('<\\\u1234>'), + unicode('<\n>'), unicode('<\\>')] + else: + endcases = [] for u in endcases: try: u2 = pickle.loads(pickle.dumps(u)) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 9b95a8e..e207566 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1,7 +1,7 @@ """Common tests shared by test_string and test_userstring""" import string -from test_support import verify, verbose, TestFailed +from test_support import verify, verbose, TestFailed, have_unicode transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' @@ -125,11 +125,12 @@ def run_method_tests(test): test('join', ' ', 'w x y z', Sequence()) test('join', 'a', 'abc', ('abc',)) test('join', 'a', 'z', UserList(['z'])) - test('join', u'.', u'a.b.c', ['a', 'b', 'c']) - test('join', '.', u'a.b.c', [u'a', 'b', 'c']) - test('join', '.', u'a.b.c', ['a', u'b', 'c']) - test('join', '.', 
u'a.b.c', ['a', 'b', u'c']) - test('join', '.', TypeError, ['a', u'b', 3]) + if have_unicode: + test('join', unicode('.'), unicode('a.b.c'), ['a', 'b', 'c']) + test('join', '.', unicode('a.b.c'), [unicode('a'), 'b', 'c']) + test('join', '.', unicode('a.b.c'), ['a', unicode('b'), 'c']) + test('join', '.', unicode('a.b.c'), ['a', 'b', unicode('c')]) + test('join', '.', TypeError, ['a', unicode('b'), 3]) for i in [5, 25, 125]: test('join', '-', ((('a' * i) + '-') * i)[:-1], ['a' * i] * i) diff --git a/Lib/test/test_b1.py b/Lib/test/test_b1.py index 1103a03..18b426f 100644 --- a/Lib/test/test_b1.py +++ b/Lib/test/test_b1.py @@ -119,7 +119,9 @@ if complex(0.0, 3.14j) != -3.14+0j: raise TestFailed, 'complex(0.0, 3.14j)' if complex(0j, 3.14) != 3.14j: raise TestFailed, 'complex(0j, 3.14)' if complex(0.0, 3.14) != 3.14j: raise TestFailed, 'complex(0.0, 3.14)' if complex(" 3.14+J ") != 3.14+1j: raise TestFailed, 'complex(" 3.14+J )"' -if complex(u" 3.14+J ") != 3.14+1j: raise TestFailed, 'complex(u" 3.14+J )"' +if have_unicode: + if complex(unicode(" 3.14+J ")) != 3.14+1j: + raise TestFailed, 'complex(u" 3.14+J )"' class Z: def __complex__(self): return 3.14j z = Z() @@ -174,18 +176,20 @@ if eval('b', globals, locals) != 200: raise TestFailed, "eval(3)" if eval('c', globals, locals) != 300: raise TestFailed, "eval(4)" -if eval(u'1+1') != 2: raise TestFailed, 'eval(u\'1+1\')' -if eval(u' 1+1\n') != 2: raise TestFailed, 'eval(u\' 1+1\\n\')' +if have_unicode: + if eval(unicode('1+1')) != 2: raise TestFailed, 'eval(u\'1+1\')' + if eval(unicode(' 1+1\n')) != 2: raise TestFailed, 'eval(u\' 1+1\\n\')' globals = {'a': 1, 'b': 2} locals = {'b': 200, 'c': 300} -if eval(u'a', globals) != 1: - raise TestFailed, "eval(1) == %s" % eval(u'a', globals) -if eval(u'a', globals, locals) != 1: - raise TestFailed, "eval(2)" -if eval(u'b', globals, locals) != 200: - raise TestFailed, "eval(3)" -if eval(u'c', globals, locals) != 300: - raise TestFailed, "eval(4)" +if have_unicode: + if 
eval(unicode('a'), globals) != 1: + raise TestFailed, "eval(1) == %s" % eval(unicode('a'), globals) + if eval(unicode('a'), globals, locals) != 1: + raise TestFailed, "eval(2)" + if eval(unicode('b'), globals, locals) != 200: + raise TestFailed, "eval(3)" + if eval(unicode('c'), globals, locals) != 300: + raise TestFailed, "eval(4)" print 'execfile' z = 0 @@ -249,9 +253,11 @@ if float(3.14) != 3.14: raise TestFailed, 'float(3.14)' if float(314) != 314.0: raise TestFailed, 'float(314)' if float(314L) != 314.0: raise TestFailed, 'float(314L)' if float(" 3.14 ") != 3.14: raise TestFailed, 'float(" 3.14 ")' -if float(u" 3.14 ") != 3.14: raise TestFailed, 'float(u" 3.14 ")' -if float(u" \u0663.\u0661\u0664 ") != 3.14: - raise TestFailed, 'float(u" \u0663.\u0661\u0664 ")' +if have_unicode: + if float(unicode(" 3.14 ")) != 3.14: + raise TestFailed, 'float(u" 3.14 ")' + if float(unicode(" \u0663.\u0661\u0664 ")) != 3.14: + raise TestFailed, 'float(u" \u0663.\u0661\u0664 ")' print 'getattr' import sys @@ -324,7 +330,9 @@ if int(3.5) != 3: raise TestFailed, 'int(3.5)' if int(-3.5) != -3: raise TestFailed, 'int(-3.5)' # Different base: if int("10",16) != 16L: raise TestFailed, 'int("10",16)' -if int(u"10",16) != 16L: raise TestFailed, 'int(u"10",16)' +if have_unicode: + if int(unicode("10"),16) != 16L: + raise TestFailed, 'int(u"10",16)' # Test conversion from strings and various anomalies L = [ ('0', 0), @@ -343,23 +351,26 @@ L = [ (' 1\02 ', ValueError), ('', ValueError), (' ', ValueError), - (' \t\t ', ValueError), - (u'0', 0), - (u'1', 1), - (u'9', 9), - (u'10', 10), - (u'99', 99), - (u'100', 100), - (u'314', 314), - (u' 314', 314), - (u'\u0663\u0661\u0664 ', 314), - (u' \t\t 314 \t\t ', 314), - (u' 1x', ValueError), - (u' 1 ', 1), - (u' 1\02 ', ValueError), - (u'', ValueError), - (u' ', ValueError), - (u' \t\t ', ValueError), + (' \t\t ', ValueError) +] +if have_unicode: + L += [ + (unicode('0'), 0), + (unicode('1'), 1), + (unicode('9'), 9), + (unicode('10'), 10), + 
(unicode('99'), 99), + (unicode('100'), 100), + (unicode('314'), 314), + (unicode(' 314'), 314), + (unicode('\u0663\u0661\u0664 '), 314), + (unicode(' \t\t 314 \t\t '), 314), + (unicode(' 1x'), ValueError), + (unicode(' 1 '), 1), + (unicode(' 1\02 '), ValueError), + (unicode(''), ValueError), + (unicode(' '), ValueError), + (unicode(' \t\t '), ValueError), ] for s, v in L: for sign in "", "+", "-": @@ -460,16 +471,23 @@ if long(-3.9) != -3L: raise TestFailed, 'long(-3.9)' if long(3.5) != 3L: raise TestFailed, 'long(3.5)' if long(-3.5) != -3L: raise TestFailed, 'long(-3.5)' if long("-3") != -3L: raise TestFailed, 'long("-3")' -if long(u"-3") != -3L: raise TestFailed, 'long(u"-3")' +if have_unicode: + if long(unicode("-3")) != -3L: + raise TestFailed, 'long(u"-3")' # Different base: if long("10",16) != 16L: raise TestFailed, 'long("10",16)' -if long(u"10",16) != 16L: raise TestFailed, 'long(u"10",16)' +if have_unicode: + if long(unicode("10"),16) != 16L: + raise TestFailed, 'long(u"10",16)' # Check conversions from string (same test set as for int(), and then some) LL = [ ('1' + '0'*20, 10L**20), - ('1' + '0'*100, 10L**100), - (u'1' + u'0'*20, 10L**20), - (u'1' + u'0'*100, 10L**100), + ('1' + '0'*100, 10L**100) +] +if have_unicode: + L+=[ + (unicode('1') + unicode('0')*20, 10L**20), + (unicode('1') + unicode('0')*100, 10L**100), ] for s, v in L + LL: for sign in "", "+", "-": diff --git a/Lib/test/test_contains.py b/Lib/test/test_contains.py index 8fec425..1a9a965 100644 --- a/Lib/test/test_contains.py +++ b/Lib/test/test_contains.py @@ -1,4 +1,4 @@ -from test_support import TestFailed +from test_support import TestFailed, have_unicode class base_set: @@ -63,62 +63,65 @@ try: except TypeError: pass -# Test char in Unicode -check('c' in u'abc', "'c' not in u'abc'") -check('d' not in u'abc', "'d' in u'abc'") +if have_unicode: -try: - '' in u'abc' - check(0, "'' in u'abc' did not raise error") -except TypeError: - pass + # Test char in Unicode -try: - 'ab' in u'abc' - 
check(0, "'ab' in u'abc' did not raise error") -except TypeError: - pass + check('c' in unicode('abc'), "'c' not in u'abc'") + check('d' not in unicode('abc'), "'d' in u'abc'") -try: - None in u'abc' - check(0, "None in u'abc' did not raise error") -except TypeError: - pass + try: + '' in unicode('abc') + check(0, "'' in u'abc' did not raise error") + except TypeError: + pass -# Test Unicode char in Unicode + try: + 'ab' in unicode('abc') + check(0, "'ab' in u'abc' did not raise error") + except TypeError: + pass -check(u'c' in u'abc', "u'c' not in u'abc'") -check(u'd' not in u'abc', "u'd' in u'abc'") + try: + None in unicode('abc') + check(0, "None in u'abc' did not raise error") + except TypeError: + pass -try: - u'' in u'abc' - check(0, "u'' in u'abc' did not raise error") -except TypeError: - pass + # Test Unicode char in Unicode -try: - u'ab' in u'abc' - check(0, "u'ab' in u'abc' did not raise error") -except TypeError: - pass + check(unicode('c') in unicode('abc'), "u'c' not in u'abc'") + check(unicode('d') not in unicode('abc'), "u'd' in u'abc'") -# Test Unicode char in string + try: + unicode('') in unicode('abc') + check(0, "u'' in u'abc' did not raise error") + except TypeError: + pass -check(u'c' in 'abc', "u'c' not in 'abc'") -check(u'd' not in 'abc', "u'd' in 'abc'") + try: + unicode('ab') in unicode('abc') + check(0, "u'ab' in u'abc' did not raise error") + except TypeError: + pass -try: - u'' in 'abc' - check(0, "u'' in 'abc' did not raise error") -except TypeError: - pass + # Test Unicode char in string -try: - u'ab' in 'abc' - check(0, "u'ab' in 'abc' did not raise error") -except TypeError: - pass + check(unicode('c') in 'abc', "u'c' not in 'abc'") + check(unicode('d') not in 'abc', "u'd' in 'abc'") + + try: + unicode('') in 'abc' + check(0, "u'' in 'abc' did not raise error") + except TypeError: + pass + + try: + unicode('ab') in 'abc' + check(0, "u'ab' in 'abc' did not raise error") + except TypeError: + pass # A collection of tests on builtin 
sequence types a = range(10) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c74db0f..a89ed52 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -1,4 +1,4 @@ -from test_support import verbose +from test_support import verbose, have_unicode import sys # test string formatting operator (I am not sure if this is being tested @@ -34,7 +34,8 @@ def testformat(formatstr, args, output=None): def testboth(formatstr, *args): testformat(formatstr, *args) - testformat(unicode(formatstr), *args) + if have_unicode: + testformat(unicode(formatstr), *args) testboth("%.1d", (1,), "1") @@ -212,5 +213,6 @@ def test_exc(formatstr, args, exception, excmsg): test_exc('abc %a', 1, ValueError, "unsupported format character 'a' (0x61) at index 5") -test_exc(u'abc %\u3000', 1, ValueError, - "unsupported format character '?' (0x3000) at index 5") +if have_unicode: + test_exc(unicode('abc %\u3000'), 1, ValueError, + "unsupported format character '?' (0x3000) at index 5") diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 63e488e..8b6891b 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -1,7 +1,7 @@ # Test iterators. 
import unittest -from test_support import run_unittest, TESTFN, unlink +from test_support import run_unittest, TESTFN, unlink, have_unicode # Test result of triple loop (too big to inline) TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2), @@ -214,8 +214,11 @@ class TestCase(unittest.TestCase): self.check_for_loop(iter("abcde"), ["a", "b", "c", "d", "e"]) # Test a Unicode string - def test_iter_unicode(self): - self.check_for_loop(iter(u"abcde"), [u"a", u"b", u"c", u"d", u"e"]) + if have_unicode: + def test_iter_unicode(self): + self.check_for_loop(iter(unicode("abcde")), + [unicode("a"), unicode("b"), unicode("c"), + unicode("d"), unicode("e")]) # Test a directory def test_iter_dict(self): @@ -477,6 +480,7 @@ class TestCase(unittest.TestCase): d = {"one": 1, "two": 2, "three": 3} self.assertEqual(reduce(add, d), "".join(d.keys())) + # This test case will be removed if we don't have Unicode def test_unicode_join_endcase(self): # This class inserts a Unicode object into its argument's natural @@ -493,7 +497,7 @@ class TestCase(unittest.TestCase): i = self.i self.i = i+1 if i == 2: - return u"fooled you!" + return unicode("fooled you!") return self.it.next() f = open(TESTFN, "w") @@ -510,13 +514,15 @@ class TestCase(unittest.TestCase): # and pass that on to unicode.join(). try: got = " - ".join(OhPhooey(f)) - self.assertEqual(got, u"a\n - b\n - fooled you! - c\n") + self.assertEqual(got, unicode("a\n - b\n - fooled you! - c\n")) finally: f.close() try: unlink(TESTFN) except OSError: pass + if not have_unicode: + def test_unicode_join_endcase(self): pass # Test iterators with 'x in y' and 'x not in y'. 
def test_in_and_not_in(self): diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index ba53ee8..34c7a84 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -3,6 +3,11 @@ import unittest import test_support +try: + uni = unicode +except NameError: + def uni(x):return x + class QueryTestCase(unittest.TestCase): @@ -14,7 +19,7 @@ class QueryTestCase(unittest.TestCase): def test_basic(self): """Verify .isrecursive() and .isreadable() w/o recursion.""" verify = self.assert_ - for safe in (2, 2.0, 2j, "abc", [3], (2,2), {3: 3}, u"yaddayadda", + for safe in (2, 2.0, 2j, "abc", [3], (2,2), {3: 3}, uni("yaddayadda"), self.a, self.b): verify(not pprint.isrecursive(safe), "expected not isrecursive for " + `safe`) @@ -58,8 +63,8 @@ class QueryTestCase(unittest.TestCase): def test_same_as_repr(self): "Simple objects and small containers that should be same as repr()." verify = self.assert_ - for simple in (0, 0L, 0+0j, 0.0, "", u"", (), [], {}, verify, pprint, - -6, -6L, -6-6j, -1.5, "x", u"x", (3,), [3], {3: 6}, + for simple in (0, 0L, 0+0j, 0.0, "", uni(""), (), [], {}, verify, pprint, + -6, -6L, -6-6j, -1.5, "x", uni("x"), (3,), [3], {3: 6}, (1,2), [3,4], {5: 6, 7: 8}, {"xy\tab\n": (3,), 5: [[]], (): {}}, range(10, -11, -1) diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index f673c33..8442258 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -6,7 +6,7 @@ import sys sys.path=['.']+sys.path -from test_support import verbose, TestFailed +from test_support import verbose, TestFailed, have_unicode import sre import sys, os, string, traceback @@ -378,7 +378,8 @@ for t in tests: # Try the match with UNICODE locale enabled, and check # that it still succeeds. 
- obj=sre.compile(pattern, sre.UNICODE) - result=obj.search(s) - if result==None: - print '=== Fails on unicode-sensitive match', t + if have_unicode: + obj=sre.compile(pattern, sre.UNICODE) + result=obj.search(s) + if result==None: + print '=== Fails on unicode-sensitive match', t diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 3d5c783..80b8356 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -56,6 +56,12 @@ def fcmp(x, y): # fuzzy comparison function return cmp(len(x), len(y)) return cmp(x, y) +try: + unicode + have_unicode = 1 +except NameError: + have_unicode = 0 + import os # Filename used for testing if os.name == 'java': @@ -64,9 +70,10 @@ if os.name == 'java': elif os.name != 'riscos': TESTFN = '@test' # Unicode name only used if TEST_FN_ENCODING exists for the platform. - TESTFN_UNICODE=u"@test-\xe0\xf2" # 2 latin characters. - if os.name=="nt": - TESTFN_ENCODING="mbcs" + if have_unicode: + TESTFN_UNICODE=unicode("@test-\xe0\xf2", "latin-1") # 2 latin characters. 
+ if os.name=="nt": + TESTFN_ENCODING="mbcs" else: TESTFN = 'test' del os diff --git a/Lib/test/test_winreg.py b/Lib/test/test_winreg.py index 02bc749..ca38305 100644 --- a/Lib/test/test_winreg.py +++ b/Lib/test/test_winreg.py @@ -4,21 +4,24 @@ from _winreg import * import os, sys -from test_support import verify +from test_support import verify, have_unicode test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me" test_data = [ ("Int Value", 45, REG_DWORD), ("String Val", "A string value", REG_SZ,), - (u"Unicode Val", u"A Unicode value", REG_SZ,), ("StringExpand", "The path is %path%", REG_EXPAND_SZ), - ("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ), ("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ), - ("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ), - ("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ), ("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY), ] +if have_unicode: + test_data+=[ + (unicode("Unicode Val"), unicode("A Unicode value"), REG_SZ,), + ("UnicodeExpand", unicode("The path is %path%"), REG_EXPAND_SZ), + ("Multi-unicode", [unicode("Lots"), unicode("of"), unicode("unicode"), unicode("values")], REG_MULTI_SZ), + ("Multi-mixed", [unicode("Unicode"), unicode("and"), "string", "values"],REG_MULTI_SZ), + ] def WriteTestData(root_key): # Set the default value for this key. 
diff --git a/Lib/types.py b/Lib/types.py index 01af463..6c23e24 100644 --- a/Lib/types.py +++ b/Lib/types.py @@ -19,7 +19,12 @@ except NameError: pass StringType = str -UnicodeType = unicode +try: + UnicodeType = unicode + StringTypes = [StringType, UnicodeType] +except NameError: + StringTypes = [StringType] + BufferType = type(buffer('')) TupleType = tuple diff --git a/Makefile.pre.in b/Makefile.pre.in index 10d6452..e776555 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -147,6 +147,7 @@ LIBOBJS= @LIBOBJS@ DLINCLDIR= @DLINCLDIR@ DYNLOADFILE= @DYNLOADFILE@ MACHDEP_OBJS= @MACHDEP_OBJS@ +UNICODE_OBJS= @UNICODE_OBJS@ PYTHON= python$(EXE) @@ -263,8 +264,7 @@ OBJECT_OBJS= \ Objects/stringobject.o \ Objects/tupleobject.o \ Objects/typeobject.o \ - Objects/unicodeobject.o \ - Objects/unicodectype.o + $(UNICODE_OBJS) ########################################################################## @@ -1,6 +1,12 @@ What's New in Python 2.2a2? =========================== +Build + +- configure supports a new option --enable-unicode, with the values + ucs2 and ucs4 (new in 2.2a1). With --disable-unicode, the Unicode + type and supporting code is completely removed from the interpreter. + Tools - The new Tools/scripts/cleanfuture.py can be used to automatically @@ -57,6 +63,12 @@ C API sure to check the Unicode width compatibility in their extensions by using at least one of the mangled Unicode APIs in the extension. +- Two new flags METH_NOARGS and METH_O are available in method definition + tables to simplify implementation of methods with no arguments and a + single untyped argument. Calling such methods is more efficient than + calling corresponding METH_VARARGS methods. METH_OLDARGS is now + deprecated. + Windows - "import module" now compiles module.pyw if it exists and nothing else @@ -90,12 +102,6 @@ Core (These warnings currently don't conform to the warnings framework of PEP 230; we intend to fix this in 2.2a2.) 
-- Two new flags METH_NOARGS and METH_O are available in method definition - tables to simplify implementation of methods with no arguments and a - single untyped argument. Calling such methods is more efficient than - calling corresponding METH_VARARGS methods. METH_OLDARGS is now - deprecated. - - The UTF-16 codec was modified to be more RFC compliant. It will now only remove BOM characters at the start of the string and then only if running in native mode (UTF-16-LE and -BE won't remove a diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 61f25d1..a085bcf 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -71,6 +71,7 @@ PyObject *codeclookup(PyObject *self, PyObject *args) return NULL; } +#ifdef Py_USING_UNICODE /* --- Helpers ------------------------------------------------------------ */ static @@ -621,12 +622,14 @@ mbcs_encode(PyObject *self, } #endif /* MS_WIN32 */ +#endif /* Py_USING_UNICODE */ /* --- Module API --------------------------------------------------------- */ static PyMethodDef _codecs_functions[] = { {"register", codecregister, 1}, {"lookup", codeclookup, 1}, +#ifdef Py_USING_UNICODE {"utf_8_encode", utf_8_encode, 1}, {"utf_8_decode", utf_8_decode, 1}, {"utf_16_encode", utf_16_encode, 1}, @@ -654,6 +657,7 @@ static PyMethodDef _codecs_functions[] = { {"mbcs_encode", mbcs_encode, 1}, {"mbcs_decode", mbcs_decode, 1}, #endif +#endif /* Py_USING_UNICODE */ {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_sre.c b/Modules/_sre.c index 1776a16..9943c30 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -63,7 +63,7 @@ static char copyright[] = /* defining this one enables tracing */ #undef VERBOSE -#if PY_VERSION_HEX >= 0x01060000 +#if PY_VERSION_HEX >= 0x01060000 && defined(Py_USING_UNICODE) /* defining this enables unicode support (default under 1.6a1 and later) */ #define HAVE_UNICODE #endif diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 9b73307..eedb0c1 100644 --- a/Modules/_tkinter.c +++ 
b/Modules/_tkinter.c @@ -255,6 +255,7 @@ AsString(PyObject *value, PyObject *tmp) { if (PyString_Check(value)) return PyString_AsString(value); +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(value)) { PyObject *v = PyUnicode_AsUTF8String(value); if (v == NULL) @@ -266,6 +267,7 @@ AsString(PyObject *value, PyObject *tmp) Py_DECREF(v); return PyString_AsString(v); } +#endif else { PyObject *v = PyObject_Str(value); if (v == NULL) @@ -520,6 +522,7 @@ AsObj(PyObject *value) ckfree(FREECAST argv); return result; } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(value)) { #if TKMAJORMINOR <= 8001 /* In Tcl 8.1 we must use UTF-8 */ @@ -542,6 +545,7 @@ AsObj(PyObject *value) PyUnicode_GET_SIZE(value)); #endif /* TKMAJORMINOR > 8001 */ } +#endif else { PyObject *v = PyObject_Str(value); if (!v) @@ -616,13 +620,16 @@ Tkapp_Call(PyObject *self, PyObject *args) so would confuse applications that expect a string. */ char *s = Tcl_GetStringResult(interp); char *p = s; + /* If the result contains any bytes with the top bit set, it's UTF-8 and we should decode it to Unicode */ +#ifdef Py_USING_UNICODE while (*p != '\0') { if (*p & 0x80) break; p++; } + if (*p == '\0') res = PyString_FromStringAndSize(s, (int)(p-s)); else { @@ -634,6 +641,10 @@ Tkapp_Call(PyObject *self, PyObject *args) res = PyString_FromStringAndSize(s, (int)(p-s)); } } +#else + p = strchr(p, '\0'); + res = PyString_FromStringAndSize(s, (int)(p-s)); +#endif } LEAVE_OVERLAP_TCL diff --git a/Modules/cPickle.c b/Modules/cPickle.c index b27339f..bb0d281 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -1172,6 +1172,7 @@ err: } +#ifdef Py_USING_UNICODE /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates backslash and newline characters to \uXXXX escapes. 
*/ static PyObject * @@ -1289,6 +1290,7 @@ err: Py_XDECREF(repr); return -1; } +#endif static int @@ -1824,11 +1826,13 @@ save(Picklerobject *self, PyObject *args, int pers_save) { goto finally; } +#ifdef Py_USING_UNICODE case 'u': if ((type == &PyUnicode_Type) && (PyString_GET_SIZE(args) < 2)) { res = save_unicode(self, args, 0); goto finally; } +#endif } if (args->ob_refcnt > 1) { @@ -1857,12 +1861,14 @@ save(Picklerobject *self, PyObject *args, int pers_save) { } break; +#ifdef Py_USING_UNICODE case 'u': if (type == &PyUnicode_Type) { res = save_unicode(self, args, 1); goto finally; } break; +#endif case 't': if (type == &PyTuple_Type) { @@ -2818,6 +2824,7 @@ load_short_binstring(Unpicklerobject *self) { } +#ifdef Py_USING_UNICODE static int load_unicode(Unpicklerobject *self) { PyObject *str = 0; @@ -2836,8 +2843,10 @@ load_unicode(Unpicklerobject *self) { finally: return res; } +#endif +#ifdef Py_USING_UNICODE static int load_binunicode(Unpicklerobject *self) { PyObject *unicode; @@ -2857,6 +2866,7 @@ load_binunicode(Unpicklerobject *self) { PDATA_PUSH(self->stack, unicode, -1); return 0; } +#endif static int @@ -3615,6 +3625,7 @@ load(Unpicklerobject *self) { break; continue; +#ifdef Py_USING_UNICODE case UNICODE: if (load_unicode(self) < 0) break; @@ -3624,6 +3635,7 @@ load(Unpicklerobject *self) { if (load_binunicode(self) < 0) break; continue; +#endif case EMPTY_TUPLE: if (load_empty_tuple(self) < 0) @@ -3905,6 +3917,7 @@ noload(Unpicklerobject *self) { break; continue; +#ifdef Py_USING_UNICODE case UNICODE: if (load_unicode(self) < 0) break; @@ -3914,6 +3927,7 @@ noload(Unpicklerobject *self) { if (load_binunicode(self) < 0) break; continue; +#endif case EMPTY_TUPLE: if (load_empty_tuple(self) < 0) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 4bddc46..3311093 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -28,6 +28,11 @@ #define Py_TPFLAGS_GC 0 #endif +#if (PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION > 5) || (PY_MAJOR_VERSION == 2 && 
PY_MINOR_VERSION < 2) +/* In Python 1.6, 2.0 and 2.1, disabling Unicode was not possible. */ +#define Py_USING_UNICODE +#endif + enum HandlerTypes { StartElement, EndElement, @@ -173,7 +178,7 @@ conv_atts_using_string(XML_Char **atts) } #endif -#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6) +#ifdef Py_USING_UNICODE #if EXPAT_VERSION == 0x010200 static PyObject * conv_atts_using_unicode(XML_Char **atts) @@ -370,7 +375,7 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args) return res; } -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 +#ifndef Py_USING_UNICODE #define STRING_CONV_FUNC conv_string_to_utf8 #else /* Python 1.6 and later versions */ @@ -506,7 +511,7 @@ VOID_HANDLER(ProcessingInstruction, const XML_Char *data), ("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data)) -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 +#ifndef Py_USING_UNICODE VOID_HANDLER(CharacterData, (void *userData, const XML_Char *data, int len), ("(N)", conv_string_len_to_utf8(data,len))) @@ -531,7 +536,7 @@ VOID_HANDLER(UnparsedEntityDecl, STRING_CONV_FUNC,notationName)) #if EXPAT_VERSION >= 0x015f00 -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 +#ifndef Py_USING_UNICODE VOID_HANDLER(EntityDecl, (void *userData, const XML_Char *entityName, @@ -608,7 +613,7 @@ conv_content_model_utf8(XML_Content * const model) return conv_content_model(model, conv_string_to_utf8); } -#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6) +#ifdef Py_USING_UNICODE static PyObject * conv_content_model_unicode(XML_Content * const model) { @@ -678,7 +683,7 @@ VOID_HANDLER(EndCdataSection, (void *userData), ("()")) -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 +#ifndef Py_USING_UNICODE VOID_HANDLER(Default, (void *userData, const XML_Char *s, int len), ("(N)", conv_string_len_to_utf8(s,len))) @@ -1064,7 +1069,7 @@ static struct PyMethodDef xmlparse_methods[] = { /* ---------- */ -#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6) +#ifdef Py_USING_UNICODE /* pyexpat 
international encoding support. @@ -1158,8 +1163,7 @@ newxmlparseobject(char *encoding, char *namespace_separator) return NULL; } XML_SetUserData(self->itself, (void *)self); -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 -#else +#ifdef Py_USING_UNICODE XML_SetUnknownEncodingHandler(self->itself, (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL); #endif @@ -1292,7 +1296,7 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) } if (strcmp(name, "returns_unicode") == 0) { if (PyObject_IsTrue(v)) { -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 +#ifndef Py_USING_UNICODE PyErr_SetString(PyExc_ValueError, "Cannot return Unicode strings in Python 1.5"); return -1; @@ -1545,8 +1549,7 @@ MODULE_INITFUNC(void) info.minor, info.micro)); } #endif -#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 -#else +#ifdef Py_USING_UNICODE init_template_buffer(); #endif /* XXX When Expat supports some way of figuring out how it was diff --git a/Objects/abstract.c b/Objects/abstract.c index b646c36..f7ade6d 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -583,8 +583,10 @@ PyNumber_Remainder(PyObject *v, PyObject *w) { if (PyString_Check(v)) return PyString_Format(v, w); +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(v)) return PyUnicode_Format(v, w); +#endif return binary_op(v, w, NB_SLOT(nb_remainder), "%"); } @@ -707,8 +709,10 @@ PyNumber_InPlaceRemainder(PyObject *v, PyObject *w) { if (PyString_Check(v)) return PyString_Format(v, w); +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(v)) return PyUnicode_Format(v, w); +#endif else return binary_iop(v, w, NB_SLOT(nb_inplace_remainder), NB_SLOT(nb_remainder), "%="); @@ -821,10 +825,12 @@ PyNumber_Int(PyObject *o) if (PyString_Check(o)) return int_from_string(PyString_AS_STRING(o), PyString_GET_SIZE(o)); +#ifdef Py_USING_UNICODE if (PyUnicode_Check(o)) return PyInt_FromUnicode(PyUnicode_AS_UNICODE(o), PyUnicode_GET_SIZE(o), 10); +#endif m = o->ob_type->tp_as_number; if (m && m->nb_int) return 
m->nb_int(o); @@ -873,11 +879,13 @@ PyNumber_Long(PyObject *o) */ return long_from_string(PyString_AS_STRING(o), PyString_GET_SIZE(o)); +#ifdef Py_USING_UNICODE if (PyUnicode_Check(o)) /* The above check is done in PyLong_FromUnicode(). */ return PyLong_FromUnicode(PyUnicode_AS_UNICODE(o), PyUnicode_GET_SIZE(o), 10); +#endif m = o->ob_type->tp_as_number; if (m && m->nb_long) return m->nb_long(o); diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 84eee11..cb081aa 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -611,14 +611,15 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) int sw_error=0; int sign; char buffer[256]; /* For errors */ - char s_buffer[256]; int len; if (PyString_Check(v)) { s = PyString_AS_STRING(v); len = PyString_GET_SIZE(v); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(v)) { + char s_buffer[256]; if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) { PyErr_SetString(PyExc_ValueError, "complex() literal too large to convert"); @@ -632,6 +633,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) s = s_buffer; len = (int)strlen(s); } +#endif else if (PyObject_AsCharBuffer(v, &s, &len)) { PyErr_SetString(PyExc_TypeError, "complex() arg is not a string"); diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 34b252b..044d1d3 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -109,7 +109,9 @@ PyFloat_FromString(PyObject *v, char **pend) const char *s, *last, *end; double x; char buffer[256]; /* for errors */ +#ifdef Py_USING_UNICODE char s_buffer[256]; /* for objects convertible to a char buffer */ +#endif int len; if (pend) @@ -118,6 +120,7 @@ PyFloat_FromString(PyObject *v, char **pend) s = PyString_AS_STRING(v); len = PyString_GET_SIZE(v); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(v)) { if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) { PyErr_SetString(PyExc_ValueError, @@ -132,6 +135,7 @@ PyFloat_FromString(PyObject *v, char **pend) s = s_buffer; 
len = (int)strlen(s); } +#endif else if (PyObject_AsCharBuffer(v, &s, &len)) { PyErr_SetString(PyExc_TypeError, "float() needs a string argument"); diff --git a/Objects/intobject.c b/Objects/intobject.c index f69f81a..e7f618b 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -202,6 +202,7 @@ PyInt_FromString(char *s, char **pend, int base) return PyInt_FromLong(x); } +#ifdef Py_USING_UNICODE PyObject * PyInt_FromUnicode(Py_UNICODE *s, int length, int base) { @@ -216,6 +217,7 @@ PyInt_FromUnicode(Py_UNICODE *s, int length, int base) return NULL; return PyInt_FromString(buffer, NULL, base); } +#endif /* Methods */ @@ -765,10 +767,12 @@ int_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return PyNumber_Int(x); if (PyString_Check(x)) return PyInt_FromString(PyString_AS_STRING(x), NULL, base); +#ifdef Py_USING_UNICODE if (PyUnicode_Check(x)) return PyInt_FromUnicode(PyUnicode_AS_UNICODE(x), PyUnicode_GET_SIZE(x), base); +#endif PyErr_SetString(PyExc_TypeError, "int() can't convert non-string with explicit base"); return NULL; diff --git a/Objects/longobject.c b/Objects/longobject.c index 9f7272c..01a7276 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -971,6 +971,7 @@ PyLong_FromString(char *str, char **pend, int base) return NULL; } +#ifdef Py_USING_UNICODE PyObject * PyLong_FromUnicode(Py_UNICODE *u, int length, int base) { @@ -986,6 +987,7 @@ PyLong_FromUnicode(Py_UNICODE *u, int length, int base) return PyLong_FromString(buffer, NULL, base); } +#endif /* forward */ static PyLongObject *x_divrem @@ -2054,10 +2056,12 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return PyNumber_Long(x); else if (PyString_Check(x)) return PyLong_FromString(PyString_AS_STRING(x), NULL, base); +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(x)) return PyLong_FromUnicode(PyUnicode_AS_UNICODE(x), PyUnicode_GET_SIZE(x), base); +#endif else { PyErr_SetString(PyExc_TypeError, "long() can't convert non-string with explicit base"); diff 
--git a/Objects/object.c b/Objects/object.c index fea9ee5..7e4a211 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -246,6 +246,7 @@ PyObject_Repr(PyObject *v) res = (*v->ob_type->tp_repr)(v); if (res == NULL) return NULL; +#ifdef Py_USING_UNICODE if (PyUnicode_Check(res)) { PyObject* str; str = PyUnicode_AsUnicodeEscapeString(res); @@ -255,6 +256,7 @@ PyObject_Repr(PyObject *v) else return NULL; } +#endif if (!PyString_Check(res)) { PyErr_Format(PyExc_TypeError, "__repr__ returned non-string (type %.200s)", @@ -283,6 +285,7 @@ PyObject_Str(PyObject *v) res = (*v->ob_type->tp_str)(v); if (res == NULL) return NULL; +#ifdef Py_USING_UNICODE if (PyUnicode_Check(res)) { PyObject* str; str = PyUnicode_AsEncodedString(res, NULL, NULL); @@ -292,6 +295,7 @@ PyObject_Str(PyObject *v) else return NULL; } +#endif if (!PyString_Check(res)) { PyErr_Format(PyExc_TypeError, "__str__ returned non-string (type %.200s)", @@ -302,6 +306,7 @@ PyObject_Str(PyObject *v) return res; } +#ifdef Py_USING_UNICODE PyObject * PyObject_Unicode(PyObject *v) { @@ -350,6 +355,7 @@ PyObject_Unicode(PyObject *v) } return res; } +#endif /* Macro to get the tp_richcompare field of a type if defined */ @@ -523,6 +529,7 @@ default_3way_compare(PyObject *v, PyObject *w) return (vv < ww) ? -1 : (vv > ww) ? 1 : 0; } +#ifdef Py_USING_UNICODE /* Special case for Unicode */ if (PyUnicode_Check(v) || PyUnicode_Check(w)) { c = PyUnicode_Compare(v, w); @@ -537,6 +544,7 @@ default_3way_compare(PyObject *v, PyObject *w) return -2; PyErr_Clear(); } +#endif /* None is smaller than anything */ if (v == Py_None) @@ -1032,6 +1040,7 @@ PyObject_GetAttr(PyObject *v, PyObject *name) { PyTypeObject *tp = v->ob_type; +#ifdef Py_USING_UNICODE /* The Unicode to string conversion is done here because the existing tp_getattro slots expect a string object as name and we wouldn't want to break those. 
*/ @@ -1040,6 +1049,8 @@ PyObject_GetAttr(PyObject *v, PyObject *name) if (name == NULL) return NULL; } +#endif + if (!PyString_Check(name)) { PyErr_SetString(PyExc_TypeError, "attribute name must be string"); @@ -1073,6 +1084,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) PyTypeObject *tp = v->ob_type; int err; +#ifdef Py_USING_UNICODE /* The Unicode to string conversion is done here because the existing tp_setattro slots expect a string object as name and we wouldn't want to break those. */ @@ -1081,7 +1093,9 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) if (name == NULL) return -1; } - else if (!PyString_Check(name)){ + else +#endif + if (!PyString_Check(name)){ PyErr_SetString(PyExc_TypeError, "attribute name must be string"); return -1; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index e2682a0..a8e063e 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -173,8 +173,14 @@ PyObject *PyString_AsDecodedObject(PyObject *str, goto onError; } - if (encoding == NULL) + if (encoding == NULL) { +#ifdef Py_USING_UNICODE encoding = PyUnicode_GetDefaultEncoding(); +#else + PyErr_SetString(PyExc_ValueError, "no encoding specified"); + goto onError; +#endif + } /* Decode via the codec registry */ v = PyCodec_Decode(str, encoding, errors); @@ -197,6 +203,7 @@ PyObject *PyString_AsDecodedString(PyObject *str, if (v == NULL) goto onError; +#ifdef Py_USING_UNICODE /* Convert Unicode to a string using the default encoding */ if (PyUnicode_Check(v)) { PyObject *temp = v; @@ -205,6 +212,7 @@ PyObject *PyString_AsDecodedString(PyObject *str, if (v == NULL) goto onError; } +#endif if (!PyString_Check(v)) { PyErr_Format(PyExc_TypeError, "decoder did not return a string object (type=%.400s)", @@ -245,8 +253,14 @@ PyObject *PyString_AsEncodedObject(PyObject *str, goto onError; } - if (encoding == NULL) + if (encoding == NULL) { +#ifdef Py_USING_UNICODE encoding = PyUnicode_GetDefaultEncoding(); +#else + 
PyErr_SetString(PyExc_ValueError, "no encoding specified"); + goto onError; +#endif + } /* Encode via the codec registry */ v = PyCodec_Encode(str, encoding, errors); @@ -269,6 +283,7 @@ PyObject *PyString_AsEncodedString(PyObject *str, if (v == NULL) goto onError; +#ifdef Py_USING_UNICODE /* Convert Unicode to a string using the default encoding */ if (PyUnicode_Check(v)) { PyObject *temp = v; @@ -277,6 +292,7 @@ PyObject *PyString_AsEncodedString(PyObject *str, if (v == NULL) goto onError; } +#endif if (!PyString_Check(v)) { PyErr_Format(PyExc_TypeError, "encoder did not return a string object (type=%.400s)", @@ -344,12 +360,15 @@ PyString_AsStringAndSize(register PyObject *obj, } if (!PyString_Check(obj)) { +#ifdef Py_USING_UNICODE if (PyUnicode_Check(obj)) { obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); if (obj == NULL) return -1; } - else { + else +#endif + { PyErr_Format(PyExc_TypeError, "expected string or Unicode object, " "%.200s found", obj->ob_type->tp_name); @@ -477,8 +496,10 @@ string_concat(register PyStringObject *a, register PyObject *bb) register unsigned int size; register PyStringObject *op; if (!PyString_Check(bb)) { +#ifdef Py_USING_UNICODE if (PyUnicode_Check(bb)) return PyUnicode_Concat((PyObject *)a, bb); +#endif PyErr_Format(PyExc_TypeError, "cannot add type \"%.200s\" to string", bb->ob_type->tp_name); @@ -586,8 +607,10 @@ string_contains(PyObject *a, PyObject *el) { register char *s, *end; register char c; +#ifdef Py_USING_UNICODE if (PyUnicode_Check(el)) return PyUnicode_Contains(a, el); +#endif if (!PyString_Check(el) || PyString_Size(el) != 1) { PyErr_SetString(PyExc_TypeError, "'in <string>' requires character as left operand"); @@ -868,8 +891,10 @@ string_split(PyStringObject *self, PyObject *args) sub = PyString_AS_STRING(subobj); n = PyString_GET_SIZE(subobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) return PyUnicode_Split((PyObject *)self, subobj, maxsplit); +#endif else if (PyObject_AsCharBuffer(subobj, 
&sub, &n)) return NULL; if (n == 0) { @@ -969,6 +994,7 @@ string_join(PyStringObject *self, PyObject *orig) const size_t old_sz = sz; item = PySequence_Fast_GET_ITEM(seq, i); if (!PyString_Check(item)){ +#ifdef Py_USING_UNICODE if (PyUnicode_Check(item)) { /* Defer to Unicode join. * CAUTION: There's no gurantee that the @@ -980,6 +1006,7 @@ string_join(PyStringObject *self, PyObject *orig) Py_DECREF(seq); return result; } +#endif PyErr_Format(PyExc_TypeError, "sequence item %i: expected string," " %.80s found", @@ -1046,8 +1073,10 @@ string_find_internal(PyStringObject *self, PyObject *args, int dir) sub = PyString_AS_STRING(subobj); n = PyString_GET_SIZE(subobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) return PyUnicode_Find((PyObject *)self, subobj, i, last, 1); +#endif else if (PyObject_AsCharBuffer(subobj, &sub, &n)) return -2; @@ -1381,6 +1410,7 @@ string_count(PyStringObject *self, PyObject *args) sub = PyString_AS_STRING(subobj); n = PyString_GET_SIZE(subobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) { int count; count = PyUnicode_Count((PyObject *)self, subobj, i, last); @@ -1389,6 +1419,7 @@ string_count(PyStringObject *self, PyObject *args) else return PyInt_FromLong((long) count); } +#endif else if (PyObject_AsCharBuffer(subobj, &sub, &n)) return NULL; @@ -1481,6 +1512,7 @@ string_translate(PyStringObject *self, PyObject *args) table1 = PyString_AS_STRING(tableobj); tablen = PyString_GET_SIZE(tableobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(tableobj)) { /* Unicode .translate() does not support the deletechars parameter; instead a mapping to None will cause characters @@ -1492,6 +1524,7 @@ string_translate(PyStringObject *self, PyObject *args) } return PyUnicode_Translate((PyObject *)self, tableobj, NULL); } +#endif else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen)) return NULL; @@ -1500,11 +1533,13 @@ string_translate(PyStringObject *self, PyObject *args) del_table = 
PyString_AS_STRING(delobj); dellen = PyString_GET_SIZE(delobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(delobj)) { PyErr_SetString(PyExc_TypeError, "deletions are implemented differently for unicode"); return NULL; } +#endif else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) return NULL; @@ -1729,9 +1764,11 @@ string_replace(PyStringObject *self, PyObject *args) sub = PyString_AS_STRING(subobj); sub_len = PyString_GET_SIZE(subobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) return PyUnicode_Replace((PyObject *)self, subobj, replobj, count); +#endif else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) return NULL; @@ -1739,9 +1776,11 @@ string_replace(PyStringObject *self, PyObject *args) repl = PyString_AS_STRING(replobj); repl_len = PyString_GET_SIZE(replobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(replobj)) return PyUnicode_Replace((PyObject *)self, subobj, replobj, count); +#endif else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len)) return NULL; @@ -1792,6 +1831,7 @@ string_startswith(PyStringObject *self, PyObject *args) prefix = PyString_AS_STRING(subobj); plen = PyString_GET_SIZE(subobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) { int rc; rc = PyUnicode_Tailmatch((PyObject *)self, @@ -1801,6 +1841,7 @@ string_startswith(PyStringObject *self, PyObject *args) else return PyInt_FromLong((long) rc); } +#endif else if (PyObject_AsCharBuffer(subobj, &prefix, &plen)) return NULL; @@ -1850,6 +1891,7 @@ string_endswith(PyStringObject *self, PyObject *args) suffix = PyString_AS_STRING(subobj); slen = PyString_GET_SIZE(subobj); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(subobj)) { int rc; rc = PyUnicode_Tailmatch((PyObject *)self, @@ -1859,6 +1901,7 @@ string_endswith(PyStringObject *self, PyObject *args) else return PyInt_FromLong((long) rc); } +#endif else if (PyObject_AsCharBuffer(subobj, &suffix, &slen)) return NULL; @@ -2876,7 +2919,10 @@ PyString_Format(PyObject *format, 
PyObject *args) char *fmt, *res; int fmtcnt, rescnt, reslen, arglen, argidx; int args_owned = 0; - PyObject *result, *orig_args, *v, *w; + PyObject *result, *orig_args; +#ifdef Py_USING_UNICODE + PyObject *v, *w; +#endif PyObject *dict = NULL; if (format == NULL || !PyString_Check(format) || args == NULL) { PyErr_BadInternalCall(); @@ -2926,8 +2972,10 @@ PyString_Format(PyObject *format, PyObject *args) int sign; int len; char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */ +#ifdef Py_USING_UNICODE char *fmt_start = fmt; int argidx_start = argidx; +#endif fmt++; if (*fmt == '(') { @@ -3078,11 +3126,13 @@ PyString_Format(PyObject *format, PyObject *args) break; case 's': case 'r': +#ifdef Py_USING_UNICODE if (PyUnicode_Check(v)) { fmt = fmt_start; argidx = argidx_start; goto unicode; } +#endif if (c == 's') temp = PyObject_Str(v); else @@ -3240,6 +3290,7 @@ PyString_Format(PyObject *format, PyObject *args) _PyString_Resize(&result, reslen - rescnt); return result; +#ifdef Py_USING_UNICODE unicode: if (args_owned) { Py_DECREF(args); @@ -3284,6 +3335,7 @@ PyString_Format(PyObject *format, PyObject *args) Py_DECREF(v); Py_DECREF(args); return w; +#endif /* Py_USING_UNICODE */ error: Py_DECREF(result); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 12af47f..d0f934f 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -276,6 +276,7 @@ static char chr_doc[] = Return a string of one character with ordinal i; 0 <= i < 256."; +#ifdef Py_USING_UNICODE static PyObject * builtin_unichr(PyObject *self, PyObject *args) { @@ -324,6 +325,7 @@ static char unichr_doc[] = "unichr(i) -> Unicode character\n\ \n\ Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."; +#endif static PyObject * @@ -630,11 +632,13 @@ builtin_getattr(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "OO|O:getattr", &v, &name, &dflt)) return NULL; +#ifdef Py_USING_UNICODE if (PyUnicode_Check(name)) { name = 
_PyUnicode_AsDefaultEncodedString(name, NULL); if (name == NULL) return NULL; } +#endif if (!PyString_Check(name)) { PyErr_SetString(PyExc_TypeError, @@ -682,11 +686,13 @@ builtin_hasattr(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "OO:hasattr", &v, &name)) return NULL; +#ifdef Py_USING_UNICODE if (PyUnicode_Check(name)) { name = _PyUnicode_AsDefaultEncodedString(name, NULL); if (name == NULL) return NULL; } +#endif if (!PyString_Check(name)) { PyErr_SetString(PyExc_TypeError, @@ -1252,12 +1258,14 @@ builtin_ord(PyObject *self, PyObject* obj) ord = (long)((unsigned char)*PyString_AS_STRING(obj)); return PyInt_FromLong(ord); } +#ifdef Py_USING_UNICODE } else if (PyUnicode_Check(obj)) { size = PyUnicode_GET_SIZE(obj); if (size == 1) { ord = (long)*PyUnicode_AS_UNICODE(obj); return PyInt_FromLong(ord); } +#endif } else { PyErr_Format(PyExc_TypeError, "ord() expected string of length 1, but " \ @@ -1843,7 +1851,9 @@ static PyMethodDef builtin_methods[] = { {"round", builtin_round, METH_VARARGS, round_doc}, {"setattr", builtin_setattr, METH_VARARGS, setattr_doc}, {"slice", builtin_slice, METH_VARARGS, slice_doc}, +#ifdef Py_USING_UNICODE {"unichr", builtin_unichr, METH_VARARGS, unichr_doc}, +#endif {"vars", builtin_vars, METH_VARARGS, vars_doc}, {"xrange", builtin_xrange, METH_VARARGS, xrange_doc}, {"zip", builtin_zip, METH_VARARGS, zip_doc}, @@ -1905,9 +1915,11 @@ _PyBuiltin_Init(void) return NULL; if (PyDict_SetItemString(dict, "type", (PyObject *) &PyType_Type) < 0) return NULL; +#ifdef Py_USING_UNICODE if (PyDict_SetItemString(dict, "unicode", (PyObject *) &PyUnicode_Type) < 0) return NULL; +#endif debug = PyInt_FromLong(Py_OptimizeFlag == 0); if (PyDict_SetItemString(dict, "__debug__", debug) < 0) { Py_XDECREF(debug); diff --git a/Python/compile.c b/Python/compile.c index 21349ba..c6c3394 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -520,8 +520,8 @@ static void com_assign_name(struct compiling *, node *, int); static PyCodeObject 
*icompile(node *, struct compiling *); static PyCodeObject *jcompile(node *, char *, struct compiling *, PyCompilerFlags *); -static PyObject *parsestrplus(node *); -static PyObject *parsestr(char *); +static PyObject *parsestrplus(struct compiling*, node *); +static PyObject *parsestr(struct compiling *, char *); static node *get_rawdocstring(node *); static int get_ref_type(struct compiling *, char *); @@ -1111,7 +1111,7 @@ parsenumber(struct compiling *co, char *s) } static PyObject * -parsestr(char *s) +parsestr(struct compiling *com, char *s) { PyObject *v; size_t len; @@ -1122,11 +1122,19 @@ parsestr(char *s) int first = *s; int quote = first; int rawmode = 0; +#ifdef Py_USING_UNICODE int unicode = 0; +#endif if (isalpha(quote) || quote == '_') { if (quote == 'u' || quote == 'U') { +#ifdef Py_USING_UNICODE quote = *++s; unicode = 1; +#else + com_error(com, PyExc_SyntaxError, + "Unicode literals not supported in this Python"); + return NULL; +#endif } if (quote == 'r' || quote == 'R') { quote = *++s; @@ -1155,6 +1163,7 @@ parsestr(char *s) return NULL; } } +#ifdef Py_USING_UNICODE if (unicode || Py_UnicodeFlag) { if (rawmode) return PyUnicode_DecodeRawUnicodeEscape( @@ -1163,6 +1172,7 @@ parsestr(char *s) return PyUnicode_DecodeUnicodeEscape( s, len, NULL); } +#endif if (rawmode || strchr(s, '\\') == NULL) return PyString_FromStringAndSize(s, len); v = PyString_FromStringAndSize((char *)NULL, len); @@ -1238,16 +1248,16 @@ parsestr(char *s) } static PyObject * -parsestrplus(node *n) +parsestrplus(struct compiling* c, node *n) { PyObject *v; int i; REQ(CHILD(n, 0), STRING); - if ((v = parsestr(STR(CHILD(n, 0)))) != NULL) { + if ((v = parsestr(c, STR(CHILD(n, 0)))) != NULL) { /* String literal concatenation */ for (i = 1; i < NCH(n); i++) { PyObject *s; - s = parsestr(STR(CHILD(n, i))); + s = parsestr(c, STR(CHILD(n, i))); if (s == NULL) goto onError; if (PyString_Check(v) && PyString_Check(s)) { @@ -1255,6 +1265,7 @@ parsestrplus(node *n) if (v == NULL) goto 
onError; } +#ifdef Py_USING_UNICODE else { PyObject *temp; temp = PyUnicode_Concat(v, s); @@ -1264,6 +1275,7 @@ parsestrplus(node *n) Py_DECREF(v); v = temp; } +#endif } } return v; @@ -1445,7 +1457,7 @@ com_atom(struct compiling *c, node *n) com_push(c, 1); break; case STRING: - v = parsestrplus(n); + v = parsestrplus(c, n); if (v == NULL) { c->c_errors++; i = 255; @@ -2936,7 +2948,7 @@ is_constant_false(struct compiling *c, node *n) return i == 0; case STRING: - v = parsestr(STR(n)); + v = parsestr(c, STR(n)); if (v == NULL) { PyErr_Clear(); break; @@ -3330,7 +3342,7 @@ get_rawdocstring(node *n) } static PyObject * -get_docstring(node *n) +get_docstring(struct compiling *c, node *n) { /* Don't generate doc-strings if run with -OO */ if (Py_OptimizeFlag > 1) @@ -3338,7 +3350,7 @@ get_docstring(node *n) n = get_rawdocstring(n); if (n == NULL) return NULL; - return parsestrplus(n); + return parsestrplus(c, n); } static void @@ -3794,7 +3806,7 @@ com_file_input(struct compiling *c, node *n) int i; PyObject *doc; REQ(n, file_input); /* (NEWLINE | stmt)* ENDMARKER */ - doc = get_docstring(n); + doc = get_docstring(c, n); if (doc != NULL) { int i = com_addconst(c, doc); Py_DECREF(doc); @@ -3819,7 +3831,7 @@ compile_funcdef(struct compiling *c, node *n) node *ch; REQ(n, funcdef); /* funcdef: 'def' NAME parameters ':' suite */ c->c_name = STR(CHILD(n, 1)); - doc = get_docstring(CHILD(n, 4)); + doc = get_docstring(c, CHILD(n, 4)); if (doc != NULL) { (void) com_addconst(c, doc); Py_DECREF(doc); @@ -3869,7 +3881,7 @@ compile_classdef(struct compiling *c, node *n) c->c_name = STR(CHILD(n, 1)); c->c_private = c->c_name; ch = CHILD(n, NCH(n)-1); /* The suite */ - doc = get_docstring(ch); + doc = get_docstring(c, ch); if (doc != NULL) { int i = com_addconst(c, doc); Py_DECREF(doc); diff --git a/Python/getargs.c b/Python/getargs.c index c9a5273..9f76ac0 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -566,6 +566,7 @@ convertsimple(PyObject *arg, char **p_format, va_list 
*p_va, char *msgbuf) *p = PyString_AS_STRING(arg); *q = PyString_GET_SIZE(arg); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(arg)) { arg = UNICODE_DEFAULT_ENCODING(arg); if (arg == NULL) @@ -574,6 +575,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) *p = PyString_AS_STRING(arg); *q = PyString_GET_SIZE(arg); } +#endif else { /* any buffer-like object */ char *buf; int count = convertbuffer(arg, p, &buf); @@ -587,6 +589,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) if (PyString_Check(arg)) *p = PyString_AS_STRING(arg); +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(arg)) { arg = UNICODE_DEFAULT_ENCODING(arg); if (arg == NULL) @@ -594,6 +597,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) arg, msgbuf); *p = PyString_AS_STRING(arg); } +#endif else return converterr("string", arg, msgbuf); if ((int)strlen(*p) != PyString_Size(arg)) @@ -616,6 +620,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) *p = PyString_AS_STRING(arg); *q = PyString_GET_SIZE(arg); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(arg)) { arg = UNICODE_DEFAULT_ENCODING(arg); if (arg == NULL) @@ -624,6 +629,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) *p = PyString_AS_STRING(arg); *q = PyString_GET_SIZE(arg); } +#endif else { /* any buffer-like object */ char *buf; int count = convertbuffer(arg, p, &buf); @@ -640,6 +646,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) *p = 0; else if (PyString_Check(arg)) *p = PyString_AsString(arg); +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(arg)) { arg = UNICODE_DEFAULT_ENCODING(arg); if (arg == NULL) @@ -647,6 +654,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) arg, msgbuf); *p = PyString_AS_STRING(arg); } +#endif else return converterr("string or None", arg, msgbuf); @@ -670,13 +678,15 @@ convertsimple(PyObject *arg, 
char **p_format, va_list *p_va, char *msgbuf) case 'e': {/* encoded string */ char **buffer; const char *encoding; - PyObject *u, *s; + PyObject *s; int size, recode_strings; /* Get 'e' parameter: the encoding name */ encoding = (const char *)va_arg(*p_va, const char *); +#ifdef Py_USING_UNICODE if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); +#endif /* Get output buffer parameter: 's' (recode all objects via Unicode) or @@ -702,6 +712,9 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) Py_INCREF(s); } else { +#ifdef Py_USING_UNICODE + PyObject *u; + /* Convert object to Unicode */ u = PyUnicode_FromObject(arg); if (u == NULL) @@ -723,6 +736,9 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) "(encoder failed to return a string)", arg, msgbuf); } +#else + return converterr("string<e>", arg, msgbuf); +#endif } size = PyString_GET_SIZE(s); @@ -808,6 +824,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) break; } +#ifdef Py_USING_UNICODE case 'u': {/* raw unicode buffer (Py_UNICODE *) */ if (*format == '#') { /* any buffer-like object */ void **p = (void **)va_arg(*p_va, char **); @@ -829,6 +846,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) } break; } +#endif case 'S': { /* string object */ PyObject **p = va_arg(*p_va, PyObject **); @@ -839,6 +857,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) break; } +#ifdef Py_USING_UNICODE case 'U': { /* Unicode object */ PyObject **p = va_arg(*p_va, PyObject **); if (PyUnicode_Check(arg)) @@ -847,6 +866,7 @@ convertsimple(PyObject *arg, char **p_format, va_list *p_va, char *msgbuf) return converterr("unicode", arg, msgbuf); break; } +#endif case 'O': { /* object */ PyTypeObject *type; diff --git a/Python/marshal.c b/Python/marshal.c index 008659d..5ef11ef 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -187,6 +187,7 @@ w_object(PyObject *v, WFILE *p) 
w_long((long)n, p); w_string(PyString_AS_STRING(v), n, p); } +#ifdef Py_USING_UNICODE else if (PyUnicode_Check(v)) { PyObject *utf8; utf8 = PyUnicode_AsUTF8String(v); @@ -201,6 +202,7 @@ w_object(PyObject *v, WFILE *p) w_string(PyString_AS_STRING(utf8), n, p); Py_DECREF(utf8); } +#endif else if (PyTuple_Check(v)) { w_byte(TYPE_TUPLE, p); n = PyTuple_Size(v); @@ -472,6 +474,7 @@ r_object(RFILE *p) } return v; +#ifdef Py_USING_UNICODE case TYPE_UNICODE: { char *buffer; @@ -494,6 +497,7 @@ r_object(RFILE *p) PyMem_DEL(buffer); return v; } +#endif case TYPE_TUPLE: n = r_long(p); diff --git a/Python/modsupport.c b/Python/modsupport.c index 8fad54a..0450a8a 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -199,6 +199,7 @@ do_mklist(char **p_format, va_list *p_va, int endchar, int n) return v; } +#ifdef Py_USING_UNICODE static int _ustrlen(Py_UNICODE *u) { @@ -207,6 +208,7 @@ _ustrlen(Py_UNICODE *u) while (*v != 0) { i++; v++; } return i; } +#endif static PyObject * do_mktuple(char **p_format, va_list *p_va, int endchar, int n) @@ -269,6 +271,7 @@ do_mkvalue(char **p_format, va_list *p_va) case 'L': return PyLong_FromLongLong((LONG_LONG)va_arg(*p_va, LONG_LONG)); #endif +#ifdef Py_USING_UNICODE case 'u': { PyObject *v; @@ -291,6 +294,7 @@ do_mkvalue(char **p_format, va_list *p_va) } return v; } +#endif case 'f': case 'd': return PyFloat_FromDouble( diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 232ba55..621ce9d 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -125,8 +125,10 @@ Py_Initialize(void) /* Init codec registry */ _PyCodecRegistry_Init(); +#ifdef Py_USING_UNICODE /* Init Unicode implementation; relies on the codec registry */ _PyUnicode_Init(); +#endif bimod = _PyBuiltin_Init(); if (bimod == NULL) @@ -206,8 +208,10 @@ Py_Finalize(void) /* Disable signal handling */ PyOS_FiniInterrupts(); +#ifdef Py_USING_UNICODE /* Cleanup Unicode implementation */ _PyUnicode_Fini(); +#endif /* Cleanup Codec registry */ _PyCodecRegistry_Fini(); 
diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 2eae03d..82d2999 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -176,6 +176,8 @@ static char getdefaultencoding_doc[] = Return the current default string encoding used by the Unicode \n\ implementation."; +#ifdef Py_USING_UNICODE + static PyObject * sys_setdefaultencoding(PyObject *self, PyObject *args) { @@ -193,6 +195,8 @@ static char setdefaultencoding_doc[] = \n\ Set the current default string encoding used by the Unicode implementation."; +#endif + /* * Cached interned string objects used for calling the profile and * trace functions. Initialized by trace_init(). @@ -530,8 +534,10 @@ static PyMethodDef sys_methods[] = { {"exc_info", (PyCFunction)sys_exc_info, METH_NOARGS, exc_info_doc}, {"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc}, {"exit", sys_exit, METH_OLDARGS, exit_doc}, +#ifdef Py_USING_UNICODE {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, METH_NOARGS, getdefaultencoding_doc}, +#endif #ifdef HAVE_DLOPEN {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS, getdlopenflags_doc}, @@ -553,8 +559,10 @@ static PyMethodDef sys_methods[] = { #ifdef USE_MALLOPT {"mdebug", sys_mdebug, METH_VARARGS}, #endif +#ifdef Py_USING_UNICODE {"setdefaultencoding", sys_setdefaultencoding, METH_VARARGS, setdefaultencoding_doc}, +#endif {"setcheckinterval", sys_setcheckinterval, METH_VARARGS, setcheckinterval_doc}, #ifdef HAVE_DLOPEN @@ -782,9 +790,11 @@ _PySys_Init(void) PyDict_SetItemString(sysdict, "maxint", v = PyInt_FromLong(PyInt_GetMax())); Py_XDECREF(v); +#ifdef Py_USING_UNICODE PyDict_SetItemString(sysdict, "maxunicode", v = PyInt_FromLong(PyUnicode_GetMax())); Py_XDECREF(v); +#endif PyDict_SetItemString(sysdict, "builtin_module_names", v = list_builtin_module_names()); Py_XDECREF(v); @@ -6693,10 +6693,13 @@ EOF ;; esac + if test "$enable_unicode" = "no" then + UNICODE_OBJS="" echo "$ac_t""not used" 1>&6 else + UNICODE_OBJS="Objects/unicodeobject.o 
Objects/unicodectype.o" cat >> confdefs.h <<\EOF #define Py_USING_UNICODE 1 EOF @@ -6734,14 +6737,14 @@ fi # check for endianness echo $ac_n "checking whether byte ordering is bigendian""... $ac_c" 1>&6 -echo "configure:6738: checking whether byte ordering is bigendian" >&5 +echo "configure:6741: checking whether byte ordering is bigendian" >&5 if eval "test \"`echo '$''{'ac_cv_c_bigendian'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_cv_c_bigendian=unknown # See if sys/param.h defines the BYTE_ORDER macro. cat > conftest.$ac_ext <<EOF -#line 6745 "configure" +#line 6748 "configure" #include "confdefs.h" #include <sys/types.h> #include <sys/param.h> @@ -6752,11 +6755,11 @@ int main() { #endif ; return 0; } EOF -if { (eval echo configure:6756: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:6759: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* # It does; now see whether it defined to BIG_ENDIAN or not. cat > conftest.$ac_ext <<EOF -#line 6760 "configure" +#line 6763 "configure" #include "confdefs.h" #include <sys/types.h> #include <sys/param.h> @@ -6767,7 +6770,7 @@ int main() { #endif ; return 0; } EOF -if { (eval echo configure:6771: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:6774: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_c_bigendian=yes else @@ -6787,7 +6790,7 @@ if test "$cross_compiling" = yes; then { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } else cat > conftest.$ac_ext <<EOF -#line 6791 "configure" +#line 6794 "configure" #include "confdefs.h" main () { /* Are we little or big endian? From Harbison&Steele. 
*/ @@ -6800,7 +6803,7 @@ main () { exit (u.c[sizeof (long) - 1] == 1); } EOF -if { (eval echo configure:6804: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:6807: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_c_bigendian=no else @@ -6827,7 +6830,7 @@ fi # Check whether right shifting a negative integer extends the sign bit # or fills with zeros (like the Cray J90, according to Tim Peters). echo $ac_n "checking whether right shift extends the sign bit""... $ac_c" 1>&6 -echo "configure:6831: checking whether right shift extends the sign bit" >&5 +echo "configure:6834: checking whether right shift extends the sign bit" >&5 if eval "test \"`echo '$''{'ac_cv_rshift_extends_sign'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -6836,7 +6839,7 @@ if test "$cross_compiling" = yes; then { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } else cat > conftest.$ac_ext <<EOF -#line 6840 "configure" +#line 6843 "configure" #include "confdefs.h" int main() @@ -6845,7 +6848,7 @@ int main() } EOF -if { (eval echo configure:6849: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:6852: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_rshift_extends_sign=yes else @@ -6870,13 +6873,13 @@ fi # check for getc_unlocked and related locking functions echo $ac_n "checking for getc_unlocked() and friends""... 
$ac_c" 1>&6 -echo "configure:6874: checking for getc_unlocked() and friends" >&5 +echo "configure:6877: checking for getc_unlocked() and friends" >&5 if eval "test \"`echo '$''{'ac_cv_have_getc_unlocked'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <<EOF -#line 6880 "configure" +#line 6883 "configure" #include "confdefs.h" #include <stdio.h> int main() { @@ -6888,7 +6891,7 @@ int main() { ; return 0; } EOF -if { (eval echo configure:6892: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:6895: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_have_getc_unlocked=yes else @@ -6911,7 +6914,7 @@ fi # check for readline 4.2 echo $ac_n "checking for rl_completion_matches in -lreadline""... $ac_c" 1>&6 -echo "configure:6915: checking for rl_completion_matches in -lreadline" >&5 +echo "configure:6918: checking for rl_completion_matches in -lreadline" >&5 ac_lib_var=`echo readline'_'rl_completion_matches | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -6919,7 +6922,7 @@ else ac_save_LIBS="$LIBS" LIBS="-lreadline -ltermcap $LIBS" cat > conftest.$ac_ext <<EOF -#line 6923 "configure" +#line 6926 "configure" #include "confdefs.h" /* Override any gcc2 internal prototype to avoid an error. */ /* We use char because int might match the return type of a gcc2 @@ -6930,7 +6933,7 @@ int main() { rl_completion_matches() ; return 0; } EOF -if { (eval echo configure:6934: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:6937: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -6955,7 +6958,7 @@ fi echo $ac_n "checking for broken nice()""... 
$ac_c" 1>&6 -echo "configure:6959: checking for broken nice()" >&5 +echo "configure:6962: checking for broken nice()" >&5 if eval "test \"`echo '$''{'ac_cv_broken_nice'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -6964,7 +6967,7 @@ if test "$cross_compiling" = yes; then { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } else cat > conftest.$ac_ext <<EOF -#line 6968 "configure" +#line 6971 "configure" #include "confdefs.h" int main() @@ -6976,7 +6979,7 @@ int main() } EOF -if { (eval echo configure:6980: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:6983: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_broken_nice=yes else @@ -7007,12 +7010,12 @@ cat >> confdefs.h <<\EOF #endif EOF echo $ac_n "checking for socklen_t""... $ac_c" 1>&6 -echo "configure:7011: checking for socklen_t" >&5 +echo "configure:7014: checking for socklen_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_socklen_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <<EOF -#line 7016 "configure" +#line 7019 "configure" #include "confdefs.h" #include <sys/types.h> #if STDC_HEADERS @@ -7061,7 +7064,7 @@ done SRCDIRS="Parser Grammar Objects Python Modules" echo $ac_n "checking for build directories""... $ac_c" 1>&6 -echo "configure:7065: checking for build directories" >&5 +echo "configure:7068: checking for build directories" >&5 for dir in $SRCDIRS; do if test ! 
-d $dir; then mkdir $dir @@ -7252,6 +7255,7 @@ s%@HAVE_GETHOSTBYNAME_R@%$HAVE_GETHOSTBYNAME_R%g s%@HAVE_GETHOSTBYNAME@%$HAVE_GETHOSTBYNAME%g s%@LIBM@%$LIBM%g s%@LIBC@%$LIBC%g +s%@UNICODE_OBJS@%$UNICODE_OBJS%g s%@SRCDIRS@%$SRCDIRS%g CEOF diff --git a/configure.in b/configure.in index 515bc1e..329c07e 100644 --- a/configure.in +++ b/configure.in @@ -1680,10 +1680,13 @@ ucs4) unicode_size="4" ;; esac +AC_SUBST(UNICODE_OBJS) if test "$enable_unicode" = "no" then + UNICODE_OBJS="" AC_MSG_RESULT(not used) else + UNICODE_OBJS="Objects/unicodeobject.o Objects/unicodectype.o" AC_DEFINE(Py_USING_UNICODE) if test "$unicode_size" = "$ac_cv_sizeof_wchar_t" then @@ -185,6 +185,11 @@ class PyBuildExt(build_ext): if '/usr/local/include' not in self.compiler.include_dirs: self.compiler.include_dirs.insert(0, '/usr/local/include' ) + try: + have_unicode = unicode + except NameError: + have_unicode = 0 + # lib_dirs and inc_dirs are used to search for files; # if a file is found in one of those directories, it can # be assumed that no additional -I,-L directives are needed. @@ -235,7 +240,8 @@ class PyBuildExt(build_ext): # Python C API test module exts.append( Extension('_testcapi', ['_testcapimodule.c']) ) # static Unicode character database - exts.append( Extension('unicodedata', ['unicodedata.c']) ) + if have_unicode: + exts.append( Extension('unicodedata', ['unicodedata.c']) ) # access to ISO C locale support exts.append( Extension('_locale', ['_localemodule.c']) ) |