From 9ff19b54346d39d15cdcf75e9d66ab46ea6064d6 Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Mon, 17 Mar 2008 17:32:20 +0000 Subject: Finished backporting PEP 3127, Integer Literal Support and Syntax. Added 0b and 0o literals to tokenizer. Modified PyOS_strtoul to support 0b and 0o inputs. Modified PyLong_FromString to support guessing 0b and 0o inputs. Renamed test_hexoct.py to test_int_literal.py and added binary tests. Added upper and lower case 0b, 0O, and 0X tests to test_int_literal.py --- Lib/test/test_builtin.py | 45 ++++++++++ Lib/test/test_compile.py | 8 +- Lib/test/test_hexoct.py | 116 -------------------------- Lib/test/test_int_literal.py | 191 +++++++++++++++++++++++++++++++++++++++++++ Objects/longobject.c | 15 +++- Parser/tokenizer.c | 26 +++++- Python/mystrtoul.c | 60 ++++++++++++-- 7 files changed, 336 insertions(+), 125 deletions(-) delete mode 100644 Lib/test/test_hexoct.py create mode 100644 Lib/test/test_int_literal.py diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 4057ba1..9fa973e 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -853,6 +853,12 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(ValueError, int, "0x", 16) self.assertRaises(ValueError, int, "0x", 0) + self.assertRaises(ValueError, int, "0o", 8) + self.assertRaises(ValueError, int, "0o", 0) + + self.assertRaises(ValueError, int, "0b", 2) + self.assertRaises(ValueError, int, "0b", 0) + # SF bug 1334662: int(string, base) wrong answers # Various representations of 2**32 evaluated to 0 @@ -894,6 +900,45 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(int('2br45qb', 35), 4294967296L) self.assertEqual(int('1z141z4', 36), 4294967296L) + # tests with base 0 + # this fails on 3.0, but in 2.x the old octal syntax is allowed + self.assertEqual(int(' 0123 ', 0), 83) + self.assertEqual(int(' 0123 ', 0), 83) + self.assertEqual(int('000', 0), 0) + self.assertEqual(int('0o123', 0), 83) + self.assertEqual(int('0x123', 0), 291) + 
self.assertEqual(int('0b100', 0), 4) + self.assertEqual(int(' 0O123 ', 0), 83) + self.assertEqual(int(' 0X123 ', 0), 291) + self.assertEqual(int(' 0B100 ', 0), 4) + + # without base still base 10 + self.assertEqual(int('0123'), 123) + self.assertEqual(int('0123', 10), 123) + + # tests with prefix and base != 0 + self.assertEqual(int('0x123', 16), 291) + self.assertEqual(int('0o123', 8), 83) + self.assertEqual(int('0b100', 2), 4) + self.assertEqual(int('0X123', 16), 291) + self.assertEqual(int('0O123', 8), 83) + self.assertEqual(int('0B100', 2), 4) + + # the code has special checks for the first character after the + # type prefix + self.assertRaises(ValueError, int, '0b2', 2) + self.assertRaises(ValueError, int, '0b02', 2) + self.assertRaises(ValueError, int, '0B2', 2) + self.assertRaises(ValueError, int, '0B02', 2) + self.assertRaises(ValueError, int, '0o8', 8) + self.assertRaises(ValueError, int, '0o08', 8) + self.assertRaises(ValueError, int, '0O8', 8) + self.assertRaises(ValueError, int, '0O08', 8) + self.assertRaises(ValueError, int, '0xg', 16) + self.assertRaises(ValueError, int, '0x0g', 16) + self.assertRaises(ValueError, int, '0Xg', 16) + self.assertRaises(ValueError, int, '0X0g', 16) + # SF bug 1334662: int(string, base) wrong answers # Checks for proper evaluation of 2**32 + 1 self.assertEqual(int('100000000000000000000000000000001', 2), 4294967297L) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 367a694..465a90c 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -180,7 +180,9 @@ if 1: def test_literals_with_leading_zeroes(self): for arg in ["077787", "0xj", "0x.", "0e", "090000000000000", - "080000000000000", "000000000000009", "000000000000008"]: + "080000000000000", "000000000000009", "000000000000008", + "0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2", + "0b101j2", "0o153j2", "0b100e1", "0o777e1"]: self.assertRaises(SyntaxError, eval, arg) self.assertEqual(eval("0777"), 511) @@ -208,6 +210,10 @@ if 1: 
self.assertEqual(eval("000000000000007"), 7) self.assertEqual(eval("000000000000008."), 8.) self.assertEqual(eval("000000000000009."), 9.) + self.assertEqual(eval("0b101010"), 42) + self.assertEqual(eval("-0b000000000010"), -2) + self.assertEqual(eval("0o777"), 511) + self.assertEqual(eval("-0o0000010"), -8) def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 diff --git a/Lib/test/test_hexoct.py b/Lib/test/test_hexoct.py deleted file mode 100644 index f71dbe0..0000000 --- a/Lib/test/test_hexoct.py +++ /dev/null @@ -1,116 +0,0 @@ -"""Test correct treatment of hex/oct constants. - -This is complex because of changes due to PEP 237. -""" - -import sys -platform_long_is_32_bits = sys.maxint == 2147483647 - -import unittest -from test import test_support - -import warnings -warnings.filterwarnings("ignore", "hex/oct constants", FutureWarning, - "") - -class TextHexOct(unittest.TestCase): - - def test_hex_baseline(self): - # Baseline tests - self.assertEqual(0x0, 0) - self.assertEqual(0x10, 16) - if platform_long_is_32_bits: - self.assertEqual(0x7fffffff, 2147483647) - else: - self.assertEqual(0x7fffffffffffffff, 9223372036854775807) - # Ditto with a minus sign and parentheses - self.assertEqual(-(0x0), 0) - self.assertEqual(-(0x10), -16) - if platform_long_is_32_bits: - self.assertEqual(-(0x7fffffff), -2147483647) - else: - self.assertEqual(-(0x7fffffffffffffff), -9223372036854775807) - # Ditto with a minus sign and NO parentheses - self.assertEqual(-0x0, 0) - self.assertEqual(-0x10, -16) - if platform_long_is_32_bits: - self.assertEqual(-0x7fffffff, -2147483647) - else: - self.assertEqual(-0x7fffffffffffffff, -9223372036854775807) - - def test_hex_unsigned(self): - if platform_long_is_32_bits: - # Positive constants - self.assertEqual(0x80000000, 2147483648L) - self.assertEqual(0xffffffff, 4294967295L) - # Ditto with a minus sign and parentheses - self.assertEqual(-(0x80000000), -2147483648L) - 
self.assertEqual(-(0xffffffff), -4294967295L) - # Ditto with a minus sign and NO parentheses - # This failed in Python 2.2 through 2.2.2 and in 2.3a1 - self.assertEqual(-0x80000000, -2147483648L) - self.assertEqual(-0xffffffff, -4294967295L) - else: - # Positive constants - self.assertEqual(0x8000000000000000, 9223372036854775808L) - self.assertEqual(0xffffffffffffffff, 18446744073709551615L) - # Ditto with a minus sign and parentheses - self.assertEqual(-(0x8000000000000000), -9223372036854775808L) - self.assertEqual(-(0xffffffffffffffff), -18446744073709551615L) - # Ditto with a minus sign and NO parentheses - # This failed in Python 2.2 through 2.2.2 and in 2.3a1 - self.assertEqual(-0x8000000000000000, -9223372036854775808L) - self.assertEqual(-0xffffffffffffffff, -18446744073709551615L) - - def test_oct_baseline(self): - # Baseline tests - self.assertEqual(00, 0) - self.assertEqual(020, 16) - if platform_long_is_32_bits: - self.assertEqual(017777777777, 2147483647) - else: - self.assertEqual(0777777777777777777777, 9223372036854775807) - # Ditto with a minus sign and parentheses - self.assertEqual(-(00), 0) - self.assertEqual(-(020), -16) - if platform_long_is_32_bits: - self.assertEqual(-(017777777777), -2147483647) - else: - self.assertEqual(-(0777777777777777777777), -9223372036854775807) - # Ditto with a minus sign and NO parentheses - self.assertEqual(-00, 0) - self.assertEqual(-020, -16) - if platform_long_is_32_bits: - self.assertEqual(-017777777777, -2147483647) - else: - self.assertEqual(-0777777777777777777777, -9223372036854775807) - - def test_oct_unsigned(self): - if platform_long_is_32_bits: - # Positive constants - self.assertEqual(020000000000, 2147483648L) - self.assertEqual(037777777777, 4294967295L) - # Ditto with a minus sign and parentheses - self.assertEqual(-(020000000000), -2147483648L) - self.assertEqual(-(037777777777), -4294967295L) - # Ditto with a minus sign and NO parentheses - # This failed in Python 2.2 through 2.2.2 and in 2.3a1 
- self.assertEqual(-020000000000, -2147483648L) - self.assertEqual(-037777777777, -4294967295L) - else: - # Positive constants - self.assertEqual(01000000000000000000000, 9223372036854775808L) - self.assertEqual(01777777777777777777777, 18446744073709551615L) - # Ditto with a minus sign and parentheses - self.assertEqual(-(01000000000000000000000), -9223372036854775808L) - self.assertEqual(-(01777777777777777777777), -18446744073709551615L) - # Ditto with a minus sign and NO parentheses - # This failed in Python 2.2 through 2.2.2 and in 2.3a1 - self.assertEqual(-01000000000000000000000, -9223372036854775808L) - self.assertEqual(-01777777777777777777777, -18446744073709551615L) - -def test_main(): - test_support.run_unittest(TextHexOct) - -if __name__ == "__main__": - test_main() diff --git a/Lib/test/test_int_literal.py b/Lib/test/test_int_literal.py new file mode 100644 index 0000000..f4f08ac --- /dev/null +++ b/Lib/test/test_int_literal.py @@ -0,0 +1,191 @@ +"""Test correct treatment of hex/oct constants. + +This is complex because of changes due to PEP 237. 
+""" + +import unittest +from test import test_support + +import warnings +warnings.filterwarnings("ignore", "hex/oct constants", FutureWarning, + "") + +class TextHexOctBin(unittest.TestCase): + + def test_hex_baseline(self): + # A few upper/lowercase tests + self.assertEqual(0x0, 0X0) + self.assertEqual(0x1, 0X1) + self.assertEqual(0x123456789abcdef, 0X123456789abcdef) + # Baseline tests + self.assertEqual(0x0, 0) + self.assertEqual(0x10, 16) + self.assertEqual(0x7fffffff, 2147483647) + self.assertEqual(0x7fffffffffffffff, 9223372036854775807) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0x0), 0) + self.assertEqual(-(0x10), -16) + self.assertEqual(-(0x7fffffff), -2147483647) + self.assertEqual(-(0x7fffffffffffffff), -9223372036854775807) + # Ditto with a minus sign and NO parentheses + self.assertEqual(-0x0, 0) + self.assertEqual(-0x10, -16) + self.assertEqual(-0x7fffffff, -2147483647) + self.assertEqual(-0x7fffffffffffffff, -9223372036854775807) + + def test_hex_unsigned(self): + # Positive constants + self.assertEqual(0x80000000, 2147483648L) + self.assertEqual(0xffffffff, 4294967295L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0x80000000), -2147483648L) + self.assertEqual(-(0xffffffff), -4294967295L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-0x80000000, -2147483648L) + self.assertEqual(-0xffffffff, -4294967295L) + + # Positive constants + self.assertEqual(0x8000000000000000, 9223372036854775808L) + self.assertEqual(0xffffffffffffffff, 18446744073709551615L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0x8000000000000000), -9223372036854775808L) + self.assertEqual(-(0xffffffffffffffff), -18446744073709551615L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-0x8000000000000000, -9223372036854775808L) + self.assertEqual(-0xffffffffffffffff, 
-18446744073709551615L) + + def test_oct_baseline(self): + # Baseline tests + self.assertEqual(00, 0) + self.assertEqual(020, 16) + self.assertEqual(017777777777, 2147483647) + self.assertEqual(0777777777777777777777, 9223372036854775807) + # Ditto with a minus sign and parentheses + self.assertEqual(-(00), 0) + self.assertEqual(-(020), -16) + self.assertEqual(-(017777777777), -2147483647) + self.assertEqual(-(0777777777777777777777), -9223372036854775807) + # Ditto with a minus sign and NO parentheses + self.assertEqual(-00, 0) + self.assertEqual(-020, -16) + self.assertEqual(-017777777777, -2147483647) + self.assertEqual(-0777777777777777777777, -9223372036854775807) + + def test_oct_baseline_new(self): + # A few upper/lowercase tests + self.assertEqual(0o0, 0O0) + self.assertEqual(0o1, 0O1) + self.assertEqual(0o1234567, 0O1234567) + # Baseline tests + self.assertEqual(0o0, 0) + self.assertEqual(0o20, 16) + self.assertEqual(0o17777777777, 2147483647) + self.assertEqual(0o777777777777777777777, 9223372036854775807) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0o0), 0) + self.assertEqual(-(0o20), -16) + self.assertEqual(-(0o17777777777), -2147483647) + self.assertEqual(-(0o777777777777777777777), -9223372036854775807) + # Ditto with a minus sign and NO parentheses + self.assertEqual(-0o0, 0) + self.assertEqual(-0o20, -16) + self.assertEqual(-0o17777777777, -2147483647) + self.assertEqual(-0o777777777777777777777, -9223372036854775807) + + def test_oct_unsigned(self): + # Positive constants + self.assertEqual(020000000000, 2147483648L) + self.assertEqual(037777777777, 4294967295L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(020000000000), -2147483648L) + self.assertEqual(-(037777777777), -4294967295L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-020000000000, -2147483648L) + self.assertEqual(-037777777777, -4294967295L) + + # Positive constants + 
self.assertEqual(01000000000000000000000, 9223372036854775808L) + self.assertEqual(01777777777777777777777, 18446744073709551615L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(01000000000000000000000), -9223372036854775808L) + self.assertEqual(-(01777777777777777777777), -18446744073709551615L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-01000000000000000000000, -9223372036854775808L) + self.assertEqual(-01777777777777777777777, -18446744073709551615L) + + def test_oct_unsigned_new(self): + # Positive constants + self.assertEqual(0o20000000000, 2147483648L) + self.assertEqual(0o37777777777, 4294967295L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0o20000000000), -2147483648L) + self.assertEqual(-(0o37777777777), -4294967295L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-0o20000000000, -2147483648L) + self.assertEqual(-0o37777777777, -4294967295L) + + # Positive constants + self.assertEqual(0o1000000000000000000000, 9223372036854775808L) + self.assertEqual(0o1777777777777777777777, 18446744073709551615L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0o1000000000000000000000), -9223372036854775808L) + self.assertEqual(-(0o1777777777777777777777), -18446744073709551615L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-0o1000000000000000000000, -9223372036854775808L) + self.assertEqual(-0o1777777777777777777777, -18446744073709551615L) + + def test_bin_baseline(self): + # A few upper/lowercase tests + self.assertEqual(0b0, 0B0) + self.assertEqual(0b1, 0B1) + self.assertEqual(0b10101010101, 0B10101010101) + # Baseline tests + self.assertEqual(0b0, 0) + self.assertEqual(0b10000, 16) + self.assertEqual(0b1111111111111111111111111111111, 2147483647) + 
self.assertEqual(0b111111111111111111111111111111111111111111111111111111111111111, 9223372036854775807) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0b0), 0) + self.assertEqual(-(0b10000), -16) + self.assertEqual(-(0b1111111111111111111111111111111), -2147483647) + self.assertEqual(-(0b111111111111111111111111111111111111111111111111111111111111111), -9223372036854775807) + # Ditto with a minus sign and NO parentheses + self.assertEqual(-0b0, 0) + self.assertEqual(-0b10000, -16) + self.assertEqual(-0b1111111111111111111111111111111, -2147483647) + self.assertEqual(-0b111111111111111111111111111111111111111111111111111111111111111, -9223372036854775807) + + def test_bin_unsigned(self): + # Positive constants + self.assertEqual(0b10000000000000000000000000000000, 2147483648L) + self.assertEqual(0b11111111111111111111111111111111, 4294967295L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0b10000000000000000000000000000000), -2147483648L) + self.assertEqual(-(0b11111111111111111111111111111111), -4294967295L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + self.assertEqual(-0b10000000000000000000000000000000, -2147483648L) + self.assertEqual(-0b11111111111111111111111111111111, -4294967295L) + + # Positive constants + self.assertEqual(0b1000000000000000000000000000000000000000000000000000000000000000, 9223372036854775808L) + self.assertEqual(0b1111111111111111111111111111111111111111111111111111111111111111, 18446744073709551615L) + # Ditto with a minus sign and parentheses + self.assertEqual(-(0b1000000000000000000000000000000000000000000000000000000000000000), -9223372036854775808L) + self.assertEqual(-(0b1111111111111111111111111111111111111111111111111111111111111111), -18446744073709551615L) + # Ditto with a minus sign and NO parentheses + # This failed in Python 2.2 through 2.2.2 and in 2.3a1 + 
self.assertEqual(-0b1000000000000000000000000000000000000000000000000000000000000000, -9223372036854775808L) + self.assertEqual(-0b1111111111111111111111111111111111111111111111111111111111111111, -18446744073709551615L) + +def test_main(): + test_support.run_unittest(TextHexOctBin) + +if __name__ == "__main__": + test_main() diff --git a/Objects/longobject.c b/Objects/longobject.c index 46ed713..afa1b75 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1465,14 +1465,27 @@ PyLong_FromString(char *str, char **pend, int base) while (*str != '\0' && isspace(Py_CHARMASK(*str))) str++; if (base == 0) { + /* No base given. Deduce the base from the contents + of the string */ if (str[0] != '0') base = 10; else if (str[1] == 'x' || str[1] == 'X') base = 16; + else if (str[1] == 'o' || str[1] == 'O') + base = 8; + else if (str[1] == 'b' || str[1] == 'B') + base = 2; else + /* "old" (C-style) octal literal, still valid in + 2.x, although illegal in 3.x */ base = 8; } - if (base == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) + /* Whether or not we were deducing the base, skip leading chars + as needed */ + if (str[0] == '0' && + ((base == 16 && (str[1] == 'x' || str[1] == 'X')) || + (base == 8 && (str[1] == 'o' || str[1] == 'O')) || + (base == 2 && (str[1] == 'b' || str[1] == 'B')))) str += 2; start = str; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 1314f5f..6df3005 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1335,7 +1335,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) /* Number */ if (isdigit(c)) { if (c == '0') { - /* Hex or octal -- maybe. */ + /* Hex, octal or binary -- maybe. 
*/ c = tok_nextc(tok); if (c == '.') goto fraction; @@ -1356,6 +1356,30 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); } while (isxdigit(c)); } + else if (c == 'o' || c == 'O') { + /* Octal: the prefix must be followed by at least one digit 0-7 */ + c = tok_nextc(tok); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } + else if (c == 'b' || c == 'B') { + /* Binary: the prefix must be followed by at least one digit 0 or 1 */ + c = tok_nextc(tok); + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } else { int found_decimal = 0; /* Octal; c is first char of it */ diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c index cb3c012..ebd468c 100644 --- a/Python/mystrtoul.c +++ b/Python/mystrtoul.c @@ -83,9 +83,9 @@ static int digitlimit[] = { ** This is a general purpose routine for converting ** an ascii string to an integer in an arbitrary base. ** Leading white space is ignored. If 'base' is zero -** it looks for a leading 0, 0x or 0X to tell which -** base. If these are absent it defaults to 10. -** Base must be 0 or between 2 and 36 (inclusive). +** it looks for a leading 0, 0b, 0B, 0o, 0O, 0x or 0X +** to tell which base. If these are absent it defaults +** to 10. Base must be 0 or between 2 and 36 (inclusive). ** If 'ptr' is non-NULL it will contain a pointer to ** the end of the scan. 
** Errors due to bad pointers will probably result in @@ -104,7 +104,7 @@ PyOS_strtoul(register char *str, char **ptr, int base) /* check for leading 0 or 0x for auto-base or base 16 */ switch (base) { - case 0: /* look for leading 0, 0x or 0X */ + case 0: /* look for leading 0, 0b, 0o or 0x */ if (*str == '0') { ++str; if (*str == 'x' || *str == 'X') { @@ -116,14 +116,62 @@ PyOS_strtoul(register char *str, char **ptr, int base) } ++str; base = 16; - } - else + } else if (*str == 'o' || *str == 'O') { + /* there must be at least one digit after 0o */ + if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) { + if (ptr) + *ptr = str; + return 0; + } + ++str; + base = 8; + } else if (*str == 'b' || *str == 'B') { + /* there must be at least one digit after 0b */ + if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) { + if (ptr) + *ptr = str; + return 0; + } + ++str; + base = 2; + } else { base = 8; + } } else base = 10; break; + case 2: /* skip leading 0b or 0B */ + if (*str == '0') { + ++str; + if (*str == 'b' || *str == 'B') { + /* there must be at least one digit after 0b */ + if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 2) { + if (ptr) + *ptr = str; + return 0; + } + ++str; + } + } + break; + + case 8: /* skip leading 0o or 0O */ + if (*str == '0') { + ++str; + if (*str == 'o' || *str == 'O') { + /* there must be at least one digit after 0o */ + if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 8) { + if (ptr) + *ptr = str; + return 0; + } + ++str; + } + } + break; + case 16: /* skip leading 0x or 0X */ if (*str == '0') { ++str; -- cgit v0.12