From 14404b68d8c5a501a2f5ee6f45494865b7b38276 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sat, 19 Jan 2008 19:27:05 +0000 Subject: Fix #1679: "0x" was taken as a valid integer literal. Fixes the tokenizer, tokenize.py and int() to reject this. Patches by Malte Helmert. --- Lib/test/test_builtin.py | 5 +++++ Lib/test/test_grammar.py | 2 ++ Lib/tokenize.py | 2 +- Misc/NEWS | 2 ++ Parser/tokenizer.c | 7 +++++++ Python/mystrtoul.c | 43 ++++++++++++++++++++++++++++--------------- 6 files changed, 45 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d56e6ff..f7b7c0c 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -816,6 +816,11 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(int('0123', 0), 83) self.assertEqual(int('0x123', 16), 291) + # Bug 1679: "0x" is not a valid hex literal + self.assertRaises(ValueError, int, "0x", 16) + self.assertRaises(ValueError, int, "0x", 0) + + # SF bug 1334662: int(string, base) wrong answers # Various representations of 2**32 evaluated to 0 # rather than 2**32 in previous versions diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index 51d77f2..4352275 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -30,6 +30,8 @@ class TokenTests(unittest.TestCase): self.assertEquals(0xff, 255) self.assertEquals(0377, 255) self.assertEquals(2147483647, 017777777777) + # "0x" is not a valid literal + self.assertRaises(SyntaxError, eval, "0x") from sys import maxint if maxint == 2147483647: self.assertEquals(-2147483647-1, -020000000000) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 9322e0f..1c93944 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -50,7 +50,7 @@ Comment = r'#[^\r\n]*' Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Name = r'[a-zA-Z_]\w*' -Hexnumber = r'0[xX][\da-fA-F]*[lL]?' +Hexnumber = r'0[xX][\da-fA-F]+[lL]?' Octnumber = r'0[0-7]*[lL]?' Decnumber = r'[1-9]\d*[lL]?' Intnumber = group(Hexnumber, Octnumber, Decnumber) diff --git a/Misc/NEWS b/Misc/NEWS index a19a8ad..8a6ca66 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 2.6 alpha 1? Core and builtins ----------------- +- Issue #1679: "0x" was taken as a valid integer literal. + - Issue #1865: Bytes as an alias for str and b"" as an alias "" were added. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 0015dae..0aaec19 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1332,7 +1332,14 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) goto imaginary; #endif if (c == 'x' || c == 'X') { + /* Hex */ + c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } do { c = tok_nextc(tok); } while (isxdigit(c)); diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c index f007057..a02992f 100644 --- a/Python/mystrtoul.c +++ b/Python/mystrtoul.c @@ -112,27 +112,40 @@ PyOS_strtoul(register char *str, char **ptr, int base) /* check for leading 0 or 0x for auto-base or base 16 */ switch (base) { - case 0: /* look for leading 0, 0x or 0X */ - if (*str == '0') { - ++str; - if (*str == 'x' || *str == 'X') { - ++str; - base = 16; + case 0: /* look for leading 0, 0x or 0X */ + if (*str == '0') { + ++str; + if (*str == 'x' || *str == 'X') { + /* there must be at least one digit after 0x */ + if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) { + if (ptr) + *ptr = str; + return 0; } - else - base = 8; + ++str; + base = 16; } else - base = 10; - break; + base = 8; + } + else + base = 10; + break; - case 16: /* skip leading 0x or 0X */ - if (*str == '0') { + case 16: /* skip leading 0x or 0X */ + if (*str == '0') { + ++str; + if (*str == 'x' || *str == 'X') { + /* there must be at least one digit after 0x */ + if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) { + if (ptr) + *ptr = str; + return 0; + } ++str; - if (*str == 'x' || *str == 'X') - ++str; } - break; + } + break; } /* catch silly bases */ -- cgit v0.12