From 14404b68d8c5a501a2f5ee6f45494865b7b38276 Mon Sep 17 00:00:00 2001
From: Georg Brandl <georg@python.org>
Date: Sat, 19 Jan 2008 19:27:05 +0000
Subject: Fix #1679: "0x" was taken as a valid integer literal. Fixes the
 tokenizer, tokenize.py and int() to reject this. Patches by Malte Helmert.

---
 Lib/test/test_builtin.py |  5 +++++
 Lib/test/test_grammar.py |  2 ++
 Lib/tokenize.py          |  2 +-
 Misc/NEWS                |  2 ++
 Parser/tokenizer.c       |  7 +++++++
 Python/mystrtoul.c       | 43 ++++++++++++++++++++++++++++---------------
 6 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index d56e6ff..f7b7c0c 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -816,6 +816,11 @@ class BuiltinTest(unittest.TestCase):
         self.assertEqual(int('0123', 0), 83)
         self.assertEqual(int('0x123', 16), 291)
 
+        # Bug 1679: "0x" is not a valid hex literal
+        self.assertRaises(ValueError, int, "0x", 16)
+        self.assertRaises(ValueError, int, "0x", 0)
+
+
         # SF bug 1334662: int(string, base) wrong answers
         # Various representations of 2**32 evaluated to 0
         # rather than 2**32 in previous versions
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index 51d77f2..4352275 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -30,6 +30,8 @@ class TokenTests(unittest.TestCase):
         self.assertEquals(0xff, 255)
         self.assertEquals(0377, 255)
         self.assertEquals(2147483647, 017777777777)
+        # "0x" is not a valid literal
+        self.assertRaises(SyntaxError, eval, "0x")
         from sys import maxint
         if maxint == 2147483647:
             self.assertEquals(-2147483647-1, -020000000000)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9322e0f..1c93944 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -50,7 +50,7 @@ Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
 Name = r'[a-zA-Z_]\w*'
 
-Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
+Hexnumber = r'0[xX][\da-fA-F]+[lL]?'
 Octnumber = r'0[0-7]*[lL]?'
 Decnumber = r'[1-9]\d*[lL]?'
 Intnumber = group(Hexnumber, Octnumber, Decnumber)
diff --git a/Misc/NEWS b/Misc/NEWS
index a19a8ad..8a6ca66 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.6 alpha 1?
 Core and builtins
 -----------------
 
+- Issue #1679: "0x" is no longer accepted as a valid integer literal.
+
 - Issue #1865: Bytes as an alias for str and b"" as an alias "" were
   added.
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0015dae..0aaec19 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1332,7 +1332,14 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
 				goto imaginary;
 #endif
 			if (c == 'x' || c == 'X') {
+
 				/* Hex */
+				c = tok_nextc(tok);
+				if (!isxdigit(c)) {
+					tok->done = E_TOKEN;
+					tok_backup(tok, c);
+					return ERRORTOKEN;
+				}
 				do {
 					c = tok_nextc(tok);
 				} while (isxdigit(c));
diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c
index f007057..a02992f 100644
--- a/Python/mystrtoul.c
+++ b/Python/mystrtoul.c
@@ -112,27 +112,40 @@ PyOS_strtoul(register char *str, char **ptr, int base)
 
 	/* check for leading 0 or 0x for auto-base or base 16 */
 	switch (base) {
-		case 0:		/* look for leading 0, 0x or 0X */
-			if (*str == '0') {
-				++str;
-				if (*str == 'x' || *str == 'X') {
-					++str;
-					base = 16;
+	case 0:		/* look for leading 0, 0x or 0X */
+		if (*str == '0') {
+			++str;
+			if (*str == 'x' || *str == 'X') {
+				/* there must be at least one digit after 0x */
+				if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) {
+					if (ptr)
+						*ptr = str;
+					return 0;
 				}
-				else
-					base = 8;
+				++str;
+				base = 16;
 			}
 			else
-				base = 10;
-			break;
+				base = 8;
+		}
+		else
+			base = 10;
+		break;
 
-		case 16:	/* skip leading 0x or 0X */
-			if (*str == '0') {
+	case 16:	/* skip leading 0x or 0X */
+		if (*str == '0') {
+			++str;
+			if (*str == 'x' || *str == 'X') {
+				/* there must be at least one digit after 0x */
+				if (_PyLong_DigitValue[Py_CHARMASK(str[1])] >= 16) {
+					if (ptr)
+						*ptr = str;
+					return 0;
+				}
 				++str;
-				if (*str == 'x' || *str == 'X')
-					++str;
 			}
-			break;
+		}
+		break;
 	}
 
 	/* catch silly bases */
-- 
cgit v0.12