diff options
-rw-r--r-- | Doc/ref/ref2.tex | 14 | ||||
-rw-r--r-- | Lib/test/test_compile.py | 44 | ||||
-rw-r--r-- | Lib/tokenize.py | 4 | ||||
-rw-r--r-- | Misc/NEWS | 12 | ||||
-rw-r--r-- | Parser/tokenizer.c | 25 |
5 files changed, 84 insertions, 15 deletions
diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex index 3ccfaef..8b96055 100644 --- a/Doc/ref/ref2.tex +++ b/Doc/ref/ref2.tex @@ -517,26 +517,26 @@ definitions: \production{pointfloat} {[\token{intpart}] \token{fraction} | \token{intpart} "."} \production{exponentfloat} - {(\token{nonzerodigit} \token{digit}* | \token{pointfloat}) + {(\token{intpart} | \token{pointfloat}) \token{exponent}} \production{intpart} - {\token{nonzerodigit} \token{digit}* | "0"} + {\token{digit}+} \production{fraction} {"." \token{digit}+} \production{exponent} {("e" | "E") ["+" | "-"] \token{digit}+} \end{productionlist} -Note that the integer part of a floating point number cannot look like -an octal integer, though the exponent may look like an octal literal -but will always be interpreted using radix 10. For example, -\samp{1e010} is legal, while \samp{07.1} is a syntax error. +Note that the integer and exponent parts of floating point numbers +can look like octal integers, but are interpreted using radix 10. For +example, \samp{077e010} is legal, and denotes the same number +as \samp{77e10}. The allowed range of floating point literals is implementation-dependent. Some examples of floating point literals: \begin{verbatim} -3.14 10. .001 1e100 3.14e-10 +3.14 10. 
.001 1e100 3.14e-10 0e0 \end{verbatim} Note that numeric literals do not include a sign; a phrase like diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 0276ba6..9f20ba1 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -65,3 +65,47 @@ expect_error("2e") expect_error("2.0e+") expect_error("1e-") expect_error("3-4e/21") + + +if verbose: + print "testing literals with leading zeroes" + +def expect_same(test_source, expected): + got = eval(test_source) + if got != expected: + raise TestFailed("eval(%r) gave %r, but expected %r" % + (test_source, got, expected)) + +expect_error("077787") +expect_error("0xj") +expect_error("0x.") +expect_error("0e") +expect_same("0777", 511) +expect_same("0777L", 511) +expect_same("000777", 511) +expect_same("0xff", 255) +expect_same("0xffL", 255) +expect_same("0XfF", 255) +expect_same("0777.", 777) +expect_same("0777.0", 777) +expect_same("000000000000000000000000000000000000000000000000000777e0", 777) +expect_same("0777e1", 7770) +expect_same("0e0", 0) +expect_same("0000E-012", 0) +expect_same("09.5", 9.5) +expect_same("0777j", 777j) +expect_same("00j", 0j) +expect_same("00.0", 0) +expect_same("0e3", 0) +expect_same("090000000000000.", 90000000000000.) +expect_same("090000000000000.0000000000000000000000", 90000000000000.) +expect_same("090000000000000e0", 90000000000000.) +expect_same("090000000000000e-0", 90000000000000.) +expect_same("090000000000000j", 90000000000000j) +expect_error("090000000000000") # plain octal literal w/ decimal digit +expect_error("080000000000000") # plain octal literal w/ decimal digit +expect_error("000000000000009") # plain octal literal w/ decimal digit +expect_error("000000000000008") # plain octal literal w/ decimal digit +expect_same("000000000000007", 7) +expect_same("000000000000008.", 8.) +expect_same("000000000000009.", 9.) 
diff --git a/Lib/tokenize.py b/Lib/tokenize.py index b952b36..da2bcd2 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -56,9 +56,9 @@ Decnumber = r'[1-9]\d*[lL]?' Intnumber = group(Hexnumber, Octnumber, Decnumber) Exponent = r'[eE][-+]?\d+' Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent) -Expfloat = r'[1-9]\d*' + Exponent +Expfloat = r'\d+' + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'0[jJ]', r'[1-9]\d*[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) # Tail end of ' string. diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -3,6 +3,12 @@ What's New in Python 2.2a3? Core ++ The syntax of floating-point and imaginary literals has been + liberalized, to allow leading zeroes. Examples of literals now + legal that were SyntaxErrors before: + + 00.0 0e3 0100j 07.5 00000000000000000008. + + An old tokenizer bug allowed floating point literals with an incomplete exponent, such as 1e and 3.1e-. Such literals now raise SyntaxError. @@ -27,13 +33,13 @@ API module: - rename Py_TPFLAGS_GC to PyTPFLAGS_HAVE_GC - + - use PyObject_GC_New or PyObject_GC_NewVar to allocate objects and PyObject_GC_Del to deallocate them - + - rename PyObject_GC_Init to PyObject_GC_Track and PyObject_GC_Fini to PyObject_GC_UnTrack - + - remove PyGC_HEAD_SIZE from object size calculations - remove calls to PyObject_AS_GC and PyObject_FROM_GC diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 7270629..324d9b6 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -722,7 +722,7 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start, /* Number */ if (isdigit(c)) { if (c == '0') { - /* Hex or octal */ + /* Hex or octal -- maybe. */ c = tok_nextc(tok); if (c == '.') goto fraction; @@ -737,13 +737,31 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start, } while (isxdigit(c)); } else { - /* XXX This is broken! E.g., - 09.9 should be accepted as float! 
*/ + int found_decimal = 0; /* Octal; c is first char of it */ /* There's no 'isoctdigit' macro, sigh */ while ('0' <= c && c < '8') { c = tok_nextc(tok); } + if (isdigit(c)) { + found_decimal = 1; + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == '.') + goto fraction; + else if (c == 'e' || c == 'E') + goto exponent; +#ifndef WITHOUT_COMPLEX + else if (c == 'j' || c == 'J') + goto imaginary; +#endif + else if (found_decimal) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } } if (c == 'l' || c == 'L') c = tok_nextc(tok); @@ -765,6 +783,7 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start, } while (isdigit(c)); } if (c == 'e' || c == 'E') { + exponent: /* Exponent part */ c = tok_nextc(tok); if (c == '+' || c == '-') |