summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/ref/ref2.tex 14
-rw-r--r-- Lib/test/test_compile.py 44
-rw-r--r-- Lib/tokenize.py 4
-rw-r--r-- Misc/NEWS 12
-rw-r--r-- Parser/tokenizer.c 25
5 files changed, 84 insertions, 15 deletions
diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex
index 3ccfaef..8b96055 100644
--- a/Doc/ref/ref2.tex
+++ b/Doc/ref/ref2.tex
@@ -517,26 +517,26 @@ definitions:
\production{pointfloat}
{[\token{intpart}] \token{fraction} | \token{intpart} "."}
\production{exponentfloat}
- {(\token{nonzerodigit} \token{digit}* | \token{pointfloat})
+ {(\token{intpart} | \token{pointfloat})
\token{exponent}}
\production{intpart}
- {\token{nonzerodigit} \token{digit}* | "0"}
+ {\token{digit}+}
\production{fraction}
{"." \token{digit}+}
\production{exponent}
{("e" | "E") ["+" | "-"] \token{digit}+}
\end{productionlist}
-Note that the integer part of a floating point number cannot look like
-an octal integer, though the exponent may look like an octal literal
-but will always be interpreted using radix 10. For example,
-\samp{1e010} is legal, while \samp{07.1} is a syntax error.
+Note that the integer and exponent parts of floating point numbers
+can look like octal integers, but are interpreted using radix 10. For
+example, \samp{077e010} is legal, and denotes the same number
+as \samp{77e10}.
The allowed range of floating point literals is
implementation-dependent.
Some examples of floating point literals:
\begin{verbatim}
-3.14 10. .001 1e100 3.14e-10
+3.14 10. .001 1e100 3.14e-10 0e0
\end{verbatim}
Note that numeric literals do not include a sign; a phrase like
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 0276ba6..9f20ba1 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -65,3 +65,47 @@ expect_error("2e")
expect_error("2.0e+")
expect_error("1e-")
expect_error("3-4e/21")
+
+
+if verbose:
+ print "testing literals with leading zeroes"
+
+def expect_same(test_source, expected):
+ got = eval(test_source)
+ if got != expected:
+ raise TestFailed("eval(%r) gave %r, but expected %r" %
+ (test_source, got, expected))
+
+expect_error("077787")
+expect_error("0xj")
+expect_error("0x.")
+expect_error("0e")
+expect_same("0777", 511)
+expect_same("0777L", 511)
+expect_same("000777", 511)
+expect_same("0xff", 255)
+expect_same("0xffL", 255)
+expect_same("0XfF", 255)
+expect_same("0777.", 777)
+expect_same("0777.0", 777)
+expect_same("000000000000000000000000000000000000000000000000000777e0", 777)
+expect_same("0777e1", 7770)
+expect_same("0e0", 0)
+expect_same("0000E-012", 0)
+expect_same("09.5", 9.5)
+expect_same("0777j", 777j)
+expect_same("00j", 0j)
+expect_same("00.0", 0)
+expect_same("0e3", 0)
+expect_same("090000000000000.", 90000000000000.)
+expect_same("090000000000000.0000000000000000000000", 90000000000000.)
+expect_same("090000000000000e0", 90000000000000.)
+expect_same("090000000000000e-0", 90000000000000.)
+expect_same("090000000000000j", 90000000000000j)
+expect_error("090000000000000") # plain octal literal w/ decimal digit
+expect_error("080000000000000") # plain octal literal w/ decimal digit
+expect_error("000000000000009") # plain octal literal w/ decimal digit
+expect_error("000000000000008") # plain octal literal w/ decimal digit
+expect_same("000000000000007", 7)
+expect_same("000000000000008.", 8.)
+expect_same("000000000000009.", 9.)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index b952b36..da2bcd2 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -56,9 +56,9 @@ Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
-Expfloat = r'[1-9]\d*' + Exponent
+Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'0[jJ]', r'[1-9]\d*[jJ]', Floatnumber + r'[jJ]')
+Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string.
diff --git a/Misc/NEWS b/Misc/NEWS
index 7ea6b93..685d685 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -3,6 +3,12 @@ What's New in Python 2.2a3?
Core
++ The syntax of floating-point and imaginary literals has been
+ liberalized, to allow leading zeroes. Examples of literals now
+ legal that were SyntaxErrors before:
+
+ 00.0 0e3 0100j 07.5 00000000000000000008.
+
+ An old tokenizer bug allowed floating point literals with an incomplete
exponent, such as 1e and 3.1e-. Such literals now raise SyntaxError.
@@ -27,13 +33,13 @@ API
module:
- rename Py_TPFLAGS_GC to PyTPFLAGS_HAVE_GC
-
+
- use PyObject_GC_New or PyObject_GC_NewVar to allocate objects and
PyObject_GC_Del to deallocate them
-
+
- rename PyObject_GC_Init to PyObject_GC_Track and PyObject_GC_Fini
to PyObject_GC_UnTrack
-
+
- remove PyGC_HEAD_SIZE from object size calculations
- remove calls to PyObject_AS_GC and PyObject_FROM_GC
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 7270629..324d9b6 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -722,7 +722,7 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
/* Number */
if (isdigit(c)) {
if (c == '0') {
- /* Hex or octal */
+ /* Hex or octal -- maybe. */
c = tok_nextc(tok);
if (c == '.')
goto fraction;
@@ -737,13 +737,31 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
} while (isxdigit(c));
}
else {
- /* XXX This is broken! E.g.,
- 09.9 should be accepted as float! */
+ int found_decimal = 0;
/* Octal; c is first char of it */
/* There's no 'isoctdigit' macro, sigh */
while ('0' <= c && c < '8') {
c = tok_nextc(tok);
}
+ if (isdigit(c)) {
+ found_decimal = 1;
+ do {
+ c = tok_nextc(tok);
+ } while (isdigit(c));
+ }
+ if (c == '.')
+ goto fraction;
+ else if (c == 'e' || c == 'E')
+ goto exponent;
+#ifndef WITHOUT_COMPLEX
+ else if (c == 'j' || c == 'J')
+ goto imaginary;
+#endif
+ else if (found_decimal) {
+ tok->done = E_TOKEN;
+ tok_backup(tok, c);
+ return ERRORTOKEN;
+ }
}
if (c == 'l' || c == 'L')
c = tok_nextc(tok);
@@ -765,6 +783,7 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
} while (isdigit(c));
}
if (c == 'e' || c == 'E') {
+ exponent:
/* Exponent part */
c = tok_nextc(tok);
if (c == '+' || c == '-')