author     Lysandros Nikolaou <lisandrosnik@gmail.com>  2023-10-12 07:34:35 (GMT)
committer  GitHub <noreply@github.com>                  2023-10-12 07:34:35 (GMT)
commit     17d65547df55eaefe077c45242a7f2d175961dfd (patch)
tree       008f24ce995cd5e0a88f35bba06c8ea7ad76f19b
parent     23645420dcc4f3b7b2ec4045ef6ac126c37a98c2 (diff)
gh-104169: Fix test_peg_generator after tokenizer refactoring (#110727)
* Fix test_peg_generator after tokenizer refactoring
* Remove references to tokenizer.c in comments etc.
-rw-r--r--  Lib/test/test_exceptions.py          2
-rw-r--r--  Lib/test/test_source_encoding.py     2
-rw-r--r--  Lib/test/test_tokenize.py            4
-rw-r--r--  Lib/tokenize.py                      2
-rw-r--r--  Modules/config.c.in                  2
-rw-r--r--  Parser/myreadline.c                  4
-rw-r--r--  Parser/string_parser.c               5
-rw-r--r--  Python/traceback.c                   2
-rw-r--r--  Tools/c-analyzer/TODO                4
-rw-r--r--  Tools/peg_generator/pegen/build.py  11
10 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 106baf9..05a89e7 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -253,7 +253,7 @@ class ExceptionTests(unittest.TestCase):
check('try:\n pass\nexcept*:\n pass', 3, 8)
check('try:\n pass\nexcept*:\n pass\nexcept* ValueError:\n pass', 3, 8)
- # Errors thrown by tokenizer.c
+ # Errors thrown by the tokenizer
check('(0x+1)', 1, 3)
check('x = 0xI', 1, 6)
check('0010 + 2', 1, 1)
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index 2787137..61b0077 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -255,7 +255,7 @@ class UTF8ValidatorTest(unittest.TestCase):
def test_invalid_utf8(self):
# This is a port of test_utf8_decode_invalid_sequences in
# test_unicode.py to exercise the separate utf8 validator in
- # Parser/tokenizer.c used when reading source files.
+ # Parser/tokenizer/helpers.c used when reading source files.
# That file is written using low-level C file I/O, so the only way to
# test it is to write actual files to disk.
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 06517ac..41b9ebe 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1435,7 +1435,7 @@ class TestDetectEncoding(TestCase):
self.assertEqual(consumed_lines, expected)
def test_latin1_normalization(self):
- # See get_normal_name() in tokenizer.c.
+ # See get_normal_name() in Parser/tokenizer/helpers.c.
encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
"iso-8859-1-unix", "iso-latin-1-mac")
for encoding in encodings:
@@ -1460,7 +1460,7 @@ class TestDetectEncoding(TestCase):
def test_utf8_normalization(self):
- # See get_normal_name() in tokenizer.c.
+ # See get_normal_name() in Parser/tokenizer/helpers.c.
encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
for encoding in encodings:
for rep in ("-", "_"):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index c21876f..0ab1893 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -298,7 +298,7 @@ def untokenize(iterable):
def _get_normal_name(orig_enc):
- """Imitates get_normal_name in tokenizer.c."""
+ """Imitates get_normal_name in Parser/tokenizer/helpers.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):
diff --git a/Modules/config.c.in b/Modules/config.c.in
index 6081f95..53b4fb2 100644
--- a/Modules/config.c.in
+++ b/Modules/config.c.in
@@ -45,7 +45,7 @@ struct _inittab _PyImport_Inittab[] = {
/* This lives in Python/Python-ast.c */
{"_ast", PyInit__ast},
- /* This lives in Python/Python-tokenizer.c */
+ /* This lives in Python/Python-tokenize.c */
{"_tokenize", PyInit__tokenize},
/* These entries are here for sys.builtin_module_names */
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 719a178..1825665 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -1,5 +1,5 @@
-/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
+/* Readline interface for the tokenizer and [raw_]input() in bltinmodule.c.
By default, or when stdin is not a tty device, we have a super
simple my_readline function using fgets.
Optionally, we can use the GNU readline library.
@@ -364,7 +364,7 @@ PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *) = NULL;
-/* Interface used by tokenizer.c and bltinmodule.c */
+/* Interface used by file_tokenizer.c and bltinmodule.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index c5f4218..f1e0277 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -14,8 +14,9 @@ static int
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
{
unsigned char c = *first_invalid_escape;
- if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) { // in this case the tokenizer has already emitted a warning,
- // see tokenizer.c:warn_invalid_escape_sequence
+ if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) {
+ // in this case the tokenizer has already emitted a warning,
+ // see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
return 0;
}
diff --git a/Python/traceback.c b/Python/traceback.c
index 5de1bff..f786144 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -32,7 +32,7 @@
#define MAX_FRAME_DEPTH 100
#define MAX_NTHREADS 100
-/* Function from Parser/tokenizer.c */
+/* Function from Parser/tokenizer/file_tokenizer.c */
extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
/*[clinic input]
diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO
index 27a5358..3d59953 100644
--- a/Tools/c-analyzer/TODO
+++ b/Tools/c-analyzer/TODO
@@ -428,8 +428,8 @@ Objects/typeobject.c:type_new():PyId___slots__ _Py_IDENTIFIER(
Objects/unicodeobject.c:unicodeiter_reduce():PyId_iter _Py_IDENTIFIER(iter)
Objects/weakrefobject.c:proxy_bytes():PyId___bytes__ _Py_IDENTIFIER(__bytes__)
Objects/weakrefobject.c:weakref_repr():PyId___name__ _Py_IDENTIFIER(__name__)
-Parser/tokenizer.c:fp_setreadl():PyId_open _Py_IDENTIFIER(open)
-Parser/tokenizer.c:fp_setreadl():PyId_readline _Py_IDENTIFIER(readline)
+Parser/tokenizer/file_tokenizer.c:fp_setreadl():PyId_open _Py_IDENTIFIER(open)
+Parser/tokenizer/file_tokenizer.c:fp_setreadl():PyId_readline _Py_IDENTIFIER(readline)
Python/Python-ast.c:ast_type_reduce():PyId___dict__ _Py_IDENTIFIER(__dict__)
Python/Python-ast.c:make_type():PyId___module__ _Py_IDENTIFIER(__module__)
Python/_warnings.c:PyId_stderr _Py_IDENTIFIER(stderr)
diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py
index 6b04ae9..30bfb31 100644
--- a/Tools/peg_generator/pegen/build.py
+++ b/Tools/peg_generator/pegen/build.py
@@ -123,7 +123,14 @@ def compile_c_extension(
common_sources = [
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
- str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "lexer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "state.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "lexer" / "buffer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "string_tokenizer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "file_tokenizer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "utf8_tokenizer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "readline_tokenizer.c"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer" / "helpers.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
@@ -133,6 +140,8 @@ def compile_c_extension(
include_dirs = [
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
str(MOD_DIR.parent.parent.parent / "Parser"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "lexer"),
+ str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer"),
]
extension = Extension(
extension_name,
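For context, a hypothetical usage sketch of the helper this hunk touches. Only the function name comes from the hunk; the argument value and the meaning of the return value are assumptions.

    # Hypothetical sketch: compile a generated parser source into an extension.
    # With this patch, the build pulls in the split Parser/lexer/* and
    # Parser/tokenizer/* sources listed in common_sources above.
    from pegen.build import compile_c_extension

    ext_path = compile_c_extension("parser/generated_parser.c")  # path argument is an assumption
    print(ext_path)  # assumed to be the location of the built extension module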