From 7311f3b583c739f862ae17c3925985ec3ea244f5 Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Wed, 4 Jan 2012 21:12:27 -0500 Subject: Lexer: include leading whitespace in the comment token. Indented comments are ignored rather than causing errors. --- src/lexer.cc | 116 ++++++++++++++++++++++++++++------------------------ src/lexer.in.cc | 2 +- src/parsers_test.cc | 25 +++++++---- 3 files changed, 82 insertions(+), 61 deletions(-) diff --git a/src/lexer.cc b/src/lexer.cc index 75b91e7..6bcdedc 100644 --- a/src/lexer.cc +++ b/src/lexer.cc @@ -104,39 +104,40 @@ Lexer::Token Lexer::ReadToken() { { char yych; + unsigned int yyaccept = 0; static const unsigned char yybm[] = { - 0, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 0, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 192, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 160, 160, 128, - 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 128, 128, 128, 128, 128, 128, - 128, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 128, 128, 128, 128, 160, - 128, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, + 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 192, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 96, 96, 64, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 64, 64, 64, 64, 64, 64, + 64, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 64, 64, 64, 64, 96, + 64, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, }; yych = *p; @@ -144,11 +145,12 @@ Lexer::Token Lexer::ReadToken() { if (yych <= ',') { if (yych <= 0x1F) { if (yych <= 0x00) goto yy21; - if (yych == '\n') goto yy4; + if (yych == '\n') goto yy6; goto yy23; } else { - if (yych <= ' ') goto yy6; - if (yych != '#') goto yy23; + if (yych <= ' ') goto yy2; + if (yych == '#') goto yy4; + goto yy23; } } else { if (yych <= ':') { @@ -185,19 +187,21 @@ Lexer::Token Lexer::ReadToken() { } } } +yy2: + yyaccept = 0; yych = *(q = ++p); - if (yych >= 0x01) goto yy62; + goto yy65; yy3: - { token = ERROR; break; } + { token = INDENT; break; } yy4: - ++p; - { token = NEWLINE; break; } + yyaccept = 1; + yych = *(q = ++p); + if (yych >= 0x01) goto yy60; +yy5: + { token = ERROR; break; } yy6: ++p; - yych = *p; - goto yy60; -yy7: - { token = INDENT; break; } + { token = NEWLINE; break; } yy8: ++p; if ((yych = *p) == 'u') goto yy54; @@ -238,7 +242,7 @@ yy21: { token = TEOF; break; } yy23: yych = *++p; - goto yy3; + goto yy5; yy24: ++p; yych = *p; @@ -329,20 +333,26 @@ yy60: if (yybm[0+yych] & 64) { goto yy59; } - goto yy7; -yy61: + if (yych >= 0x01) goto yy62; + p = q; + if (yyaccept <= 0) { + goto yy3; + } else { + goto yy5; + } +yy62: ++p; + { continue; } +yy64: + yyaccept = 0; + q = ++p; yych = *p; -yy62: +yy65: if (yybm[0+yych] & 128) { - goto yy61; + goto yy64; } - if (yych >= 0x01) goto yy64; - p = q; + if (yych == '#') goto yy59; goto yy3; -yy64: - ++p; - { continue; } } } diff --git a/src/lexer.in.cc b/src/lexer.in.cc index c0c197b..954c364 100644 --- a/src/lexer.in.cc +++ b/src/lexer.in.cc @@ -110,7 +110,7 @@ Lexer::Token Lexer::ReadToken() { simple_varname = [a-zA-Z0-9_-]+; varname = [a-zA-Z0-9_.-]+; - "#"[^\000\n]*"\n" { continue; } + [ ]*"#"[^\000\n]*"\n" { continue; } [\n] { token = NEWLINE; break; } [ ]+ { token = INDENT; break; } "build" { token = BUILD; break; } diff --git a/src/parsers_test.cc b/src/parsers_test.cc index e7047da..d6b3117 100644 --- a/src/parsers_test.cc +++ b/src/parsers_test.cc @@ -64,6 +64,24 @@ TEST_F(ParserTest, Rules) { EXPECT_EQ("[cat ][$in][ > ][$out]", rule->command().Serialize()); } +TEST_F(ParserTest, IgnoreIndentedComments) { + ASSERT_NO_FATAL_FAILURE(AssertParse( +" #indented comment\n" +"rule cat\n" +" command = cat $in > $out\n" +" #generator = 1\n" +" restat = 1 # comment\n" +" #comment\n" +"build result: cat in_1.cc in-2.O\n" +" #comment\n")); + + ASSERT_EQ(2u, state.rules_.size()); + const Rule* rule = state.rules_.begin()->second; + EXPECT_EQ("cat", rule->name()); + EXPECT_TRUE(rule->restat()); + EXPECT_FALSE(rule->generator()); +} + TEST_F(ParserTest, Variables) { ASSERT_NO_FATAL_FAILURE(AssertParse( "l = one-letter-test\n" @@ -448,13 +466,6 @@ TEST_F(ParserTest, Errors) { // as we see them, not after we've read them all! EXPECT_EQ("input:4: empty path\n", err); } - - { - ManifestParser parser(NULL, NULL); - string err; - EXPECT_FALSE(parser.ParseTest(" # bad indented comment\n", &err)); - EXPECT_EQ("input:1: unexpected indent\n", err); - } } TEST_F(ParserTest, MultipleOutputs) -- cgit v0.12