From 4a4f9d40e178a9a9e88f4cd502d2be49bf7938d8 Mon Sep 17 00:00:00 2001 From: Brad King Date: Mon, 19 Oct 2015 14:25:03 -0400 Subject: Fix depfile parser handling of multiple rules Currently we handle Makefile rules of the form: out: in1 in2 in3 and the form: out: in1 \ in2 \ in3 Teach the depfile parser to handle the additional form: out: in1 out: in2 out: in3 This is also valid Makefile syntax and is the depfile format generated by the Intel Compiler for Windows. Note that the `gcc -MP` option adds empty phony rules to the generated Makefile fragment: out: in1 in2 in3 in1: in2: in3: Previously we tolerated these because they were treated as inputs, which was accidentally correct. Instead we must now tolerate these by ignoring targets for which no dependencies are specified. --- src/depfile_parser.cc | 104 +++++++++++++++++++++++++++----------- src/depfile_parser.in.cc | 31 ++++++++++-- src/depfile_parser_test.cc | 123 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+), 33 deletions(-) diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc index 2ad2a00..7724eed 100644 --- a/src/depfile_parser.cc +++ b/src/depfile_parser.cc @@ -35,8 +35,11 @@ bool DepfileParser::Parse(string* content, string* err) { // parsing_targets: whether we are parsing targets or dependencies. char* in = &(*content)[0]; char* end = in + content->size(); + bool have_target = false; + bool have_secondary_target_on_this_rule = false; bool parsing_targets = true; while (in < end) { + bool have_newline = false; // out: current output point (typically same as in, but can fall behind // as we de-escape backslashes). char* out = in; @@ -45,6 +48,7 @@ bool DepfileParser::Parse(string* content, string* err) { for (;;) { // start: beginning of the current parsed span. const char* start = in; + char* yymarker = NULL; { unsigned char yych; @@ -84,17 +88,25 @@ bool DepfileParser::Parse(string* content, string* err) { }; yych = *in; if (yybm[0+yych] & 128) { - goto yy6; - } - if (yych <= '$') { - if (yych <= 0x00) goto yy2; - if (yych <= '#') goto yy4; goto yy9; + } + if (yych <= '\r') { + if (yych <= '\t') { + if (yych >= 0x01) goto yy4; + } else { + if (yych <= '\n') goto yy6; + if (yych <= '\f') goto yy4; + goto yy8; + } } else { - if (yych == '\\') goto yy10; - goto yy4; + if (yych <= '$') { + if (yych <= '#') goto yy4; + goto yy12; + } else { + if (yych == '\\') goto yy13; + goto yy4; + } } -yy2: ++in; { break; @@ -108,9 +120,20 @@ yy5: break; } yy6: + ++in; + { + // A newline ends the current file name and the current rule. + have_newline = true; + break; + } +yy8: + yych = *++in; + if (yych == '\n') goto yy6; + goto yy5; +yy9: yych = *++in; if (yybm[0+yych] & 128) { - goto yy6; + goto yy9; } { // Got a span of plain text. @@ -121,41 +144,41 @@ yy6: out += len; continue; } -yy9: +yy12: yych = *++in; - if (yych == '$') goto yy11; + if (yych == '$') goto yy14; goto yy5; -yy10: - yych = *++in; +yy13: + yych = *(yymarker = ++in); if (yych <= '"') { if (yych <= '\f') { if (yych <= 0x00) goto yy5; - if (yych == '\n') goto yy5; - goto yy13; + if (yych == '\n') goto yy18; + goto yy16; } else { - if (yych <= '\r') goto yy5; - if (yych == ' ') goto yy15; - goto yy13; + if (yych <= '\r') goto yy20; + if (yych == ' ') goto yy22; + goto yy16; } } else { if (yych <= 'Z') { - if (yych <= '#') goto yy15; - if (yych == '*') goto yy15; - goto yy13; + if (yych <= '#') goto yy22; + if (yych == '*') goto yy22; + goto yy16; } else { - if (yych <= ']') goto yy15; - if (yych == '|') goto yy15; - goto yy13; + if (yych <= ']') goto yy22; + if (yych == '|') goto yy22; + goto yy16; } } -yy11: +yy14: ++in; { // De-escape dollar character. *out++ = '$'; continue; } -yy13: +yy16: ++in; { // Let backslash before other characters through verbatim. @@ -163,7 +186,18 @@ yy13: *out++ = yych; continue; } -yy15: +yy18: + ++in; + { + // A line continuation ends the current file name. + break; + } +yy20: + yych = *++in; + if (yych == '\n') goto yy18; + in = yymarker; + goto yy5; +yy22: ++in; { // De-escape backslashed character. @@ -179,20 +213,30 @@ yy15: if (len > 0 && filename[len - 1] == ':') { len--; // Strip off trailing colon, if any. parsing_targets = false; + have_target = true; } if (len > 0) { if (is_dependency) { + if (have_secondary_target_on_this_rule) { + *err = "depfile has multiple output paths"; + return false; + } ins_.push_back(StringPiece(filename, len)); } else if (!out_.str_) { out_ = StringPiece(filename, len); } else if (out_ != StringPiece(filename, len)) { - *err = "depfile has multiple output paths"; - return false; + have_secondary_target_on_this_rule = true; } } + + if (have_newline) { + // A newline ends a rule so the next filename will be a new target. + parsing_targets = true; + have_secondary_target_on_this_rule = false; + } } - if (parsing_targets) { + if (!have_target) { *err = "expected ':' in depfile"; return false; } diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc index 4df8ce2..d299ee2 100644 --- a/src/depfile_parser.in.cc +++ b/src/depfile_parser.in.cc @@ -34,8 +34,11 @@ bool DepfileParser::Parse(string* content, string* err) { // parsing_targets: whether we are parsing targets or dependencies. char* in = &(*content)[0]; char* end = in + content->size(); + bool have_target = false; + bool have_secondary_target_on_this_rule = false; bool parsing_targets = true; while (in < end) { + bool have_newline = false; // out: current output point (typically same as in, but can fall behind // as we de-escape backslashes). char* out = in; @@ -44,10 +47,12 @@ bool DepfileParser::Parse(string* content, string* err) { for (;;) { // start: beginning of the current parsed span. const char* start = in; + char* yymarker = NULL; /*!re2c re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = in; re2c:define:YYLIMIT = end; + re2c:define:YYMARKER = yymarker; re2c:yyfill:enable = 0; @@ -56,6 +61,7 @@ bool DepfileParser::Parse(string* content, string* err) { nul = "\000"; escape = [ \\#*[|\]]; + newline = '\r'?'\n'; '\\' escape { // De-escape backslashed character. @@ -85,6 +91,15 @@ bool DepfileParser::Parse(string* content, string* err) { nul { break; } + '\\' newline { + // A line continuation ends the current file name. + break; + } + newline { + // A newline ends the current file name and the current rule. + have_newline = true; + break; + } [^] { // For any other character (e.g. whitespace), swallow it here, // allowing the outer logic to loop around again. @@ -98,20 +113,30 @@ bool DepfileParser::Parse(string* content, string* err) { if (len > 0 && filename[len - 1] == ':') { len--; // Strip off trailing colon, if any. parsing_targets = false; + have_target = true; } if (len > 0) { if (is_dependency) { + if (have_secondary_target_on_this_rule) { + *err = "depfile has multiple output paths"; + return false; + } ins_.push_back(StringPiece(filename, len)); } else if (!out_.str_) { out_ = StringPiece(filename, len); } else if (out_ != StringPiece(filename, len)) { - *err = "depfile has multiple output paths"; - return false; + have_secondary_target_on_this_rule = true; } } + + if (have_newline) { + // A newline ends a rule so the next filename will be a new target. + parsing_targets = true; + have_secondary_target_on_this_rule = false; + } } - if (parsing_targets) { + if (!have_target) { *err = "expected ':' in depfile"; return false; } diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc index e3eec07..70e4029 100644 --- a/src/depfile_parser_test.cc +++ b/src/depfile_parser_test.cc @@ -158,3 +158,126 @@ TEST_F(DepfileParserTest, RejectMultipleDifferentOutputs) { EXPECT_FALSE(Parse("foo bar: x y z", &err)); ASSERT_EQ("depfile has multiple output paths", err); } + +TEST_F(DepfileParserTest, MultipleEmptyRules) { + string err; + EXPECT_TRUE(Parse("foo: x\n" + "foo: \n" + "foo:\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(1u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); +} + +TEST_F(DepfileParserTest, UnifyMultipleRulesLF) { + string err; + EXPECT_TRUE(Parse("foo: x\n" + "foo: y\n" + "foo \\\n" + "foo: z\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, UnifyMultipleRulesCRLF) { + string err; + EXPECT_TRUE(Parse("foo: x\r\n" + "foo: y\r\n" + "foo \\\r\n" + "foo: z\r\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, UnifyMixedRulesLF) { + string err; + EXPECT_TRUE(Parse("foo: x\\\n" + " y\n" + "foo \\\n" + "foo: z\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, UnifyMixedRulesCRLF) { + string err; + EXPECT_TRUE(Parse("foo: x\\\r\n" + " y\r\n" + "foo \\\r\n" + "foo: z\r\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, IndentedRulesLF) { + string err; + EXPECT_TRUE(Parse(" foo: x\n" + " foo: y\n" + " foo: z\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, IndentedRulesCRLF) { + string err; + EXPECT_TRUE(Parse(" foo: x\r\n" + " foo: y\r\n" + " foo: z\r\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, TolerateMP) { + string err; + EXPECT_TRUE(Parse("foo: x y z\n" + "x:\n" + "y:\n" + "z:\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, MultipleRulesTolerateMP) { + string err; + EXPECT_TRUE(Parse("foo: x\n" + "x:\n" + "foo: y\n" + "y:\n" + "foo: z\n" + "z:\n", &err)); + ASSERT_EQ("foo", parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); + EXPECT_EQ("y", parser_.ins_[1].AsString()); + EXPECT_EQ("z", parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, MultipleRulesRejectDifferentOutputs) { + // check that multiple different outputs are rejected by the parser + // when spread across multiple rules + string err; + EXPECT_FALSE(Parse("foo: x y\n" + "bar: y z\n", &err)); + ASSERT_EQ("depfile has multiple output paths", err); +} -- cgit v0.12