summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/depfile_parser.cc 193
-rw-r--r-- src/depfile_parser.h 5
-rw-r--r-- src/depfile_parser.in.cc 96
-rw-r--r-- src/depfile_parser_test.cc 23
4 files changed, 198 insertions, 119 deletions
diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc
index b547661..832ad65 100644
--- a/src/depfile_parser.cc
+++ b/src/depfile_parser.cc
@@ -24,34 +24,45 @@
// How do you end a line with a backslash? The netbsd Make docs suggest
// reading the result of a shell command echoing a backslash!
//
-// Rather than implement the above, we do the simpler thing here.
+// Rather than implement all of the above, we do a simpler thing here:
+// Backslashes escape a set of characters (see "escapes" defined below),
+// otherwise they are passed through verbatim.
// If anyone actually has depfiles that rely on the more complicated
// behavior we can adjust this.
bool DepfileParser::Parse(string* content, string* err) {
- char* p = &(*content)[0];
- char* end = p + content->size();
- for (;;) {
- const char* start = p;
- char yych;
-
+ // in: current parser input point.
+ // end: end of input.
+ char* in = &(*content)[0];
+ char* end = in + content->size();
+ while (in < end) {
+ // out: current output point (typically same as in, but can fall behind
+ // as we de-escape backslashes).
+ char* out = in;
+ // filename: start of the current parsed filename.
+ char* filename = out;
+ for (;;) {
+ // start: beginning of the current parsed span.
+ const char* start = in;
+ char yych;
+
{
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 128, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 128, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 0, 0, 0, 0, 0,
- 0, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 0, 64, 0, 0, 64,
- 0, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 0, 0, 0, 0, 0,
+ 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 0, 0, 0, 0, 128,
+ 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -70,86 +81,108 @@ bool DepfileParser::Parse(string* content, string* err) {
0, 0, 0, 0, 0, 0, 0, 0,
};
- if ((end - p) < 2) break;
- yych = *p;
- if (yych <= '@') {
+ yych = *in;
+ if (yych <= '[') {
+ if (yych <= ':') {
+ if (yych <= '*') goto yy6;
+ goto yy4;
+ } else {
+ if (yych <= '@') goto yy6;
+ if (yych <= 'Z') goto yy4;
+ goto yy6;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= '\\') goto yy2;
+ if (yych <= '^') goto yy6;
+ goto yy4;
+ } else {
+ if (yych <= '`') goto yy6;
+ if (yych <= 'z') goto yy4;
+ goto yy6;
+ }
+ }
+yy2:
+ ++in;
+ if ((yych = *in) <= '$') {
if (yych <= 0x1F) {
- if (yych == '\n') goto yy4;
- goto yy7;
+ if (yych != '\n') goto yy9;
} else {
- if (yych <= ' ') goto yy4;
- if (yych <= '*') goto yy7;
- if (yych <= ':') goto yy6;
- goto yy7;
+ if (yych <= ' ') goto yy11;
+ if (yych <= '"') goto yy9;
+ goto yy11;
}
} else {
- if (yych <= '^') {
- if (yych <= 'Z') goto yy6;
- if (yych != '\\') goto yy7;
+ if (yych <= 'Z') {
+ if (yych == '*') goto yy11;
+ goto yy9;
} else {
- if (yych == '`') goto yy7;
- if (yych <= 'z') goto yy6;
- goto yy7;
+ if (yych <= '\\') goto yy11;
+ if (yych == '|') goto yy11;
+ goto yy9;
}
}
- ++p;
- if ((yych = *p) == '\n') goto yy13;
- goto yy10;
yy3:
{
- // Got a filename.
- int len = p - start;
- if (start[len - 1] == ':')
- len--; // Strip off trailing colon, if any.
-
- if (len == 0)
- continue; // Drop isolated colons.
-
- if (!out_.str_) {
- out_ = StringPiece(start, len);
- } else {
- ins_.push_back(StringPiece(start, len));
+ // For any other character (e.g. whitespace), swallow it here,
+ // allowing the outer logic to loop around again.
+ break;
}
- continue;
- }
yy4:
- ++p;
- yych = *p;
- goto yy12;
+ ++in;
+ yych = *in;
+ goto yy8;
yy5:
- { continue; }
-yy6:
- yych = *++p;
- goto yy10;
-yy7:
- ++p;
{
- *err = "BUG: depfile lexer encountered unknown state";
- return false;
- }
-yy9:
- ++p;
- if (end <= p) break;
- yych = *p;
-yy10:
- if (yybm[0+yych] & 64) {
- goto yy9;
+ // Got a span of plain text. Copy it to out if necessary.
+ int len = in - start;
+ if (out < start)
+ memmove(out, start, len);
+ out += len;
+ continue;
}
+yy6:
+ yych = *++in;
goto yy3;
-yy11:
- ++p;
- if (end <= p) break;
- yych = *p;
-yy12:
+yy7:
+ ++in;
+ yych = *in;
+yy8:
if (yybm[0+yych] & 128) {
- goto yy11;
+ goto yy7;
}
goto yy5;
-yy13:
- ++p;
- { continue; }
+yy9:
+ ++in;
+ {
+ // Let backslash before other characters through verbatim.
+ *out++ = '\\';
+ *out++ = yych;
+ continue;
+ }
+yy11:
+ ++in;
+ {
+ // De-escape backslashed character.
+ *out++ = yych;
+ continue;
+ }
+ }
+
}
+ int len = out - filename;
+ if (len > 0 && filename[len - 1] == ':')
+ len--; // Strip off trailing colon, if any.
+
+ if (len == 0)
+ continue;
+
+ if (!out_.str_) {
+ out_ = StringPiece(filename, len);
+ } else {
+ ins_.push_back(StringPiece(filename, len));
+ }
}
return true;
}
diff --git a/src/depfile_parser.h b/src/depfile_parser.h
index 08bf68a..c900956 100644
--- a/src/depfile_parser.h
+++ b/src/depfile_parser.h
@@ -20,8 +20,9 @@ using namespace std;
/// Parser for the dependency information emitted by gcc's -M flags.
struct DepfileParser {
- /// Parse an input file. Warning: may mutate the content in-place
- /// and parsed StringPieces are pointers within it.
+ /// Parse an input file. Input must be NUL-terminated.
+ /// Warning: may mutate the content in-place and parsed StringPieces are
+ /// pointers within it.
bool Parse(string* content, string* err);
StringPiece out_;
diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc
index 7ac95c6..c469a2c 100644
--- a/src/depfile_parser.in.cc
+++ b/src/depfile_parser.in.cc
@@ -23,51 +23,79 @@
// How do you end a line with a backslash? The netbsd Make docs suggest
// reading the result of a shell command echoing a backslash!
//
-// Rather than implement the above, we do the simpler thing here.
+// Rather than implement all of the above, we do a simpler thing here:
+// Backslashes escape a set of characters (see "escape" defined below),
+// otherwise they are passed through verbatim.
// If anyone actually has depfiles that rely on the more complicated
// behavior we can adjust this.
bool DepfileParser::Parse(string* content, string* err) {
- char* p = &(*content)[0];
- char* end = p + content->size();
- for (;;) {
- const char* start = p;
- char yych;
- /*!re2c
- re2c:define:YYCTYPE = "char";
- re2c:define:YYCURSOR = p;
- re2c:define:YYLIMIT = end;
+ // in: current parser input point.
+ // end: end of input.
+ char* in = &(*content)[0];
+ char* end = in + content->size();
+ while (in < end) {
+ // out: current output point (typically same as in, but can fall behind
+ // as we de-escape backslashes).
+ char* out = in;
+ // filename: start of the current parsed filename.
+ char* filename = out;
+ for (;;) {
+ // start: beginning of the current parsed span.
+ const char* start = in;
+ char yych;
+ /*!re2c
+ re2c:define:YYCTYPE = "char";
+ re2c:define:YYCURSOR = in;
+ re2c:define:YYLIMIT = end;
- re2c:yyfill:parameter = 0;
- re2c:define:YYFILL = break;
+ re2c:yyfill:enable = 0;
- re2c:indent:top = 2;
- re2c:indent:string = " ";
+ re2c:indent:top = 2;
+ re2c:indent:string = " ";
- re2c:yych:emit = 0;
+ re2c:yych:emit = 0;
- '\\\n' { continue; }
- [ \n]+ { continue; }
- [a-zA-Z0-9+,/\\_:.-]+ {
- // Got a filename.
- int len = p - start;
- if (start[len - 1] == ':')
- len--; // Strip off trailing colon, if any.
+ escape = [ \\#*$[|];
- if (len == 0)
- continue; // Drop isolated colons.
-
- if (!out_.str_) {
- out_ = StringPiece(start, len);
- } else {
- ins_.push_back(StringPiece(start, len));
+ '\\' escape {
+ // De-escape backslashed character.
+ *out++ = yych;
+ continue;
}
- continue;
+ '\\'. {
+ // Let backslash before other characters through verbatim.
+ *out++ = '\\';
+ *out++ = yych;
+ continue;
+ }
+ [a-zA-Z0-9+,/_:.-]+ {
+ // Got a span of plain text. Copy it to out if necessary.
+ int len = in - start;
+ if (out < start)
+ memmove(out, start, len);
+ out += len;
+ continue;
+ }
+ [^] {
+ // For any other character (e.g. whitespace), swallow it here,
+ // allowing the outer logic to loop around again.
+ break;
+ }
+ */
}
- [^] {
- *err = "BUG: depfile lexer encountered unknown state";
- return false;
+
+ int len = out - filename;
+ if (len > 0 && filename[len - 1] == ':')
+ len--; // Strip off trailing colon, if any.
+
+ if (len == 0)
+ continue;
+
+ if (!out_.str_) {
+ out_ = StringPiece(filename, len);
+ } else {
+ ins_.push_back(StringPiece(filename, len));
}
- */
}
return true;
}
diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc
index 3549f97..43e677c 100644
--- a/src/depfile_parser_test.cc
+++ b/src/depfile_parser_test.cc
@@ -73,15 +73,32 @@ TEST_F(DepfileParserTest, BackSlashes) {
EXPECT_EQ(4u, parser_.ins_.size());
}
-TEST_F(DepfileParserTest, DISABLED_Spaces) {
+TEST_F(DepfileParserTest, Spaces) {
string err;
EXPECT_TRUE(Parse(
-"foo\\ bar: a\\ b a b",
+"a\\ bc\\ def: a\\ b c d",
&err));
ASSERT_EQ("", err);
- EXPECT_EQ("foo bar",
+ EXPECT_EQ("a bc def",
parser_.out_.AsString());
ASSERT_EQ(3u, parser_.ins_.size());
EXPECT_EQ("a b",
parser_.ins_[0].AsString());
+ EXPECT_EQ("c",
+ parser_.ins_[1].AsString());
+ EXPECT_EQ("d",
+ parser_.ins_[2].AsString());
+}
+
+TEST_F(DepfileParserTest, Escapes) {
+ // Put backslashes before a variety of characters, see which ones make
+ // it through.
+ string err;
+ EXPECT_TRUE(Parse(
+"\\!\\@\\#\\$\\%\\^\\&\\\\",
+ &err));
+ ASSERT_EQ("", err);
+ EXPECT_EQ("\\!\\@#$\\%\\^\\&\\",
+ parser_.out_.AsString());
+ ASSERT_EQ(0u, parser_.ins_.size());
}