summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Evan Martin <martine@danga.com>, 2012-04-26 04:52:07 (GMT)
committer: Evan Martin <martine@danga.com>, 2012-04-26 04:52:58 (GMT)
commit: 01c7b2dea3a7cc2db5416e6f5aec67f14a735110 (patch)
tree: f305f5ed0c93fb930943b3779b02734a399a9baf
parent: 27fb0d1e8cff1effb5c4e264ddf62db0d592158c (diff)
download: Ninja-01c7b2dea3a7cc2db5416e6f5aec67f14a735110.zip
Ninja-01c7b2dea3a7cc2db5416e6f5aec67f14a735110.tar.gz
Ninja-01c7b2dea3a7cc2db5416e6f5aec67f14a735110.tar.bz2
allow UTF-8 in rule descriptions
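The lexer already mostly allowed this, except that bytes > 127 were held in a plain `char` (signed on common platforms) and were therefore being interpreted as negative indexes into the lexer table. Reading them as `unsigned char` keeps the index in the range 0-255.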
-rw-r--r-- src/lexer.cc | 8
-rw-r--r-- src/lexer.in.cc | 2
-rw-r--r-- src/parsers_test.cc | 7
3 files changed, 12 insertions, 5 deletions
diff --git a/src/lexer.cc b/src/lexer.cc
index 9e4392c..b3efe22 100644
--- a/src/lexer.cc
+++ b/src/lexer.cc
@@ -123,7 +123,7 @@ Lexer::Token Lexer::ReadToken() {
start = p;
{
- char yych;
+ unsigned char yych;
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
0, 64, 64, 64, 64, 64, 64, 64,
@@ -404,7 +404,7 @@ void Lexer::EatWhitespace() {
ofs_ = p;
{
- char yych;
+ unsigned char yych;
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -485,7 +485,7 @@ bool Lexer::ReadIdent(string* out) {
const char* start = p;
{
- char yych;
+ unsigned char yych;
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -573,7 +573,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
start = p;
{
- char yych;
+ unsigned char yych;
static const unsigned char yybm[] = {
0, 128, 128, 128, 128, 128, 128, 128,
128, 128, 0, 128, 128, 128, 128, 128,
diff --git a/src/lexer.in.cc b/src/lexer.in.cc
index 28a5bdf..e478921 100644
--- a/src/lexer.in.cc
+++ b/src/lexer.in.cc
@@ -121,7 +121,7 @@ Lexer::Token Lexer::ReadToken() {
for (;;) {
start = p;
/*!re2c
- re2c:define:YYCTYPE = "char";
+ re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:define:YYMARKER = q;
re2c:yyfill:enable = 0;
diff --git a/src/parsers_test.cc b/src/parsers_test.cc
index a8bf179..c5151b8 100644
--- a/src/parsers_test.cc
+++ b/src/parsers_test.cc
@@ -658,3 +658,10 @@ TEST_F(ParserTest, DefaultStatements) {
EXPECT_EQ("b", nodes[1]->path());
EXPECT_EQ("c", nodes[2]->path());
}
+
+TEST_F(ParserTest, UTF8) {
+ ASSERT_NO_FATAL_FAILURE(AssertParse(
+"rule utf8\n"
+" command = true\n"
+" description = compilaci\xC3\xB3\n"));
+}