From 67253133f8ca99034a1eabe326a51c74f5e95c87 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Sat, 8 Feb 2014 11:35:52 -0500 Subject: ExpandListArguments: Optimize the parser Optimize cmSystemTools::ExpandListArguments so as not to build a string character-by-character. This avoids excess reallocations of the result string. --- Source/cmSystemTools.cxx | 51 +++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/Source/cmSystemTools.cxx b/Source/cmSystemTools.cxx index ff05975..7cc63bb 100644 --- a/Source/cmSystemTools.cxx +++ b/Source/cmSystemTools.cxx @@ -1044,7 +1044,7 @@ void cmSystemTools::ExpandListArgument(const std::string& arg, bool emptyArgs) { // If argument is empty, it is an empty list. - if(arg.length() == 0 && !emptyArgs) + if(!emptyArgs && arg.empty()) { return; } @@ -1054,10 +1054,11 @@ void cmSystemTools::ExpandListArgument(const std::string& arg, newargs.push_back(arg); return; } - std::vector newArgVec; + std::string newArg; + const char *last = arg.c_str(); // Break the string at non-escaped semicolons not nested in []. int squareNesting = 0; - for(const char* c = arg.c_str(); *c; ++c) + for(const char* c = last; *c; ++c) { switch(*c) { @@ -1065,34 +1066,21 @@ void cmSystemTools::ExpandListArgument(const std::string& arg, { // We only want to allow escaping of semicolons. Other // escapes should not be processed here. - ++c; - if(*c == ';') - { - newArgVec.push_back(*c); - } - else + const char* next = c + 1; + if(*next == ';') { - newArgVec.push_back('\\'); - if(*c) - { - newArgVec.push_back(*c); - } - else - { - // Terminate the loop properly. - --c; - } + newArg.append(last, c - last); + // Skip over the escape character + last = c = next; } } break; case '[': { ++squareNesting; - newArgVec.push_back(*c); } break; case ']': { --squareNesting; - newArgVec.push_back(*c); } break; case ';': { @@ -1100,31 +1088,28 @@ void cmSystemTools::ExpandListArgument(const std::string& arg, // brackets. if(squareNesting == 0) { - if ( newArgVec.size() || emptyArgs ) + newArg.append(last, c - last); + // Skip over the semicolon + last = c + 1; + if ( !newArg.empty() || emptyArgs ) { // Add the last argument if the string is not empty. - newArgVec.push_back(0); - newargs.push_back(&*newArgVec.begin()); - newArgVec.clear(); + newargs.push_back(newArg); + newArg = ""; } } - else - { - newArgVec.push_back(*c); - } } break; default: { // Just append this character. - newArgVec.push_back(*c); } break; } } - if ( newArgVec.size() || emptyArgs ) + newArg.append(last); + if ( !newArg.empty() || emptyArgs ) { // Add the last argument if the string is not empty. - newArgVec.push_back(0); - newargs.push_back(&*newArgVec.begin()); + newargs.push_back(newArg); } } -- cgit v0.12 From 68eb1757445dd1bb6537e32be8c9a72360112978 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Sat, 8 Feb 2014 12:01:30 -0500 Subject: cmGeneratorExpressionLexer: Use a switch statement to parse Optimize cmGeneratorExpressionLexer::Tokenize to use a switch statement. The many dereferences of the input pointer were expensive. Also remove excess pointer arithmetic. --- Source/cmGeneratorExpressionLexer.cxx | 70 +++++++++++++++++------------------ 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/Source/cmGeneratorExpressionLexer.cxx b/Source/cmGeneratorExpressionLexer.cxx index cd71ec0..117a24e 100644 --- a/Source/cmGeneratorExpressionLexer.cxx +++ b/Source/cmGeneratorExpressionLexer.cxx @@ -42,42 +42,42 @@ cmGeneratorExpressionLexer::Tokenize(const char *input) const char *upto = c; for ( ; *c; ++c) - { - if(c[0] == '$' && c[1] == '<') { - InsertText(upto, c, result); - upto = c; - result.push_back(cmGeneratorExpressionToken( - cmGeneratorExpressionToken::BeginExpression, upto, 2)); - upto = c + 2; - ++c; - SawBeginExpression = true; - } - else if(c[0] == '>') - { - InsertText(upto, c, result); - upto = c; - result.push_back(cmGeneratorExpressionToken( - cmGeneratorExpressionToken::EndExpression, upto, 1)); - upto = c + 1; - SawGeneratorExpression = SawBeginExpression; - } - else if(c[0] == ':') - { - InsertText(upto, c, result); - upto = c; - result.push_back(cmGeneratorExpressionToken( - cmGeneratorExpressionToken::ColonSeparator, upto, 1)); - upto = c + 1; - } - else if(c[0] == ',') - { - InsertText(upto, c, result); - upto = c; - result.push_back(cmGeneratorExpressionToken( - cmGeneratorExpressionToken::CommaSeparator, upto, 1)); - upto = c + 1; - } + switch(*c) + { + case '$': + if(c[1] == '<') + { + InsertText(upto, c, result); + result.push_back(cmGeneratorExpressionToken( + cmGeneratorExpressionToken::BeginExpression, c, 2)); + upto = c + 2; + ++c; + SawBeginExpression = true; + } + break; + case '>': + InsertText(upto, c, result); + result.push_back(cmGeneratorExpressionToken( + cmGeneratorExpressionToken::EndExpression, c, 1)); + upto = c + 1; + SawGeneratorExpression = SawBeginExpression; + break; + case ':': + InsertText(upto, c, result); + result.push_back(cmGeneratorExpressionToken( + cmGeneratorExpressionToken::ColonSeparator, c, 1)); + upto = c + 1; + break; + case ',': + InsertText(upto, c, result); + result.push_back(cmGeneratorExpressionToken( + cmGeneratorExpressionToken::CommaSeparator, c, 1)); + upto = c + 1; + break; + default: + break; + } } InsertText(upto, c, result); -- cgit v0.12 From 7c565d2fd5e86d420ea83eb724ad5380ca5c2e97 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Sun, 9 Feb 2014 05:09:52 -0500 Subject: cmGeneratorExpression: Improve parsing in StripEmptyListElements The char-by-char parsing causes lots of reallocations which shouldn't be necessary. To improve this, fast-path strings without a semicolon, reserve space in the result, and insert into the result in chunks. --- Source/cmGeneratorExpression.cxx | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Source/cmGeneratorExpression.cxx b/Source/cmGeneratorExpression.cxx index 2e66d78..cd30546 100644 --- a/Source/cmGeneratorExpression.cxx +++ b/Source/cmGeneratorExpression.cxx @@ -157,17 +157,24 @@ cmCompiledGeneratorExpression::~cmCompiledGeneratorExpression() std::string cmGeneratorExpression::StripEmptyListElements( const std::string &input) { + if (input.find(';') == input.npos) + { + return input; + } std::string result; + result.reserve(input.size()); const char *c = input.c_str(); + const char *last = c; bool skipSemiColons = true; for ( ; *c; ++c) { - if(c[0] == ';') + if(*c == ';') { if(skipSemiColons) { - continue; + result.append(last, c - last); + last = c + 1; } skipSemiColons = true; } @@ -175,8 +182,8 @@ std::string cmGeneratorExpression::StripEmptyListElements( { skipSemiColons = false; } - result += *c; } + result.append(last); if (!result.empty() && *(result.end() - 1) == ';') { -- cgit v0.12 From 048be20536b6960e3a4a5001ba3d642e2e5cdad1 Mon Sep 17 00:00:00 2001 From: Ben Boeckel Date: Tue, 11 Feb 2014 13:54:07 -0500 Subject: Help: Add release notes for the 'faster-parsers' topic --- Help/release/dev/faster-parsers.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Help/release/dev/faster-parsers.rst diff --git a/Help/release/dev/faster-parsers.rst b/Help/release/dev/faster-parsers.rst new file mode 100644 index 0000000..c2a8bfb --- /dev/null +++ b/Help/release/dev/faster-parsers.rst @@ -0,0 +1,6 @@ +faster-parsers +-------------- + +* The :manual:`cmake-language(7)` internal implementation of generator + expression and list expansion parsers have been optimized and shows + non-trivial speedup on large projects. -- cgit v0.12