summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Source/cmFileCommand.cxx304
-rw-r--r--Source/cmFileCommand.h25
-rw-r--r--Tests/StringFileTest/CMakeLists.txt12
3 files changed, 341 insertions, 0 deletions
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx
index d988dd3..1d0187c 100644
--- a/Source/cmFileCommand.cxx
+++ b/Source/cmFileCommand.cxx
@@ -70,6 +70,10 @@ bool cmFileCommand::InitialPass(std::vector<std::string> const& args)
{
return this->HandleReadCommand(args);
}
+ else if ( subCommand == "STRINGS" )
+ {
+ return this->HandleStringsCommand(args);
+ }
else if ( subCommand == "GLOB" )
{
return this->HandleGlobCommand(args, false);
@@ -252,6 +256,306 @@ bool cmFileCommand::HandleReadCommand(std::vector<std::string> const& args)
}
//----------------------------------------------------------------------------
+bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
+{
+ if(args.size() < 3)
+ {
+ this->SetError("STRINGS requires a file name and output variable");
+ return false;
+ }
+
+ // Get the file to read.
+ std::string fileName = args[1];
+ if(!cmsys::SystemTools::FileIsFullPath(fileName.c_str()))
+ {
+ fileName = this->Makefile->GetCurrentDirectory();
+ fileName += "/" + args[1];
+ }
+
+ // Get the variable in which to store the results.
+ std::string outVar = args[2];
+
+ // Parse the options.
+ enum { arg_none,
+ arg_limit_input,
+ arg_limit_output,
+ arg_limit_count,
+ arg_length_minimum,
+ arg_length_maximum,
+ arg__maximum,
+ arg_regex };
+ unsigned int minlen = 0;
+ unsigned int maxlen = 0;
+ int limit_input = -1;
+ int limit_output = -1;
+ unsigned int limit_count = 0;
+ cmsys::RegularExpression regex;
+ bool have_regex = false;
+ bool newline_consume = false;
+ int arg_mode = arg_none;
+ for(unsigned int i=3; i < args.size(); ++i)
+ {
+ if(args[i] == "LIMIT_INPUT")
+ {
+ arg_mode = arg_limit_input;
+ }
+ else if(args[i] == "LIMIT_OUTPUT")
+ {
+ arg_mode = arg_limit_output;
+ }
+ else if(args[i] == "LIMIT_COUNT")
+ {
+ arg_mode = arg_limit_count;
+ }
+ else if(args[i] == "LENGTH_MINIMUM")
+ {
+ arg_mode = arg_length_minimum;
+ }
+ else if(args[i] == "LENGTH_MAXIMUM")
+ {
+ arg_mode = arg_length_maximum;
+ }
+ else if(args[i] == "REGEX")
+ {
+ arg_mode = arg_regex;
+ }
+ else if(args[i] == "NEWLINE_CONSUME")
+ {
+ newline_consume = true;
+ arg_mode = arg_none;
+ }
+ else if(arg_mode == arg_limit_input)
+ {
+ if(sscanf(args[i].c_str(), "%d", &limit_input) != 1 ||
+ limit_input < 0)
+ {
+ cmOStringStream e;
+ e << "STRINGS option LIMIT_INPUT value \""
+ << args[i] << "\" is not an unsigned integer.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ arg_mode = arg_none;
+ }
+ else if(arg_mode == arg_limit_output)
+ {
+ if(sscanf(args[i].c_str(), "%d", &limit_output) != 1 ||
+ limit_output < 0)
+ {
+ cmOStringStream e;
+ e << "STRINGS option LIMIT_OUTPUT value \""
+ << args[i] << "\" is not an unsigned integer.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ arg_mode = arg_none;
+ }
+ else if(arg_mode == arg_limit_count)
+ {
+ int count;
+ if(sscanf(args[i].c_str(), "%d", &count) != 1 || count < 0)
+ {
+ cmOStringStream e;
+ e << "STRINGS option LIMIT_COUNT value \""
+ << args[i] << "\" is not an unsigned integer.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ limit_count = count;
+ arg_mode = arg_none;
+ }
+ else if(arg_mode == arg_length_minimum)
+ {
+ int len;
+ if(sscanf(args[i].c_str(), "%d", &len) != 1 || len < 0)
+ {
+ cmOStringStream e;
+ e << "STRINGS option LENGTH_MINIMUM value \""
+ << args[i] << "\" is not an unsigned integer.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ minlen = len;
+ arg_mode = arg_none;
+ }
+ else if(arg_mode == arg_length_maximum)
+ {
+ int len;
+ if(sscanf(args[i].c_str(), "%d", &len) != 1 || len < 0)
+ {
+ cmOStringStream e;
+ e << "STRINGS option LENGTH_MAXIMUM value \""
+ << args[i] << "\" is not an unsigned integer.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ maxlen = len;
+ arg_mode = arg_none;
+ }
+ else if(arg_mode == arg_regex)
+ {
+ if(!regex.compile(args[i].c_str()))
+ {
+ cmOStringStream e;
+ e << "STRINGS option REGEX value \""
+ << args[i] << "\" could not be compiled.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ have_regex = true;
+ arg_mode = arg_none;
+ }
+ else
+ {
+ cmOStringStream e;
+ e << "STRINGS given unknown argument \""
+ << args[i] << "\"";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+ }
+
+ // Open the specified file.
+#if defined(_WIN32) || defined(__CYGWIN__)
+ std::ifstream fin(fileName.c_str(), std::ios::in | std::ios::binary);
+#else
+ std::ifstream fin(fileName.c_str(), std::ios::in);
+#endif
+ if(!fin)
+ {
+ cmOStringStream e;
+ e << "STRINGS file \"" << fileName << "\" cannot be read.";
+ this->SetError(e.str().c_str());
+ return false;
+ }
+
+ // Parse strings out of the file.
+ int output_size = 0;
+ std::vector<std::string> strings;
+ std::string s;
+ int c;
+ while((!limit_count || strings.size() < limit_count) &&
+ (limit_input < 0 || static_cast<int>(fin.tellg()) < limit_input) &&
+ (c = fin.get(), fin))
+ {
+ if(c == '\0')
+ {
+ // A terminating null character has been found. Check if the
+ // current string matches the requirements. Since it was
+ // terminated by a null character, we require that the length be
+ // at least one no matter what the user specified.
+ if(s.length() >= minlen && s.length() >= 1 &&
+ (!have_regex || regex.find(s.c_str())))
+ {
+ output_size += s.size() + 1;
+ if(limit_output >= 0 && output_size >= limit_output)
+ {
+ s = "";
+ break;
+ }
+ strings.push_back(s);
+ }
+
+ // Reset the string to empty.
+ s = "";
+ }
+ else if(c == '\n' && !newline_consume)
+ {
+ // The current line has been terminated. Check if the current
+ // string matches the requirements. The length may now be as
+ // low as zero since blank lines are allowed.
+ if(s.length() >= minlen &&
+ (!have_regex || regex.find(s.c_str())))
+ {
+ output_size += s.size() + 1;
+ if(limit_output >= 0 && output_size >= limit_output)
+ {
+ s = "";
+ break;
+ }
+ strings.push_back(s);
+ }
+
+ // Reset the string to empty.
+ s = "";
+ }
+ else if(c == '\r')
+ {
+ // Ignore CR character to make output always have UNIX newlines.
+ }
+ else if(c >= 0x20 && c < 0x7F || c == '\t' ||
+ (c == '\n' && newline_consume))
+ {
+ // This is an ASCII character that may be part of a string.
+ s += c;
+ }
+ else
+ {
+ // This is a non-string character. Reset the string to emtpy.
+ s = "";
+ }
+
+ // Terminate a string if the maximum length is reached.
+ if(maxlen > 0 && s.size() == maxlen)
+ {
+ if(s.length() >= minlen &&
+ (!have_regex || regex.find(s.c_str())))
+ {
+ output_size += s.size() + 1;
+ if(limit_output >= 0 && output_size >= limit_output)
+ {
+ s = "";
+ break;
+ }
+ strings.push_back(s);
+ }
+ s = "";
+ }
+ }
+
+ // If there is a non-empty current string we have hit the end of the
+ // input file or the input size limit. Check if the current string
+ // matches the requirements.
+ if((!limit_count || strings.size() < limit_count) &&
+ !s.empty() && s.length() >= minlen &&
+ (!have_regex || regex.find(s.c_str())))
+ {
+ output_size += s.size() + 1;
+ if(limit_output < 0 || output_size < limit_output)
+ {
+ strings.push_back(s);
+ }
+ }
+
+ // Encode the result in a CMake list.
+ const char* sep = "";
+ std::string output;
+ for(std::vector<std::string>::const_iterator si = strings.begin();
+ si != strings.end(); ++si)
+ {
+ // Separate the strings in the output to make it a list.
+ output += sep;
+ sep = ";";
+
+ // Store the string in the output, but escape semicolons to
+ // make sure it is a list.
+ std::string const& sr = *si;
+ for(unsigned int i=0; i < sr.size(); ++i)
+ {
+ if(sr[i] == ';')
+ {
+ output += '\\';
+ }
+ output += sr[i];
+ }
+ }
+
+ // Save the output in a makefile variable.
+ this->Makefile->AddDefinition(outVar.c_str(), output.c_str());
+ return true;
+}
+
+//----------------------------------------------------------------------------
bool cmFileCommand::HandleGlobCommand(std::vector<std::string> const& args,
bool recurse)
{
diff --git a/Source/cmFileCommand.h b/Source/cmFileCommand.h
index ed43207..d6327b1 100644
--- a/Source/cmFileCommand.h
+++ b/Source/cmFileCommand.h
@@ -67,6 +67,10 @@ public:
" FILE(WRITE filename \"message to write\"... )\n"
" FILE(APPEND filename \"message to write\"... )\n"
" FILE(READ filename variable [LIMIT numBytes])\n"
+ " FILE(STRINGS filename variable [LIMIT_COUNT num]\n"
+ " [LIMIT_INPUT numBytes] [LIMIT_OUTPUT numBytes]\n"
+ " [LENGTH_MINIMUM numBytes] [LENGTH_MAXIMUM numBytes]\n"
+ " [NEWLINE_CONSUME] [REGEX regex])\n"
" FILE(GLOB variable [RELATIVE path] [globbing expressions]...)\n"
" FILE(GLOB_RECURSE variable [RELATIVE path] \n"
" [globbing expressions]...)\n"
@@ -87,6 +91,26 @@ public:
"want to generate input files to CMake.\n"
"READ will read the content of a file and store it into the "
"variable.\n"
+ "STRINGS will parse a list of ASCII strings from a file and store it "
+ "in a variable. Binary data in the file are ignored. Carriage return "
+ "(CR) characters are ignored. "
+ "LIMIT_COUNT sets the maximum number of strings to return. "
+ "LIMIT_INPUT sets the maximum number of bytes to read from "
+ "the input file. "
+ "LIMIT_OUTPUT sets the maximum number of bytes to store in the "
+ "output variable. "
+ "LENGTH_MINIMUM sets the minimum length of a string to return. "
+ "Shorter strings are ignored. "
+ "LENGTH_MAXIMUM sets the maximum length of a string to return. Longer "
+ "strings are split into strings no longer than the maximum length. "
+ "NEWLINE_CONSUME allows newlines to be included in strings instead "
+ "of terminating them. "
+ "REGEX specifies a regular expression that a string must match to be "
+ "returned. "
+ "Typical usage \n"
+ " FILE(STRINGS myfile.txt myfile)\n"
+ "stores a list in the variable \"myfile\" in which each item is "
+ "a line from the input file.\n"
"GLOB will generate a list of all files that match the globbing "
"expressions and store it into the variable. Globbing expressions "
"are similar to regular expressions, but much simpler. If RELATIVE "
@@ -119,6 +143,7 @@ protected:
bool HandleRemove(std::vector<std::string> const& args, bool recurse);
bool HandleWriteCommand(std::vector<std::string> const& args, bool append);
bool HandleReadCommand(std::vector<std::string> const& args);
+ bool HandleStringsCommand(std::vector<std::string> const& args);
bool HandleGlobCommand(std::vector<std::string> const& args, bool recurse);
bool HandleMakeDirectoryCommand(std::vector<std::string> const& args);
bool HandleInstallCommand(std::vector<std::string> const& args);
diff --git a/Tests/StringFileTest/CMakeLists.txt b/Tests/StringFileTest/CMakeLists.txt
index a682076..618f92a 100644
--- a/Tests/StringFileTest/CMakeLists.txt
+++ b/Tests/StringFileTest/CMakeLists.txt
@@ -4,6 +4,18 @@ INCLUDE_DIRECTORIES(${StringFileTest_BINARY_DIR})
# Read file test
FILE(READ "${CMAKE_CURRENT_SOURCE_DIR}/InputFile.h.in" infile)
+# FILE(STRINGS) test
+FILE(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/InputFile.h.in" infile_strings
+ LIMIT_COUNT 1 LIMIT_INPUT 1024 LIMIT_OUTPUT 1024
+ LENGTH_MINIMUM 10 LENGTH_MAXIMUM 23 REGEX include NEWLINE_CONSUME)
+SET(infile_strings_goal "#include \"includefile\"\n")
+IF("${infile_strings}" STREQUAL "${infile_strings_goal}")
+ MESSAGE("FILE(STRINGS) correctly read [${infile_strings}]")
+ELSE("${infile_strings}" STREQUAL "${infile_strings_goal}")
+ MESSAGE(SEND_ERROR
+ "FILE(STRINGS) incorrectly read [${infile_strings}]")
+ENDIF("${infile_strings}" STREQUAL "${infile_strings_goal}")
+
# String test
STRING(REGEX MATCH "[cC][mM][aA][kK][eE]" rmvar "CMake is great")
STRING(REGEX MATCHALL "[cC][mM][aA][kK][eE]" rmallvar "CMake is better than cmake or CMake")