diff options
author | Brad King <brad.king@kitware.com> | 2010-04-14 15:43:02 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2010-04-14 15:44:43 (GMT) |
commit | 33ddb23e67bc2e202468dde59c4fdbb4e04e8873 (patch) | |
tree | 63af78054409627c1b0b0b181c2e4e82d713d490 | |
parent | 13e6b430ff1f12ad6f7a96498b6f9f5763dd9529 (diff) | |
download | CMake-33ddb23e67bc2e202468dde59c4fdbb4e04e8873.zip CMake-33ddb23e67bc2e202468dde59c4fdbb4e04e8873.tar.gz CMake-33ddb23e67bc2e202468dde59c4fdbb4e04e8873.tar.bz2 |
Handle non-ASCII terminators in file(STRINGS)
Commit "Support more special characters in file(STRINGS)" (2009-10-06)
attempted to support parsing strings from binaries produced by the
Portland Group Fortran compiler. The compiler seems to put an extra
byte just at the end of its string literals. Previously we dealt with
this by explicitly enumerating bytes known to occur, but it seems that
many such possibilities exist. Now we support extraction of strings
that end in any non-ASCII character.
-rw-r--r-- | Source/cmFileCommand.cxx | 50 |
1 file changed, 19 insertions, 31 deletions
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx index c2e90b6..5611527 100644 --- a/Source/cmFileCommand.cxx +++ b/Source/cmFileCommand.cxx @@ -529,13 +529,6 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) return false; } - // At least one compiler (Portland Group Fortran) produces binaries - // with some extra characters in strings. - char extra[256]; // = {}; // some compilers do not like this - memset(extra, 0, sizeof(extra)); - extra[0x0c] = 1; // FF (form feed) - extra[0x14] = 1; // DC4 (device control 4) - // Parse strings out of the file. int output_size = 0; std::vector<std::string> strings; @@ -545,28 +538,7 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) (limit_input < 0 || static_cast<int>(fin.tellg()) < limit_input) && (c = fin.get(), fin)) { - if(c == '\0') - { - // A terminating null character has been found. Check if the - // current string matches the requirements. Since it was - // terminated by a null character, we require that the length be - // at least one no matter what the user specified. - if(s.length() >= minlen && s.length() >= 1 && - (!have_regex || regex.find(s.c_str()))) - { - output_size += static_cast<int>(s.size()) + 1; - if(limit_output >= 0 && output_size >= limit_output) - { - s = ""; - break; - } - strings.push_back(s); - } - - // Reset the string to empty. - s = ""; - } - else if(c == '\n' && !newline_consume) + if(c == '\n' && !newline_consume) { // The current line has been terminated. Check if the current // string matches the requirements. The length may now be as @@ -590,7 +562,7 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) { // Ignore CR character to make output always have UNIX newlines. } - else if((c >= 0x20 && c < 0x7F) || c == '\t' || extra[c] || + else if((c >= 0x20 && c < 0x7F) || c == '\t' || (c == '\n' && newline_consume)) { // This is an ASCII character that may be part of a string. 
@@ -600,7 +572,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args) } else { - // This is a non-string character. Reset the string to emtpy. + // TODO: Support ENCODING option. See issue #10519. + // A non-string character has been found. Check if the current + // string matches the requirements. We require that the length + // be at least one no matter what the user specified. + if(s.length() >= minlen && s.length() >= 1 && + (!have_regex || regex.find(s.c_str()))) + { + output_size += static_cast<int>(s.size()) + 1; + if(limit_output >= 0 && output_size >= limit_output) + { + s = ""; + break; + } + strings.push_back(s); + } + + // Reset the string to empty. s = ""; } |