diff options
Diffstat (limited to 'Source/kwsys/RegularExpression.hxx.in')
-rw-r--r-- | Source/kwsys/RegularExpression.hxx.in | 548 |
1 files changed, 548 insertions, 0 deletions
diff --git a/Source/kwsys/RegularExpression.hxx.in b/Source/kwsys/RegularExpression.hxx.in new file mode 100644 index 0000000..92e4b36 --- /dev/null +++ b/Source/kwsys/RegularExpression.hxx.in @@ -0,0 +1,548 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing#kwsys for details. */ +// Original Copyright notice: +// Copyright (C) 1991 Texas Instruments Incorporated. +// +// Permission is granted to any individual or institution to use, copy, modify, +// and distribute this software, provided that this complete copyright and +// permission notice is maintained, intact, in all copies and supporting +// documentation. +// +// Texas Instruments Incorporated provides this software "as is" without +// express or implied warranty. +// +// Created: MNF 06/13/89 Initial Design and Implementation +// Updated: LGO 08/09/89 Inherit from Generic +// Updated: MBN 09/07/89 Added conditional exception handling +// Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place! +// Updated: DLS 03/22/91 New lite version +// + +#ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx +#define @KWSYS_NAMESPACE@_RegularExpression_hxx + +#include <@KWSYS_NAMESPACE@/Configure.h> +#include <@KWSYS_NAMESPACE@/Configure.hxx> + +#include <string> + +/* Disable useless Borland warnings. KWSys tries not to force things + on its includers, but there is no choice here. */ +#if defined(__BORLANDC__) +#pragma warn - 8027 /* function not inlined. */ +#endif + +namespace @KWSYS_NAMESPACE@ { + +// Forward declaration +class RegularExpression; + +/** \class RegularExpressionMatch + * \brief Stores the pattern matches of a RegularExpression + */ +class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch +{ +public: + RegularExpressionMatch(); + + bool isValid() const; + void clear(); + + std::string::size_type start() const; + std::string::size_type end() const; + std::string::size_type start(int n) const; + std::string::size_type end(int n) const; + std::string match(int n) const; + + enum + { + NSUBEXP = 10 + }; + +private: + friend class RegularExpression; + const char* startp[NSUBEXP]; + const char* endp[NSUBEXP]; + const char* searchstring; +}; + +/** + * \brief Creates an invalid match object + */ +inline RegularExpressionMatch::RegularExpressionMatch() +{ + startp[0] = 0; + endp[0] = 0; + searchstring = 0; +} + +/** + * \brief Returns true if the match pointers are valid + */ +inline bool RegularExpressionMatch::isValid() const +{ + return (this->startp[0] != 0); +} + +/** + * \brief Resets to the (invalid) construction state. + */ +inline void RegularExpressionMatch::clear() +{ + startp[0] = 0; + endp[0] = 0; + searchstring = 0; +} + +/** + * \brief Returns the start index of the full match. + */ +inline std::string::size_type RegularExpressionMatch::start() const +{ + return static_cast<std::string::size_type>(this->startp[0] - searchstring); +} + +/** + * \brief Returns the end index of the full match. + */ +inline std::string::size_type RegularExpressionMatch::end() const +{ + return static_cast<std::string::size_type>(this->endp[0] - searchstring); +} + +/** + * \brief Returns the start index of nth submatch. + * start(0) is the start of the full match. + */ +inline std::string::size_type RegularExpressionMatch::start(int n) const +{ + return static_cast<std::string::size_type>(this->startp[n] - + this->searchstring); +} + +/** + * \brief Returns the end index of nth submatch. + * end(0) is the end of the full match. + */ +inline std::string::size_type RegularExpressionMatch::end(int n) const +{ + return static_cast<std::string::size_type>(this->endp[n] - + this->searchstring); +} + +/** + * \brief Returns the nth submatch as a string. + */ +inline std::string RegularExpressionMatch::match(int n) const +{ + if (this->startp[n] == 0) { + return std::string(); + } else { + return std::string(this->startp[n], static_cast<std::string::size_type>( + this->endp[n] - this->startp[n])); + } +} + +/** \class RegularExpression + * \brief Implements pattern matching with regular expressions. + * + * This is the header file for the regular expression class. An object of + * this class contains a regular expression, in a special "compiled" format. + * This compiled format consists of several slots all kept as the objects + * private data. The RegularExpression class provides a convenient way to + * represent regular expressions. It makes it easy to search for the same + * regular expression in many different strings without having to compile a + * string to regular expression format more than necessary. + * + * This class implements pattern matching via regular expressions. + * A regular expression allows a programmer to specify complex + * patterns that can be searched for and matched against the + * character string of a string object. In its simplest form, a + * regular expression is a sequence of characters used to + * search for exact character matches. However, many times the + * exact sequence to be found is not known, or only a match at + * the beginning or end of a string is desired. The RegularExpression regu- + * lar expression class implements regular expression pattern + * matching as is found and implemented in many UNIX commands + * and utilities. + * + * Example: The perl code + * + * $filename =~ m"([a-z]+)\.cc"; + * print $1; + * + * Is written as follows in C++ + * + * RegularExpression re("([a-z]+)\\.cc"); + * re.find(filename); + * cerr << re.match(1); + * + * + * The regular expression class provides a convenient mechanism + * for specifying and manipulating regular expressions. The + * regular expression object allows specification of such pat- + * terns by using the following regular expression metacharac- + * ters: + * + * ^ Matches at beginning of a line + * + * $ Matches at end of a line + * + * . Matches any single character + * + * [ ] Matches any character(s) inside the brackets + * + * [^ ] Matches any character(s) not inside the brackets + * + * - Matches any character in range on either side of a dash + * + * * Matches preceding pattern zero or more times + * + * + Matches preceding pattern one or more times + * + * ? Matches preceding pattern zero or once only + * + * () Saves a matched expression and uses it in a later match + * + * Note that more than one of these metacharacters can be used + * in a single regular expression in order to create complex + * search patterns. For example, the pattern [^ab1-9] says to + * match any character sequence that does not begin with the + * characters "ab" followed by numbers in the series one + * through nine. + * + * There are three constructors for RegularExpression. One just creates an + * empty RegularExpression object. Another creates a RegularExpression + * object and initializes it with a regular expression that is given in the + * form of a char*. The third takes a reference to a RegularExpression + * object as an argument and creates an object initialized with the + * information from the given RegularExpression object. + * + * The find member function finds the first occurrence of the regular + * expression of that object in the string given to find as an argument. Find + * returns a boolean, and if true, mutates the private data appropriately. + * Find sets pointers to the beginning and end of the thing last found, they + * are pointers into the actual string that was searched. The start and end + * member functions return indices into the searched string that correspond + * to the beginning and end pointers respectively. The compile member + * function takes a char* and puts the compiled version of the char* argument + * into the object's private data fields. The == and != operators only check + * the to see if the compiled regular expression is the same, and the + * deep_equal functions also checks to see if the start and end pointers are + * the same. The is_valid function returns false if program is set to NULL, + * (i.e. there is no valid compiled expression). The set_invalid function + * sets the program to NULL (Warning: this deletes the compiled expression). + * The following examples may help clarify regular expression usage: + * + * * The regular expression "^hello" matches a "hello" only at the + * beginning of a line. It would match "hello there" but not "hi, + * hello there". + * + * * The regular expression "long$" matches a "long" only at the end + * of a line. It would match "so long\0", but not "long ago". + * + * * The regular expression "t..t..g" will match anything that has a + * "t" then any two characters, another "t", any two characters and + * then a "g". It will match "testing", or "test again" but would + * not match "toasting" + * + * * The regular expression "[1-9ab]" matches any number one through + * nine, and the characters "a" and "b". It would match "hello 1" + * or "begin", but would not match "no-match". + * + * * The regular expression "[^1-9ab]" matches any character that is + * not a number one through nine, or an "a" or "b". It would NOT + * match "hello 1" or "begin", but would match "no-match". + * + * * The regular expression "br* " matches something that begins with + * a "b", is followed by zero or more "r"s, and ends in a space. It + * would match "brrrrr ", and "b ", but would not match "brrh ". + * + * * The regular expression "br+ " matches something that begins with + * a "b", is followed by one or more "r"s, and ends in a space. It + * would match "brrrrr ", and "br ", but would not match "b " or + * "brrh ". + * + * * The regular expression "br? " matches something that begins with + * a "b", is followed by zero or one "r"s, and ends in a space. It + * would match "br ", and "b ", but would not match "brrrr " or + * "brrh ". + * + * * The regular expression "(..p)b" matches something ending with pb + * and beginning with whatever the two characters before the first p + * encountered in the line were. It would find "repb" in "rep drepa + * qrepb". The regular expression "(..p)a" would find "repa qrepb" + * in "rep drepa qrepb" + * + * * The regular expression "d(..p)" matches something ending with p, + * beginning with d, and having two characters in between that are + * the same as the two characters before the first p encountered in + * the line. It would match "drepa qrepb" in "rep drepa qrepb". + * + * All methods of RegularExpression can be called simultaneously from + * different threads but only if each invocation uses an own instance of + * RegularExpression. + */ +class @KWSYS_NAMESPACE@_EXPORT RegularExpression +{ +public: + /** + * Instantiate RegularExpression with program=NULL. + */ + inline RegularExpression(); + + /** + * Instantiate RegularExpression with compiled char*. + */ + inline RegularExpression(char const*); + + /** + * Instantiate RegularExpression as a copy of another regular expression. + */ + RegularExpression(RegularExpression const&); + + /** + * Instantiate RegularExpression with compiled string. + */ + inline RegularExpression(std::string const&); + + /** + * Destructor. + */ + inline ~RegularExpression(); + + /** + * Compile a regular expression into internal code + * for later pattern matching. + */ + bool compile(char const*); + + /** + * Compile a regular expression into internal code + * for later pattern matching. + */ + inline bool compile(std::string const&); + + /** + * Matches the regular expression to the given string. + * Returns true if found, and sets start and end indexes + * in the RegularExpressionMatch instance accordingly. + * + * This method is thread safe when called with different + * RegularExpressionMatch instances. + */ + bool find(char const*, RegularExpressionMatch&) const; + + /** + * Matches the regular expression to the given string. + * Returns true if found, and sets start and end indexes accordingly. + */ + inline bool find(char const*); + + /** + * Matches the regular expression to the given std string. + * Returns true if found, and sets start and end indexes accordingly. + */ + inline bool find(std::string const&); + + /** + * Match indices + */ + inline RegularExpressionMatch const& regMatch() const; + inline std::string::size_type start() const; + inline std::string::size_type end() const; + inline std::string::size_type start(int n) const; + inline std::string::size_type end(int n) const; + + /** + * Match strings + */ + inline std::string match(int n) const; + + /** + * Copy the given regular expression. + */ + RegularExpression& operator=(const RegularExpression& rxp); + + /** + * Returns true if two regular expressions have the same + * compiled program for pattern matching. + */ + bool operator==(RegularExpression const&) const; + + /** + * Returns true if two regular expressions have different + * compiled program for pattern matching. + */ + inline bool operator!=(RegularExpression const&) const; + + /** + * Returns true if have the same compiled regular expressions + * and the same start and end pointers. + */ + bool deep_equal(RegularExpression const&) const; + + /** + * True if the compiled regexp is valid. + */ + inline bool is_valid() const; + + /** + * Marks the regular expression as invalid. + */ + inline void set_invalid(); + +private: + RegularExpressionMatch regmatch; + char regstart; // Internal use only + char reganch; // Internal use only + const char* regmust; // Internal use only + std::string::size_type regmlen; // Internal use only + char* program; + int progsize; +}; + +/** + * Create an empty regular expression. + */ +inline RegularExpression::RegularExpression() +{ + this->program = 0; +} + +/** + * Creates a regular expression from string s, and + * compiles s. + */ +inline RegularExpression::RegularExpression(const char* s) +{ + this->program = 0; + if (s) { + this->compile(s); + } +} + +/** + * Creates a regular expression from string s, and + * compiles s. + */ +inline RegularExpression::RegularExpression(const std::string& s) +{ + this->program = 0; + this->compile(s); +} + +/** + * Destroys and frees space allocated for the regular expression. + */ +inline RegularExpression::~RegularExpression() +{ + //#ifndef _WIN32 + delete[] this->program; + //#endif +} + +/** + * Compile a regular expression into internal code + * for later pattern matching. + */ +inline bool RegularExpression::compile(std::string const& s) +{ + return this->compile(s.c_str()); +} + +/** + * Matches the regular expression to the given std string. + * Returns true if found, and sets start and end indexes accordingly. + */ +inline bool RegularExpression::find(const char* s) +{ + return this->find(s, this->regmatch); +} + +/** + * Matches the regular expression to the given std string. + * Returns true if found, and sets start and end indexes accordingly. + */ +inline bool RegularExpression::find(std::string const& s) +{ + return this->find(s.c_str()); +} + +/** + * Returns the internal match object + */ +inline RegularExpressionMatch const& RegularExpression::regMatch() const +{ + return this->regmatch; +} + +/** + * Returns the start index of the full match. + */ +inline std::string::size_type RegularExpression::start() const +{ + return regmatch.start(); +} + +/** + * Returns the end index of the full match. + */ +inline std::string::size_type RegularExpression::end() const +{ + return regmatch.end(); +} + +/** + * Return start index of nth submatch. start(0) is the start of the full match. + */ +inline std::string::size_type RegularExpression::start(int n) const +{ + return regmatch.start(n); +} + +/** + * Return end index of nth submatch. end(0) is the end of the full match. + */ +inline std::string::size_type RegularExpression::end(int n) const +{ + return regmatch.end(n); +} + +/** + * Return nth submatch as a string. + */ +inline std::string RegularExpression::match(int n) const +{ + return regmatch.match(n); +} + +/** + * Returns true if two regular expressions have different + * compiled program for pattern matching. + */ +inline bool RegularExpression::operator!=(const RegularExpression& r) const +{ + return (!(*this == r)); +} + +/** + * Returns true if a valid regular expression is compiled + * and ready for pattern matching. + */ +inline bool RegularExpression::is_valid() const +{ + return (this->program != 0); +} + +inline void RegularExpression::set_invalid() +{ + //#ifndef _WIN32 + delete[] this->program; + //#endif + this->program = 0; +} + +} // namespace @KWSYS_NAMESPACE@ + +#endif |