summaryrefslogtreecommitdiffstats
path: root/Source/kwsys/RegularExpression.hxx.in
diff options
context:
space:
mode:
Diffstat (limited to 'Source/kwsys/RegularExpression.hxx.in')
-rw-r--r--Source/kwsys/RegularExpression.hxx.in548
1 files changed, 548 insertions, 0 deletions
diff --git a/Source/kwsys/RegularExpression.hxx.in b/Source/kwsys/RegularExpression.hxx.in
new file mode 100644
index 0000000..92e4b36
--- /dev/null
+++ b/Source/kwsys/RegularExpression.hxx.in
@@ -0,0 +1,548 @@
+/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+ file Copyright.txt or https://cmake.org/licensing#kwsys for details. */
+// Original Copyright notice:
+// Copyright (C) 1991 Texas Instruments Incorporated.
+//
+// Permission is granted to any individual or institution to use, copy, modify,
+// and distribute this software, provided that this complete copyright and
+// permission notice is maintained, intact, in all copies and supporting
+// documentation.
+//
+// Texas Instruments Incorporated provides this software "as is" without
+// express or implied warranty.
+//
+// Created: MNF 06/13/89 Initial Design and Implementation
+// Updated: LGO 08/09/89 Inherit from Generic
+// Updated: MBN 09/07/89 Added conditional exception handling
+// Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
+// Updated: DLS 03/22/91 New lite version
+//
+
+#ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
+#define @KWSYS_NAMESPACE@_RegularExpression_hxx
+
+#include <@KWSYS_NAMESPACE@/Configure.h>
+#include <@KWSYS_NAMESPACE@/Configure.hxx>
+
+#include <string>
+
+/* Disable useless Borland warnings. KWSys tries not to force things
+ on its includers, but there is no choice here. */
+#if defined(__BORLANDC__)
+#pragma warn - 8027 /* function not inlined. */
+#endif
+
+namespace @KWSYS_NAMESPACE@ {
+
+// Forward declaration
+class RegularExpression;
+
+/** \class RegularExpressionMatch
+ * \brief Stores the pattern matches of a RegularExpression
+ */
+class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch
+{
+public:
+ RegularExpressionMatch();
+
+ bool isValid() const;
+ void clear();
+
+ std::string::size_type start() const;
+ std::string::size_type end() const;
+ std::string::size_type start(int n) const;
+ std::string::size_type end(int n) const;
+ std::string match(int n) const;
+
+ enum
+ {
+ NSUBEXP = 10
+ };
+
+private:
+ friend class RegularExpression;
+ const char* startp[NSUBEXP];
+ const char* endp[NSUBEXP];
+ const char* searchstring;
+};
+
+/**
+ * \brief Creates an invalid match object
+ */
+inline RegularExpressionMatch::RegularExpressionMatch()
+{
+ startp[0] = 0;
+ endp[0] = 0;
+ searchstring = 0;
+}
+
+/**
+ * \brief Returns true if the match pointers are valid
+ */
+inline bool RegularExpressionMatch::isValid() const
+{
+ return (this->startp[0] != 0);
+}
+
+/**
+ * \brief Resets to the (invalid) construction state.
+ */
+inline void RegularExpressionMatch::clear()
+{
+ startp[0] = 0;
+ endp[0] = 0;
+ searchstring = 0;
+}
+
+/**
+ * \brief Returns the start index of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::start() const
+{
+ return static_cast<std::string::size_type>(this->startp[0] - searchstring);
+}
+
+/**
+ * \brief Returns the end index of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::end() const
+{
+ return static_cast<std::string::size_type>(this->endp[0] - searchstring);
+}
+
+/**
+ * \brief Returns the start index of nth submatch.
+ * start(0) is the start of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::start(int n) const
+{
+ return static_cast<std::string::size_type>(this->startp[n] -
+ this->searchstring);
+}
+
+/**
+ * \brief Returns the end index of nth submatch.
+ * end(0) is the end of the full match.
+ */
+inline std::string::size_type RegularExpressionMatch::end(int n) const
+{
+ return static_cast<std::string::size_type>(this->endp[n] -
+ this->searchstring);
+}
+
+/**
+ * \brief Returns the nth submatch as a string.
+ */
+inline std::string RegularExpressionMatch::match(int n) const
+{
+ if (this->startp[n] == 0) {
+ return std::string();
+ } else {
+ return std::string(this->startp[n], static_cast<std::string::size_type>(
+ this->endp[n] - this->startp[n]));
+ }
+}
+
+/** \class RegularExpression
+ * \brief Implements pattern matching with regular expressions.
+ *
+ * This is the header file for the regular expression class. An object of
+ * this class contains a regular expression, in a special "compiled" format.
+ * This compiled format consists of several slots all kept as the objects
+ * private data. The RegularExpression class provides a convenient way to
+ * represent regular expressions. It makes it easy to search for the same
+ * regular expression in many different strings without having to compile a
+ * string to regular expression format more than necessary.
+ *
+ * This class implements pattern matching via regular expressions.
+ * A regular expression allows a programmer to specify complex
+ * patterns that can be searched for and matched against the
+ * character string of a string object. In its simplest form, a
+ * regular expression is a sequence of characters used to
+ * search for exact character matches. However, many times the
+ * exact sequence to be found is not known, or only a match at
+ * the beginning or end of a string is desired. The RegularExpression regu-
+ * lar expression class implements regular expression pattern
+ * matching as is found and implemented in many UNIX commands
+ * and utilities.
+ *
+ * Example: The perl code
+ *
+ * $filename =~ m"([a-z]+)\.cc";
+ * print $1;
+ *
+ * Is written as follows in C++
+ *
+ * RegularExpression re("([a-z]+)\\.cc");
+ * re.find(filename);
+ * cerr << re.match(1);
+ *
+ *
+ * The regular expression class provides a convenient mechanism
+ * for specifying and manipulating regular expressions. The
+ * regular expression object allows specification of such pat-
+ * terns by using the following regular expression metacharac-
+ * ters:
+ *
+ * ^ Matches at beginning of a line
+ *
+ * $ Matches at end of a line
+ *
+ * . Matches any single character
+ *
+ * [ ] Matches any character(s) inside the brackets
+ *
+ * [^ ] Matches any character(s) not inside the brackets
+ *
+ * - Matches any character in range on either side of a dash
+ *
+ * * Matches preceding pattern zero or more times
+ *
+ * + Matches preceding pattern one or more times
+ *
+ * ? Matches preceding pattern zero or once only
+ *
+ * () Saves a matched expression and uses it in a later match
+ *
+ * Note that more than one of these metacharacters can be used
+ * in a single regular expression in order to create complex
+ * search patterns. For example, the pattern [^ab1-9] says to
+ * match any character sequence that does not begin with the
+ * characters "ab" followed by numbers in the series one
+ * through nine.
+ *
+ * There are three constructors for RegularExpression. One just creates an
+ * empty RegularExpression object. Another creates a RegularExpression
+ * object and initializes it with a regular expression that is given in the
+ * form of a char*. The third takes a reference to a RegularExpression
+ * object as an argument and creates an object initialized with the
+ * information from the given RegularExpression object.
+ *
+ * The find member function finds the first occurrence of the regular
+ * expression of that object in the string given to find as an argument. Find
+ * returns a boolean, and if true, mutates the private data appropriately.
+ * Find sets pointers to the beginning and end of the thing last found, they
+ * are pointers into the actual string that was searched. The start and end
+ * member functions return indices into the searched string that correspond
+ * to the beginning and end pointers respectively. The compile member
+ * function takes a char* and puts the compiled version of the char* argument
+ * into the object's private data fields. The == and != operators only check
+ * the to see if the compiled regular expression is the same, and the
+ * deep_equal functions also checks to see if the start and end pointers are
+ * the same. The is_valid function returns false if program is set to NULL,
+ * (i.e. there is no valid compiled expression). The set_invalid function
+ * sets the program to NULL (Warning: this deletes the compiled expression).
+ * The following examples may help clarify regular expression usage:
+ *
+ * * The regular expression "^hello" matches a "hello" only at the
+ * beginning of a line. It would match "hello there" but not "hi,
+ * hello there".
+ *
+ * * The regular expression "long$" matches a "long" only at the end
+ * of a line. It would match "so long\0", but not "long ago".
+ *
+ * * The regular expression "t..t..g" will match anything that has a
+ * "t" then any two characters, another "t", any two characters and
+ * then a "g". It will match "testing", or "test again" but would
+ * not match "toasting"
+ *
+ * * The regular expression "[1-9ab]" matches any number one through
+ * nine, and the characters "a" and "b". It would match "hello 1"
+ * or "begin", but would not match "no-match".
+ *
+ * * The regular expression "[^1-9ab]" matches any character that is
+ * not a number one through nine, or an "a" or "b". It would NOT
+ * match "hello 1" or "begin", but would match "no-match".
+ *
+ * * The regular expression "br* " matches something that begins with
+ * a "b", is followed by zero or more "r"s, and ends in a space. It
+ * would match "brrrrr ", and "b ", but would not match "brrh ".
+ *
+ * * The regular expression "br+ " matches something that begins with
+ * a "b", is followed by one or more "r"s, and ends in a space. It
+ * would match "brrrrr ", and "br ", but would not match "b " or
+ * "brrh ".
+ *
+ * * The regular expression "br? " matches something that begins with
+ * a "b", is followed by zero or one "r"s, and ends in a space. It
+ * would match "br ", and "b ", but would not match "brrrr " or
+ * "brrh ".
+ *
+ * * The regular expression "(..p)b" matches something ending with pb
+ * and beginning with whatever the two characters before the first p
+ * encountered in the line were. It would find "repb" in "rep drepa
+ * qrepb". The regular expression "(..p)a" would find "repa qrepb"
+ * in "rep drepa qrepb"
+ *
+ * * The regular expression "d(..p)" matches something ending with p,
+ * beginning with d, and having two characters in between that are
+ * the same as the two characters before the first p encountered in
+ * the line. It would match "drepa qrepb" in "rep drepa qrepb".
+ *
+ * All methods of RegularExpression can be called simultaneously from
+ * different threads but only if each invocation uses an own instance of
+ * RegularExpression.
+ */
+class @KWSYS_NAMESPACE@_EXPORT RegularExpression
+{
+public:
+ /**
+ * Instantiate RegularExpression with program=NULL.
+ */
+ inline RegularExpression();
+
+ /**
+ * Instantiate RegularExpression with compiled char*.
+ */
+ inline RegularExpression(char const*);
+
+ /**
+ * Instantiate RegularExpression as a copy of another regular expression.
+ */
+ RegularExpression(RegularExpression const&);
+
+ /**
+ * Instantiate RegularExpression with compiled string.
+ */
+ inline RegularExpression(std::string const&);
+
+ /**
+ * Destructor.
+ */
+ inline ~RegularExpression();
+
+ /**
+ * Compile a regular expression into internal code
+ * for later pattern matching.
+ */
+ bool compile(char const*);
+
+ /**
+ * Compile a regular expression into internal code
+ * for later pattern matching.
+ */
+ inline bool compile(std::string const&);
+
+ /**
+ * Matches the regular expression to the given string.
+ * Returns true if found, and sets start and end indexes
+ * in the RegularExpressionMatch instance accordingly.
+ *
+ * This method is thread safe when called with different
+ * RegularExpressionMatch instances.
+ */
+ bool find(char const*, RegularExpressionMatch&) const;
+
+ /**
+ * Matches the regular expression to the given string.
+ * Returns true if found, and sets start and end indexes accordingly.
+ */
+ inline bool find(char const*);
+
+ /**
+ * Matches the regular expression to the given std string.
+ * Returns true if found, and sets start and end indexes accordingly.
+ */
+ inline bool find(std::string const&);
+
+ /**
+ * Match indices
+ */
+ inline RegularExpressionMatch const& regMatch() const;
+ inline std::string::size_type start() const;
+ inline std::string::size_type end() const;
+ inline std::string::size_type start(int n) const;
+ inline std::string::size_type end(int n) const;
+
+ /**
+ * Match strings
+ */
+ inline std::string match(int n) const;
+
+ /**
+ * Copy the given regular expression.
+ */
+ RegularExpression& operator=(const RegularExpression& rxp);
+
+ /**
+ * Returns true if two regular expressions have the same
+ * compiled program for pattern matching.
+ */
+ bool operator==(RegularExpression const&) const;
+
+ /**
+ * Returns true if two regular expressions have different
+ * compiled program for pattern matching.
+ */
+ inline bool operator!=(RegularExpression const&) const;
+
+ /**
+ * Returns true if have the same compiled regular expressions
+ * and the same start and end pointers.
+ */
+ bool deep_equal(RegularExpression const&) const;
+
+ /**
+ * True if the compiled regexp is valid.
+ */
+ inline bool is_valid() const;
+
+ /**
+ * Marks the regular expression as invalid.
+ */
+ inline void set_invalid();
+
+private:
+ RegularExpressionMatch regmatch;
+ char regstart; // Internal use only
+ char reganch; // Internal use only
+ const char* regmust; // Internal use only
+ std::string::size_type regmlen; // Internal use only
+ char* program;
+ int progsize;
+};
+
+/**
+ * Create an empty regular expression.
+ */
+inline RegularExpression::RegularExpression()
+{
+ this->program = 0;
+}
+
+/**
+ * Creates a regular expression from string s, and
+ * compiles s.
+ */
+inline RegularExpression::RegularExpression(const char* s)
+{
+ this->program = 0;
+ if (s) {
+ this->compile(s);
+ }
+}
+
+/**
+ * Creates a regular expression from string s, and
+ * compiles s.
+ */
+inline RegularExpression::RegularExpression(const std::string& s)
+{
+ this->program = 0;
+ this->compile(s);
+}
+
+/**
+ * Destroys and frees space allocated for the regular expression.
+ */
+inline RegularExpression::~RegularExpression()
+{
+ //#ifndef _WIN32
+ delete[] this->program;
+ //#endif
+}
+
+/**
+ * Compile a regular expression into internal code
+ * for later pattern matching.
+ */
+inline bool RegularExpression::compile(std::string const& s)
+{
+ return this->compile(s.c_str());
+}
+
+/**
+ * Matches the regular expression to the given std string.
+ * Returns true if found, and sets start and end indexes accordingly.
+ */
+inline bool RegularExpression::find(const char* s)
+{
+ return this->find(s, this->regmatch);
+}
+
+/**
+ * Matches the regular expression to the given std string.
+ * Returns true if found, and sets start and end indexes accordingly.
+ */
+inline bool RegularExpression::find(std::string const& s)
+{
+ return this->find(s.c_str());
+}
+
+/**
+ * Returns the internal match object
+ */
+inline RegularExpressionMatch const& RegularExpression::regMatch() const
+{
+ return this->regmatch;
+}
+
+/**
+ * Returns the start index of the full match.
+ */
+inline std::string::size_type RegularExpression::start() const
+{
+ return regmatch.start();
+}
+
+/**
+ * Returns the end index of the full match.
+ */
+inline std::string::size_type RegularExpression::end() const
+{
+ return regmatch.end();
+}
+
+/**
+ * Return start index of nth submatch. start(0) is the start of the full match.
+ */
+inline std::string::size_type RegularExpression::start(int n) const
+{
+ return regmatch.start(n);
+}
+
+/**
+ * Return end index of nth submatch. end(0) is the end of the full match.
+ */
+inline std::string::size_type RegularExpression::end(int n) const
+{
+ return regmatch.end(n);
+}
+
+/**
+ * Return nth submatch as a string.
+ */
+inline std::string RegularExpression::match(int n) const
+{
+ return regmatch.match(n);
+}
+
+/**
+ * Returns true if two regular expressions have different
+ * compiled program for pattern matching.
+ */
+inline bool RegularExpression::operator!=(const RegularExpression& r) const
+{
+ return (!(*this == r));
+}
+
+/**
+ * Returns true if a valid regular expression is compiled
+ * and ready for pattern matching.
+ */
+inline bool RegularExpression::is_valid() const
+{
+ return (this->program != 0);
+}
+
+inline void RegularExpression::set_invalid()
+{
+ //#ifndef _WIN32
+ delete[] this->program;
+ //#endif
+ this->program = 0;
+}
+
+} // namespace @KWSYS_NAMESPACE@
+
+#endif