diff options
author | Brad King <brad.king@kitware.com> | 2018-09-25 23:43:41 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2018-12-12 13:10:15 (GMT) |
commit | ff69763ca08fbd3a9831ec0c8adb32a7111b1a46 (patch) | |
tree | 2ed587456b95ba27ab334a6f19f5703620463736 /Source | |
parent | 410a3e4b22c72794d4f96e41c1d37d84d6e7e54d (diff) | |
download | CMake-ff69763ca08fbd3a9831ec0c8adb32a7111b1a46.zip CMake-ff69763ca08fbd3a9831ec0c8adb32a7111b1a46.tar.gz CMake-ff69763ca08fbd3a9831ec0c8adb32a7111b1a46.tar.bz2 |
String: Add a custom string type
Create a `cm::String` type that holds a view of a string buffer and
optionally shares ownership of the buffer. Instances can either
borrow longer-lived storage (e.g. static storage of string literals)
or internally own a `std::string` instance. In the latter case,
share ownership with copies and substrings. Allocate a new internal
string only on operations that require mutation.
This will allow us to recover string sharing semantics that we
used to get from C++98 std::string copy-on-write implementations.
Such implementations are not allowed by C++11 so code our own in
a custom string type instead.
Diffstat (limited to 'Source')
-rw-r--r-- | Source/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Source/cmString.cxx | 131 | ||||
-rw-r--r-- | Source/cmString.hxx | 683 |
3 files changed, 816 insertions, 0 deletions
diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index 4bf2e73..0ce09d4 100644 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -568,6 +568,8 @@ set(SRCS cmSiteNameCommand.h cmSourceGroupCommand.cxx cmSourceGroupCommand.h + cmString.cxx + cmString.hxx cmStringReplaceHelper.cxx cmStringCommand.cxx cmStringCommand.h diff --git a/Source/cmString.cxx b/Source/cmString.cxx new file mode 100644 index 0000000..e965bfb --- /dev/null +++ b/Source/cmString.cxx @@ -0,0 +1,131 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing for details. */ +#define _SCL_SECURE_NO_WARNINGS + +#include "cmString.hxx" + +#include <memory> +#include <ostream> +#include <stdexcept> +#include <string> +#include <type_traits> + +namespace cm { + +static std::string const empty_string_; + +void String::internally_mutate_to_stable_string() +{ + // We assume that only one thread mutates this instance at + // a time even if we point to a shared string buffer refernced + // by other threads. + *this = String(data(), size()); +} + +std::string const& String::str() +{ + if (!data()) { + // We view no string. + // This is stable for the lifetime of our current value. + return empty_string_; + } + + if (string_ && data() == string_->data() && size() == string_->size()) { + // We view an entire string. + // This is stable for the lifetime of our current value. + return *string_; + } + + // Mutate to hold a std::string that is stable for the lifetime + // of our current value. + this->internally_mutate_to_stable_string(); + return *string_; +} + +const char* String::c_str() +{ + const char* c = data(); + if (c == nullptr) { + return c; + } + + // We always point into a null-terminated string so it is safe to + // access one past the end. If it is a null byte then we can use + // the pointer directly. + if (c[size()] == '\0') { + return c; + } + + // Mutate to hold a std::string so we can get a null terminator. + this->internally_mutate_to_stable_string(); + c = string_->c_str(); + return c; +} + +String& String::insert(size_type index, size_type count, char ch) +{ + std::string s; + s.reserve(size() + count); + s.assign(data(), size()); + s.insert(index, count, ch); + return *this = std::move(s); +} + +String& String::erase(size_type index, size_type count) +{ + if (index > size()) { + throw std::out_of_range("Index out of range in String::erase"); + } + size_type const rcount = std::min(count, size() - index); + size_type const rindex = index + rcount; + std::string s; + s.reserve(size() - rcount); + s.assign(data(), index); + s.append(data() + rindex, size() - rindex); + return *this = std::move(s); +} + +String String::substr(size_type pos, size_type count) const +{ + if (pos > size()) { + throw std::out_of_range("Index out of range in String::substr"); + } + return String(*this, pos, count); +} + +String::String(std::string&& s, Private) + : string_(std::make_shared<std::string>(std::move(s))) + , view_(string_->data(), string_->size()) +{ +} + +String::size_type String::copy(char* dest, size_type count, + size_type pos) const +{ + return view_.copy(dest, count, pos); +} + +std::ostream& operator<<(std::ostream& os, String const& s) +{ + return os.write(s.data(), s.size()); +} + +std::string& operator+=(std::string& self, String const& s) +{ + return self += s.view(); +} + +String IntoString<char*>::into_string(const char* s) +{ + if (!s) { + return String(); + } + return std::string(s); +} + +string_view AsStringView<String>::view(String const& s) +{ + return s.view(); +} + +} // namespace cm diff --git a/Source/cmString.hxx b/Source/cmString.hxx new file mode 100644 index 0000000..833234c --- /dev/null +++ b/Source/cmString.hxx @@ -0,0 +1,683 @@ +/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying + file Copyright.txt or https://cmake.org/licensing for details. */ +#ifndef cmString_hxx +#define cmString_hxx + +#include "cmConfigure.h" // IWYU pragma: keep + +#include "cm_string_view.hxx" + +#include <algorithm> +#include <functional> +#include <initializer_list> +#include <memory> +#include <ostream> +#include <string> +#include <type_traits> + +namespace cm { + +class String; + +/** + * Trait to convert type T into a String. + * Implementations must derive from 'std::true_type' + * and define an 'into_string' member that accepts + * type T (by value or reference) and returns one of: + * + * - 'std::string' to construct an owned instance. + * - 'cm::string_view' to construct a borrowed or null instances. + * The buffer from which the view is borrowed must outlive + * all copies of the resulting String, e.g. static storage. + * - 'cm::String' for already-constructed instances. + */ +template <typename T> +struct IntoString : std::false_type +{ +}; + +template <typename T> +struct IntoString<T&> : IntoString<T> +{ +}; + +template <typename T> +struct IntoString<T const> : IntoString<T> +{ +}; + +template <typename T> +struct IntoString<T const*> : IntoString<T*> +{ +}; + +template <typename T, std::string::size_type N> +struct IntoString<T const[N]> : IntoString<T[N]> +{ +}; + +template <> +struct IntoString<char*> : std::true_type +{ + static String into_string(const char* s); +}; + +template <> +struct IntoString<std::nullptr_t> : std::true_type +{ + static string_view into_string(std::nullptr_t) { return string_view(); } +}; + +template <std::string::size_type N> +struct IntoString<char[N]> : std::true_type +{ + static std::string into_string(char const (&s)[N]) + { + return std::string(s, N - 1); + } +}; + +template <> +struct IntoString<std::string> : std::true_type +{ + static std::string into_string(std::string s) { return s; } +}; + +template <> +struct IntoString<string_view> : std::true_type +{ + static std::string into_string(string_view s) { return std::string(s); } +}; + +template <> +struct IntoString<char> : std::true_type +{ + static std::string into_string(char const& c) { return std::string(1, c); } +}; + +/** + * Trait to convert type T into a 'cm::string_view'. + * Implementations must derive from 'std::true_type' and + * define a 'view' member that accepts type T (by reference) + * and returns a 'cm::string_view'. + */ +template <typename T> +struct AsStringView : std::false_type +{ +}; + +template <typename T> +struct AsStringView<T&> : AsStringView<T> +{ +}; + +template <typename T> +struct AsStringView<T const> : AsStringView<T> +{ +}; + +template <typename T> +struct AsStringView<T const*> : AsStringView<T*> +{ +}; + +template <typename T, std::string::size_type N> +struct AsStringView<T const[N]> : AsStringView<T[N]> +{ +}; + +template <> +struct AsStringView<char*> : std::true_type +{ + static string_view view(const char* s) { return s; } +}; + +template <std::string::size_type N> +struct AsStringView<char[N]> : std::true_type +{ + static string_view view(char const (&s)[N]) { return string_view(s, N - 1); } +}; + +template <> +struct AsStringView<std::string> : std::true_type +{ + static string_view view(std::string const& s) { return s; } +}; + +template <> +struct AsStringView<char> : std::true_type +{ + static string_view view(const char& s) { return string_view(&s, 1); } +}; + +template <> +struct AsStringView<string_view> : std::true_type +{ + static string_view view(string_view const& s) { return s; } +}; + +template <> +struct AsStringView<String> : std::true_type +{ + static string_view view(String const& s); +}; + +/** + * \class String + * + * A custom string type that holds a view of a string buffer + * and optionally shares ownership of the buffer. Instances + * may have one of the following states: + * + * - null: views and owns nothing. + * Conversion to 'bool' is 'false'. + * 'data()' and 'c_str()' return nullptr. + * 'size()' returns 0. + * 'str()' returns an empty string. + * + * - borrowed: views a string but does not own it. This is used + * to bind to static storage (e.g. string literals) or for + * temporary instances that do not outlive the borrowed buffer. + * Copies and substrings still borrow the original buffer. + * Mutation allocates a new internal string and converts to + * the 'owned' state. + * Conversion to 'bool' is 'true'. + * 'c_str()' may internally mutate to the 'owned' state. + * 'str()' internally mutates to the 'owned' state. + * + * - owned: views an immutable 'std::string' instance owned internally. + * Copies and substrings share ownership of the internal string. + * Mutation allocates a new internal string. + * Conversion to 'bool' is 'true'. + */ +class String +{ + enum class Private + { + }; + +public: + using traits_type = std::string::traits_type; + using value_type = string_view::value_type; + using pointer = string_view::pointer; + using const_pointer = string_view::const_pointer; + using reference = string_view::reference; + using const_reference = string_view::const_reference; + using const_iterator = string_view::const_iterator; + using iterator = string_view::const_iterator; + using const_reverse_iterator = string_view::const_reverse_iterator; + using reverse_iterator = string_view::const_reverse_iterator; + using difference_type = string_view::difference_type; + using size_type = string_view::size_type; + + static size_type const npos = string_view::npos; + + /** Construct a null string. */ + String() = default; + + /** Construct from any type implementing the IntoString trait. */ + template <typename T, + typename = typename std::enable_if<IntoString<T>::value>::type> + String(T&& s) + : String(IntoString<T>::into_string(std::forward<T>(s)), Private()) + { + } + + /** Construct via std::string initializer list constructor. */ + String(std::initializer_list<char> il) + : String(std::string(il)) + { + } + + /** Construct by copying the specified buffer. */ + String(const char* d, size_type s) + : String(std::string(d, s)) + { + } + + /** Construct by copying from input iterator range. */ + template <typename InputIterator> + String(InputIterator first, InputIterator last) + : String(std::string(first, last)) + { + } + + /** Construct a string with 'n' copies of character 'c'. */ + String(size_type n, char c) + : String(std::string(n, c)) + { + } + + /** Construct from a substring of another String instance. + This shares ownership of the other string's buffer + but views only a substring. */ + String(String const& s, size_type pos, size_type count = npos) + : string_(s.string_) + , view_(s.data() + pos, std::min(count, s.size() - pos)) + { + } + + /** Construct by moving from another String instance. + The other instance is left as a null string. */ + String(String&& s) noexcept + : string_(std::move(s.string_)) + , view_(s.view_) + { + s.view_ = string_view(); + } + + /** Construct by copying from another String instance. + This shares ownership of the other string's buffer. */ + String(String const&) noexcept = default; + + ~String() = default; + + /** Assign by moving from another String instance. + The other instance is left as a null string. */ + String& operator=(String&& s) noexcept + { + string_ = std::move(s.string_); + view_ = s.view_; + s.view_ = string_view(); + return *this; + } + + /** Assign by copying from another String instance. + This shares ownership of the other string's buffer. */ + String& operator=(String const&) noexcept = default; + + /** Assign from any type implementing the IntoString trait. */ + template <typename T> + typename // NOLINT(*) + std::enable_if<IntoString<T>::value, String&>::type + operator=(T&& s) + { + *this = String(std::forward<T>(s)); + return *this; + } + + /** Assign via std::string initializer list constructor. */ + String& operator=(std::initializer_list<char> il) + { + *this = String(il); + return *this; + } + + /** Return true if the instance is not a null string. */ + explicit operator bool() const noexcept { return data() != nullptr; } + + /** Return a view of the string. */ + string_view view() const noexcept { return view_; } + + /** Return true if the instance is an empty stringn or null string. */ + bool empty() const noexcept { return view_.empty(); } + + /** Return a pointer to the start of the string. */ + const char* data() const noexcept { return view_.data(); } + + /** Return the length of the string in bytes. */ + size_type size() const noexcept { return view_.size(); } + size_type length() const noexcept { return view_.length(); } + + /** Return the character at the given position. + No bounds checking is performed. */ + char operator[](size_type pos) const noexcept { return view_[pos]; } + + /** Return the character at the given position. + If the position is out of bounds, throws std::out_of_range. */ + char at(size_type pos) const { return view_.at(pos); } + + char front() const noexcept { return view_.front(); } + + char back() const noexcept { return view_.back(); } + + /** Get a refernce to a normal std::string. The reference + is valid until this instance is mutated or destroyed. */ + std::string const& str(); + + /** Get a pointer to a C-style null-terminated string + containing the same value as this instance. The pointer + is valid until this instance is mutated, destroyed, + or str() is called. */ + const char* c_str(); + + const_iterator begin() const noexcept { return view_.begin(); } + const_iterator end() const noexcept { return view_.end(); } + const_iterator cbegin() const noexcept { return begin(); } + const_iterator cend() const noexcept { return end(); } + + const_reverse_iterator rbegin() const noexcept { return view_.rbegin(); } + const_reverse_iterator rend() const noexcept { return view_.rend(); } + const_reverse_iterator crbegin() const noexcept { return rbegin(); } + const_reverse_iterator crend() const noexcept { return rend(); } + + /** Append to the string using any type that implements the + AsStringView trait. */ + template <typename T> + typename std::enable_if<AsStringView<T>::value, String&>::type operator+=( + T&& s) + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + std::string r; + r.reserve(size() + v.size()); + r.assign(data(), size()); + r.append(v.data(), v.size()); + return *this = std::move(r); + } + + /** Assign to an empty string. */ + void clear() { *this = String(string_view("", 0), Private()); } + + /** Insert 'count' copies of 'ch' at position 'index'. */ + String& insert(size_type index, size_type count, char ch); + + /** Erase 'count' characters starting at position 'index'. */ + String& erase(size_type index = 0, size_type count = npos); + + void push_back(char ch) + { + std::string s; + s.reserve(size() + 1); + s.assign(data(), size()); + s.push_back(ch); + *this = std::move(s); + } + + void pop_back() { *this = String(*this, 0, size() - 1); } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, String&>::type replace( + size_type pos, size_type count, T&& s) + { + const_iterator first = begin() + pos; + const_iterator last = first + count; + return replace(first, last, std::forward<T>(s)); + } + + template <typename InputIterator> + String& replace(const_iterator first, const_iterator last, + InputIterator first2, InputIterator last2) + { + std::string out; + out.append(view_.begin(), first); + out.append(first2, last2); + out.append(last, view_.end()); + return *this = std::move(out); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, String&>::type replace( + const_iterator first, const_iterator last, T&& s) + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + std::string out; + out.reserve((first - view_.begin()) + v.size() + (view_.end() - last)); + out.append(view_.begin(), first); + out.append(v.data(), v.size()); + out.append(last, view_.end()); + return *this = std::move(out); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, String&>::type replace( + size_type pos, size_type count, T&& s, size_type pos2, + size_type count2 = npos) + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + v = v.substr(pos2, count2); + return replace(pos, count, v); + } + + String& replace(size_type pos, size_type count, size_type count2, char ch) + { + const_iterator first = begin() + pos; + const_iterator last = first + count; + return replace(first, last, count2, ch); + } + + String& replace(const_iterator first, const_iterator last, size_type count2, + char ch) + { + std::string out; + out.reserve((first - view_.begin()) + count2 + (view_.end() - last)); + out.append(view_.begin(), first); + out.append(count2, ch); + out.append(last, view_.end()); + return *this = std::move(out); + } + + size_type copy(char* dest, size_type count, size_type pos = 0) const; + + void resize(size_type count) { resize(count, char()); } + + void resize(size_type count, char ch) + { + std::string s; + s.reserve(count); + if (count <= size()) { + s.assign(data(), count); + } else { + s.assign(data(), size()); + s.resize(count, ch); + } + *this = std::move(s); + } + + void swap(String& other) + { + std::swap(string_, other.string_); + std::swap(view_, other.view_); + } + + /** Return a substring starting at position 'pos' and + consisting of at most 'count' characters. */ + String substr(size_type pos = 0, size_type count = npos) const; + + template <typename T> + typename std::enable_if<AsStringView<T>::value, int>::type compare( + T&& s) const + { + return view_.compare(AsStringView<T>::view(std::forward<T>(s))); + } + + int compare(size_type pos1, size_type count1, string_view v) const + { + return view_.compare(pos1, count1, v); + } + + int compare(size_type pos1, size_type count1, string_view v, size_type pos2, + size_type count2) const + { + return view_.compare(pos1, count1, v, pos2, count2); + } + + int compare(size_type pos1, size_type count1, const char* s) const + { + return view_.compare(pos1, count1, s); + } + + int compare(size_type pos1, size_type count1, const char* s, + size_type count2) const + { + return view_.compare(pos1, count1, s, count2); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, size_type>::type find( + T&& s, size_type pos = 0) const + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + return view_.find(v, pos); + } + + size_type find(const char* s, size_type pos, size_type count) const + { + return view_.find(s, pos, count); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, size_type>::type rfind( + T&& s, size_type pos = npos) const + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + return view_.rfind(v, pos); + } + + size_type rfind(const char* s, size_type pos, size_type count) const + { + return view_.rfind(s, pos, count); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, size_type>::type + find_first_of(T&& s, size_type pos = 0) const + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + return view_.find_first_of(v, pos); + } + + size_type find_first_of(const char* s, size_type pos, size_type count) const + { + return view_.find_first_of(s, pos, count); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, size_type>::type + find_first_not_of(T&& s, size_type pos = 0) const + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + return view_.find_first_not_of(v, pos); + } + + size_type find_first_not_of(const char* s, size_type pos, + size_type count) const + { + return view_.find_first_not_of(s, pos, count); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, size_type>::type + find_last_of(T&& s, size_type pos = npos) const + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + return view_.find_last_of(v, pos); + } + + size_type find_last_of(const char* s, size_type pos, size_type count) const + { + return view_.find_last_of(s, pos, count); + } + + template <typename T> + typename std::enable_if<AsStringView<T>::value, size_type>::type + find_last_not_of(T&& s, size_type pos = npos) const + { + string_view v = AsStringView<T>::view(std::forward<T>(s)); + return view_.find_last_not_of(v, pos); + } + + size_type find_last_not_of(const char* s, size_type pos, + size_type count) const + { + return view_.find_last_not_of(s, pos, count); + } + +private: + // Internal constructor to move from existing String. + String(String&& s, Private) noexcept + : String(std::move(s)) + { + } + + // Internal constructor for dynamically allocated string. + String(std::string&& s, Private); + + // Internal constructor for view of statically allocated string. + String(string_view v, Private) + : string_() + , view_(v) + { + } + + void internally_mutate_to_stable_string(); + + std::shared_ptr<std::string const> string_; + string_view view_; +}; + +template <typename L, typename R> +typename std::enable_if<AsStringView<L>::value && AsStringView<R>::value, + bool>::type +operator==(L&& l, R&& r) +{ + return (AsStringView<L>::view(std::forward<L>(l)) == + AsStringView<R>::view(std::forward<R>(r))); +} + +template <typename L, typename R> +typename std::enable_if<AsStringView<L>::value && AsStringView<R>::value, + bool>::type +operator!=(L&& l, R&& r) +{ + return (AsStringView<L>::view(std::forward<L>(l)) != + AsStringView<R>::view(std::forward<R>(r))); +} + +template <typename L, typename R> +typename std::enable_if<AsStringView<L>::value && AsStringView<R>::value, + bool>::type +operator<(L&& l, R&& r) +{ + return (AsStringView<L>::view(std::forward<L>(l)) < + AsStringView<R>::view(std::forward<R>(r))); +} + +template <typename L, typename R> +typename std::enable_if<AsStringView<L>::value && AsStringView<R>::value, + bool>::type +operator<=(L&& l, R&& r) +{ + return (AsStringView<L>::view(std::forward<L>(l)) <= + AsStringView<R>::view(std::forward<R>(r))); +} + +template <typename L, typename R> +typename std::enable_if<AsStringView<L>::value && AsStringView<R>::value, + bool>::type +operator>(L&& l, R&& r) +{ + return (AsStringView<L>::view(std::forward<L>(l)) > + AsStringView<R>::view(std::forward<R>(r))); +} + +template <typename L, typename R> +typename std::enable_if<AsStringView<L>::value && AsStringView<R>::value, + bool>::type +operator>=(L&& l, R&& r) +{ + return (AsStringView<L>::view(std::forward<L>(l)) >= + AsStringView<R>::view(std::forward<R>(r))); +} + +std::ostream& operator<<(std::ostream& os, String const& s); +std::string& operator+=(std::string& self, String const& s); + +} // namespace cm + +namespace std { + +template <> +struct hash<cm::String> +{ + typedef cm::String argument_type; + typedef size_t result_type; + + result_type operator()(argument_type const& s) const noexcept + { + result_type const h(std::hash<cm::string_view>{}(s.view())); + return h; + } +}; +} + +#endif |