diff options
author | Ben Boeckel <ben.boeckel@kitware.com> | 2019-03-14 18:33:28 (GMT) |
---|---|---|
committer | Ben Boeckel <ben.boeckel@kitware.com> | 2019-03-18 18:18:13 (GMT) |
commit | 53184a727d0f3b233988212628fec6ef7803da69 (patch) | |
tree | 6d533af81c3b81368b4fd0aab0c9f91ba1cb51ca /Source | |
parent | 875f49cc588b898f5da24bff34638d16de5c6413 (diff) | |
download | CMake-53184a727d0f3b233988212628fec6ef7803da69.zip CMake-53184a727d0f3b233988212628fec6ef7803da69.tar.gz CMake-53184a727d0f3b233988212628fec6ef7803da69.tar.bz2 |
cm_utf8: add an is_valid function
Diffstat (limited to 'Source')
-rw-r--r-- | Source/cm_utf8.c | 19 | ||||
-rw-r--r-- | Source/cm_utf8.h | 4 |
2 files changed, 23 insertions, 0 deletions
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c
index d41d097..62e7e8c 100644
--- a/Source/cm_utf8.c
+++ b/Source/cm_utf8.c
@@ -2,6 +2,8 @@
    file Copyright.txt or https://cmake.org/licensing for details. */
 #include "cm_utf8.h"
 
+#include <string.h>
+
 /*
   RFC 3629
   07-bit: 0xxxxxxx
@@ -85,3 +87,20 @@ const char* cm_utf8_decode_character(const char* first, const char* last,
     return first;
   }
 }
+
+int cm_utf8_is_valid(const char* s)
+{
+  if (!s) {
+    return 0;
+  }
+
+  const char* last = s + strlen(s);
+  const char* pos = s;
+  unsigned int pc;
+
+  while (pos != last && (pos = cm_utf8_decode_character(pos, last, &pc))) {
+    /* Nothing to do. */
+  }
+
+  return pos == last;
+}
diff --git a/Source/cm_utf8.h b/Source/cm_utf8.h
index fcb43e0..27dc559 100644
--- a/Source/cm_utf8.h
+++ b/Source/cm_utf8.h
@@ -13,6 +13,10 @@ extern "C" {
 const char* cm_utf8_decode_character(const char* first, const char* last,
                                      unsigned int* pc);
 
+/** Returns whether a C string is a sequence of valid UTF-8 encoded Unicode
+    codepoints. Returns non-zero on success. */
+int cm_utf8_is_valid(const char* s);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif