diff options
Diffstat (limited to 'Source')
-rw-r--r-- | Source/cm_utf8.c | 19 | ||||
-rw-r--r-- | Source/cm_utf8.h | 4 |
2 files changed, 23 insertions, 0 deletions
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c index d41d097..62e7e8c 100644 --- a/Source/cm_utf8.c +++ b/Source/cm_utf8.c @@ -2,6 +2,8 @@ file Copyright.txt or https://cmake.org/licensing for details. */ #include "cm_utf8.h" +#include <string.h> + /* RFC 3629 07-bit: 0xxxxxxx @@ -85,3 +87,20 @@ const char* cm_utf8_decode_character(const char* first, const char* last, return first; } } + +int cm_utf8_is_valid(const char* s) +{ + if (!s) { + return 0; + } + + const char* last = s + strlen(s); + const char* pos = s; + unsigned int pc; + + while (pos != last && (pos = cm_utf8_decode_character(pos, last, &pc))) { + /* Nothing to do. */ + } + + return pos == last; +} diff --git a/Source/cm_utf8.h b/Source/cm_utf8.h index fcb43e0..27dc559 100644 --- a/Source/cm_utf8.h +++ b/Source/cm_utf8.h @@ -13,6 +13,10 @@ extern "C" { const char* cm_utf8_decode_character(const char* first, const char* last, unsigned int* pc); +/** Returns whether a C string is a sequence of valid UTF-8 encoded Unicode + codepoints. Returns non-zero on success. */ +int cm_utf8_is_valid(const char* s); + #ifdef __cplusplus } /* extern "C" */ #endif |