diff options
author | Brad King <brad.king@kitware.com> | 2019-03-19 12:58:14 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2019-03-19 12:58:39 (GMT) |
commit | 3c4f92cf5bd38355b4843dd6e882fea012306a10 (patch) | |
tree | f6c6d1ca639fa0313edd81ffe8914352f7ab6503 /Source | |
parent | 6bbc82b4b30ff17c5b3c899c482b1a724e15d5fb (diff) | |
parent | 53184a727d0f3b233988212628fec6ef7803da69 (diff) | |
download | CMake-3c4f92cf5bd38355b4843dd6e882fea012306a10.zip CMake-3c4f92cf5bd38355b4843dd6e882fea012306a10.tar.gz CMake-3c4f92cf5bd38355b4843dd6e882fea012306a10.tar.bz2 |
Merge topic 'is-valid-utf8'
53184a727d cm_utf8: add an is_valid function
Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !3104
Diffstat (limited to 'Source')
-rw-r--r-- | Source/cm_utf8.c | 19 | ||||
-rw-r--r-- | Source/cm_utf8.h | 4 |
2 files changed, 23 insertions, 0 deletions
```diff
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c
index d41d097..62e7e8c 100644
--- a/Source/cm_utf8.c
+++ b/Source/cm_utf8.c
@@ -2,6 +2,8 @@
    file Copyright.txt or https://cmake.org/licensing for details.  */
 #include "cm_utf8.h"
 
+#include <string.h>
+
 /*
   RFC 3629
   07-bit: 0xxxxxxx
@@ -85,3 +87,20 @@ const char* cm_utf8_decode_character(const char* first, const char* last,
     return first;
   }
 }
+
+int cm_utf8_is_valid(const char* s)
+{
+  if (!s) {
+    return 0;
+  }
+
+  const char* last = s + strlen(s);
+  const char* pos = s;
+  unsigned int pc;
+
+  while (pos != last && (pos = cm_utf8_decode_character(pos, last, &pc))) {
+    /* Nothing to do. */
+  }
+
+  return pos == last;
+}
diff --git a/Source/cm_utf8.h b/Source/cm_utf8.h
index fcb43e0..27dc559 100644
--- a/Source/cm_utf8.h
+++ b/Source/cm_utf8.h
@@ -13,6 +13,10 @@ extern "C" {
 const char* cm_utf8_decode_character(const char* first, const char* last,
                                      unsigned int* pc);
 
+/** Returns whether a C string is a sequence of valid UTF-8 encoded Unicode
+    codepoints.  Returns non-zero on success. */
+int cm_utf8_is_valid(const char* s);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
```