summary refs log tree commit diff stats
path: root/Source
diff options
context:
space:
mode:
author Brad King <brad.king@kitware.com> 2019-03-19 12:58:14 (GMT)
committer Kitware Robot <kwrobot@kitware.com> 2019-03-19 12:58:39 (GMT)
commit 3c4f92cf5bd38355b4843dd6e882fea012306a10 (patch)
tree f6c6d1ca639fa0313edd81ffe8914352f7ab6503 /Source
parent 6bbc82b4b30ff17c5b3c899c482b1a724e15d5fb (diff)
parent 53184a727d0f3b233988212628fec6ef7803da69 (diff)
download CMake-3c4f92cf5bd38355b4843dd6e882fea012306a10.zip
CMake-3c4f92cf5bd38355b4843dd6e882fea012306a10.tar.gz
CMake-3c4f92cf5bd38355b4843dd6e882fea012306a10.tar.bz2
Merge topic 'is-valid-utf8'
53184a727d cm_utf8: add an is_valid function
Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !3104
Diffstat (limited to 'Source')
-rw-r--r-- Source/cm_utf8.c 19
-rw-r--r-- Source/cm_utf8.h 4
2 files changed, 23 insertions, 0 deletions
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c
index d41d097..62e7e8c 100644
--- a/Source/cm_utf8.c
+++ b/Source/cm_utf8.c
@@ -2,6 +2,8 @@
file Copyright.txt or https://cmake.org/licensing for details. */
#include "cm_utf8.h"
+#include <string.h>
+
/*
RFC 3629
07-bit: 0xxxxxxx
@@ -85,3 +87,20 @@ const char* cm_utf8_decode_character(const char* first, const char* last,
return first;
}
}
+
+int cm_utf8_is_valid(const char* s)
+{ /* Non-zero only if decoding s character by character reaches its end. */
+ if (!s) { /* A null pointer is reported as not valid. */
+ return 0;
+ }
+
+ const char* last = s + strlen(s); /* Points at the terminating NUL. */
+ const char* pos = s; /* Current decode position. */
+ unsigned int pc; /* Receives each decoded value; not otherwise used. */
+
+ while (pos != last && (pos = cm_utf8_decode_character(pos, last, &pc))) {
+ /* Nothing to do: the condition advances pos and ends the loop itself. */
+ }
+
+ return pos == last; /* 0 if the decoder returned null (presumably bad input; confirm). */
+}
diff --git a/Source/cm_utf8.h b/Source/cm_utf8.h
index fcb43e0..27dc559 100644
--- a/Source/cm_utf8.h
+++ b/Source/cm_utf8.h
@@ -13,6 +13,10 @@ extern "C" {
const char* cm_utf8_decode_character(const char* first, const char* last,
unsigned int* pc);
+/** Returns whether a C string is a sequence of valid UTF-8 encoded Unicode
+ codepoints. Returns non-zero on success, zero otherwise or if s is null. */
+int cm_utf8_is_valid(const char* s);
+
#ifdef __cplusplus
} /* extern "C" */
#endif