summary refs log tree commit diff stats
path: root/Source
diff options
context:
space:
mode:
Diffstat (limited to 'Source')
-rw-r--r-- Source/cm_utf8.c 19
-rw-r--r-- Source/cm_utf8.h 4
2 files changed, 23 insertions, 0 deletions
diff --git a/Source/cm_utf8.c b/Source/cm_utf8.c
index d41d097..62e7e8c 100644
--- a/Source/cm_utf8.c
+++ b/Source/cm_utf8.c
@@ -2,6 +2,8 @@
file Copyright.txt or https://cmake.org/licensing for details. */
#include "cm_utf8.h"
+#include <string.h>
+
/*
RFC 3629
07-bit: 0xxxxxxx
@@ -85,3 +87,20 @@ const char* cm_utf8_decode_character(const char* first, const char* last,
return first;
}
}
+
+int cm_utf8_is_valid(const char* s) /* non-zero iff decoding s consumes every byte up to its NUL */
+{
+ if (!s) { /* a null pointer is reported as not valid rather than dereferenced */
+ return 0;
+ }
+
+ const char* last = s + strlen(s); /* address of the terminating NUL; an empty string gives last == s */
+ const char* pos = s;
+ unsigned int pc; /* output slot handed to the decoder; its value is never read here */
+
+ while (pos != last && (pos = cm_utf8_decode_character(pos, last, &pc))) {
+ /* Nothing to do: the condition itself advances pos and stops once the decoder yields a null pointer. */
+ }
+
+ return pos == last; /* pos is null after a failed decode (presumably malformed input; confirm in the decoder), so this is 0 */
+}
diff --git a/Source/cm_utf8.h b/Source/cm_utf8.h
index fcb43e0..27dc559 100644
--- a/Source/cm_utf8.h
+++ b/Source/cm_utf8.h
@@ -13,6 +13,10 @@ extern "C" {
const char* cm_utf8_decode_character(const char* first, const char* last,
unsigned int* pc);
+/** Returns non-zero if the C string s is a sequence of valid UTF-8 encoded
+ Unicode codepoints (an empty string counts), and zero otherwise or if s is null. */
+int cm_utf8_is_valid(const char* s);
+
#ifdef __cplusplus
} /* extern "C" */
#endif