diff options
author | Brad King <brad.king@kitware.com> | 2023-08-01 13:28:58 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2023-08-01 13:29:09 (GMT) |
commit | d56b0f9339c841c05fedee9ffb872638429caaa9 (patch) | |
tree | fba720aa5f79842f65bc674b10303b9157a202b5 | |
parent | 40f25113af5265eb139325253ccce1f12a1cedea (diff) | |
parent | 3ce4e9523cda9738c634fab54a0fe482571c63d1 (diff) | |
download | CMake-d56b0f9339c841c05fedee9ffb872638429caaa9.zip CMake-d56b0f9339c841c05fedee9ffb872638429caaa9.tar.gz CMake-d56b0f9339c841c05fedee9ffb872638429caaa9.tar.bz2 |
Merge topic 'test_utf8_improve'
3ce4e9523c testUTF8: Improve using string_view
Acked-by: Kitware Robot <kwrobot@kitware.com>
Tested-by: buildbot <buildbot@kitware.com>
Merge-request: !8673
-rw-r--r-- | Tests/CMakeLib/testUTF8.cxx | 107 |
1 file changed, 49 insertions, 58 deletions
diff --git a/Tests/CMakeLib/testUTF8.cxx b/Tests/CMakeLib/testUTF8.cxx index fc0b539..180d29d 100644 --- a/Tests/CMakeLib/testUTF8.cxx +++ b/Tests/CMakeLib/testUTF8.cxx @@ -1,67 +1,57 @@ /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying file Copyright.txt or https://cmake.org/licensing for details. */ +#include <cm/string_view> + #include <stdio.h> #include <cm_utf8.h> -typedef char test_utf8_char[5]; - -static void test_utf8_char_print(test_utf8_char const c) -{ - unsigned char const* d = reinterpret_cast<unsigned char const*>(c); -#ifndef __clang_analyzer__ // somehow thinks arguments are not initialized - printf("[0x%02X,0x%02X,0x%02X,0x%02X]", static_cast<int>(d[0]), - static_cast<int>(d[1]), static_cast<int>(d[2]), - static_cast<int>(d[3])); -#endif -} +using test_utf8_char = const cm::string_view; -static void byte_array_print(char const* s) +static void byte_array_print(test_utf8_char s) { - unsigned char const* d = reinterpret_cast<unsigned char const*>(s); bool started = false; printf("["); - for (; *d; ++d) { + for (char c : s) { if (started) { printf(","); } started = true; - printf("0x%02X", static_cast<int>(*d)); + printf("0x%02X", static_cast<unsigned char>(c)); } printf("]"); } struct test_utf8_entry { - int n; test_utf8_char str; unsigned int chr; }; static test_utf8_entry const good_entry[] = { - { 1, "\x20\x00\x00\x00", 0x0020 }, /* Space. */ - { 2, "\xC2\xA9\x00\x00", 0x00A9 }, /* Copyright. */ - { 3, "\xE2\x80\x98\x00", 0x2018 }, /* Open-single-quote. */ - { 3, "\xE2\x80\x99\x00", 0x2019 }, /* Close-single-quote. */ - { 4, "\xF0\xA3\x8E\xB4", 0x233B4 }, /* Example from RFC 3629. */ - { 3, "\xED\x80\x80\x00", 0xD000 }, /* Valid 0xED prefixed codepoint. */ - { 4, "\xF4\x8F\xBF\xBF", 0x10FFFF }, /* Highest valid RFC codepoint. */ - { 0, { 0, 0, 0, 0, 0 }, 0 } + { "\x20", 0x0020 }, /* Space. */ + { "\xC2\xA9", 0x00A9 }, /* Copyright. */ + { "\xE2\x80\x98", 0x2018 }, /* Open-single-quote. 
*/ + { "\xE2\x80\x99", 0x2019 }, /* Close-single-quote. */ + { "\xF0\xA3\x8E\xB4", 0x233B4 }, /* Example from RFC 3629. */ + { "\xED\x80\x80", 0xD000 }, /* Valid 0xED prefixed codepoint. */ + { "\xF4\x8F\xBF\xBF", 0x10FFFF }, /* Highest valid RFC codepoint. */ + { {}, 0 } }; static test_utf8_char const bad_chars[] = { - "\x80\x00\x00\x00", /* Leading continuation byte. */ - "\xC0\x80\x00\x00", /* Overlong encoding. */ - "\xC1\x80\x00\x00", /* Overlong encoding. */ - "\xC2\x00\x00\x00", /* Missing continuation byte. */ - "\xE0\x00\x00\x00", /* Missing continuation bytes. */ - "\xE0\x80\x80\x00", /* Overlong encoding. */ + "\x80", /* Leading continuation byte. */ + "\xC0\x80", /* Overlong encoding. */ + "\xC1\x80", /* Overlong encoding. */ + "\xC2", /* Missing continuation byte. */ + "\xE0", /* Missing continuation bytes. */ + "\xE0\x80\x80", /* Overlong encoding. */ "\xF0\x80\x80\x80", /* Overlong encoding. */ - "\xED\xA0\x80\x00", /* UTF-16 surrogate half. */ - "\xED\xBF\xBF\x00", /* UTF-16 surrogate half. */ + "\xED\xA0\x80", /* UTF-16 surrogate half. */ + "\xED\xBF\xBF", /* UTF-16 surrogate half. */ "\xF4\x90\x80\x80", /* Lowest out-of-range codepoint. */ "\xF5\x80\x80\x80", /* Prefix forces out-of-range codepoints. */ - { 0, 0, 0, 0, 0 } + {} }; static char const* good_strings[] = { "", "ASCII", "\xC2\xA9 Kitware", 0 }; @@ -71,49 +61,50 @@ static char const* bad_strings[] = { 0 }; -static void report_good(bool passed, test_utf8_char const c) +static void report_good(bool passed, test_utf8_char c) { printf("%s: decoding good ", passed ? "pass" : "FAIL"); - test_utf8_char_print(c); - printf(" (%s) ", c); + byte_array_print(c); + printf(" (%s) ", c.data()); } -static void report_bad(bool passed, test_utf8_char const c) +static void report_bad(bool passed, test_utf8_char c) { printf("%s: decoding bad ", passed ? 
"pass" : "FAIL"); - test_utf8_char_print(c); + byte_array_print(c); printf(" "); } -static bool decode_good(test_utf8_entry const entry) +static bool decode_good(test_utf8_entry const& entry) { + const auto& s = entry.str; unsigned int uc; if (const char* e = - cm_utf8_decode_character(entry.str, entry.str + 4, &uc)) { - int used = static_cast<int>(e - entry.str); + cm_utf8_decode_character(s.data(), s.data() + s.size(), &uc)) { + int used = static_cast<int>(e - s.data()); if (uc != entry.chr) { - report_good(false, entry.str); + report_good(false, s); printf("expected 0x%04X, got 0x%04X\n", entry.chr, uc); return false; } - if (used != entry.n) { - report_good(false, entry.str); - printf("had %d bytes, used %d\n", entry.n, used); + if (used != int(s.size())) { + report_good(false, s); + printf("had %d bytes, used %d\n", int(s.size()), used); return false; } - report_good(true, entry.str); + report_good(true, s); printf("got 0x%04X\n", uc); return true; } - report_good(false, entry.str); + report_good(false, s); printf("failed\n"); return false; } -static bool decode_bad(test_utf8_char const s) +static bool decode_bad(test_utf8_char s) { unsigned int uc = 0xFFFFu; - const char* e = cm_utf8_decode_character(s, s + 4, &uc); + const char* e = cm_utf8_decode_character(s.data(), s.data() + s.size(), &uc); if (e) { report_bad(false, s); printf("expected failure, got 0x%04X\n", uc); @@ -124,23 +115,23 @@ static bool decode_bad(test_utf8_char const s) return true; } -static void report_valid(bool passed, char const* s) +static void report_valid(bool passed, test_utf8_char s) { printf("%s: validity good ", passed ? "pass" : "FAIL"); byte_array_print(s); - printf(" (%s) ", s); + printf(" (%s) ", s.data()); } -static void report_invalid(bool passed, char const* s) +static void report_invalid(bool passed, test_utf8_char s) { printf("%s: validity bad ", passed ? 
"pass" : "FAIL"); byte_array_print(s); printf(" "); } -static bool is_valid(const char* s) +static bool is_valid(test_utf8_char s) { - bool valid = cm_utf8_is_valid(s) != 0; + bool valid = cm_utf8_is_valid(s.data()) != 0; if (!valid) { report_valid(false, s); printf("expected valid, reported as invalid\n"); @@ -151,9 +142,9 @@ static bool is_valid(const char* s) return true; } -static bool is_invalid(const char* s) +static bool is_invalid(test_utf8_char s) { - bool valid = cm_utf8_is_valid(s) != 0; + bool valid = cm_utf8_is_valid(s.data()) != 0; if (valid) { report_invalid(false, s); printf("expected invalid, reported as valid\n"); @@ -167,7 +158,7 @@ static bool is_invalid(const char* s) int testUTF8(int /*unused*/, char* /*unused*/[]) { int result = 0; - for (test_utf8_entry const* e = good_entry; e->n; ++e) { + for (test_utf8_entry const* e = good_entry; !e->str.empty(); ++e) { if (!decode_good(*e)) { result = 1; } @@ -175,7 +166,7 @@ int testUTF8(int /*unused*/, char* /*unused*/[]) result = 1; } } - for (test_utf8_char const* c = bad_chars; (*c)[0]; ++c) { + for (test_utf8_char* c = bad_chars; !(*c).empty(); ++c) { if (!decode_bad(*c)) { result = 1; } |