blob: c4c8aadb4ec150c8487e32d761589aa87eadf210 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
/******************************************************************************
*
* Copyright (C) 1997-2021 by Dimitri van Heesch.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation under the terms of the GNU General Public License is hereby
* granted. No representations are made about the suitability of this software
* for any purpose. It is provided "as is" without express or implied warranty.
* See the GNU General Public License for more details.
*
* Documents produced by Doxygen are derivative works derived from the
* input used in their production; they are not affected by this license.
*
*/
#ifndef UTF8_H
#define UTF8_H
#include <cstdint>
#include <string>
#include <iostream>
/** @file
* @brief Various UTF8 related helper functions.
*
* See https://en.wikipedia.org/wiki/UTF-8 for details on UTF8 encoding.
*/
/** Converts the input string into a lower case version, also taking into account
* non-ASCII characters that have a lower case variant.
*
* @param input the string to convert; it is taken by const reference and a new
*              string is returned. Presumably expected to be UTF8 encoded, as
*              the function name suggests.
* @return the lower case version of \a input.
*/
std::string convertUTF8ToLower(const std::string &input);
/** Converts the input string into an upper case version, also taking into account
* non-ASCII characters that have an upper case variant.
*
* @param input the string to convert; it is taken by const reference and a new
*              string is returned. Presumably expected to be UTF8 encoded, as
*              the function name suggests.
* @return the upper case version of \a input.
*/
std::string convertUTF8ToUpper(const std::string &input);
/** Returns the UTF8 character found at byte position pos in the input string.
* The resulting string can be a multi byte sequence.
*
* @param input the UTF8 encoded string to read from.
* @param pos   offset into \a input counted in bytes, not in characters.
* @return the character starting at \a pos, as a string of one or more bytes.
*
* NOTE(review): the behavior when \a pos lies beyond the end of \a input, or
* does not point at the first byte of a character, is not visible from this
* declaration; check the implementation before relying on it.
*/
std::string getUTF8CharAt(const std::string &input,size_t pos);
/** Returns the 32bit Unicode value matching the character at byte position pos in
* the UTF8 encoded input.
*
* @param input the UTF8 encoded string to read from.
* @param pos   offset into \a input counted in bytes, not in characters.
* @return the Unicode value of the character starting at \a pos.
*
* NOTE(review): the value returned for an out-of-range \a pos or an invalid
* byte sequence is not visible from this declaration; check the implementation.
*/
uint32_t getUnicodeForUTF8CharAt(const std::string &input,size_t pos);
/** Returns the number of bytes making up a single UTF8 character given the first byte
* in the sequence.
*
* @param firstByte the first (leading) byte of the UTF8 sequence.
* @return the length in bytes of the character that starts with \a firstByte.
*
* NOTE(review): the result for a byte that cannot start a valid UTF8 sequence
* (e.g. a continuation byte) is not visible from this declaration.
*/
int getUTF8CharNumBytes(char firstByte);
/** Writes the UTF8 character pointed to by s to stream t and returns a pointer
* to the next character.
*
* @param t the output stream that receives the bytes of the character.
* @param s pointer to the first byte of the character to write.
* @return pointer to the character following the one that was written, so the
*         call can be repeated to walk through a string.
*
* NOTE(review): whether \a s may be null or point at a terminating zero byte is
* not visible from this declaration; check the implementation.
*/
const char *writeUTF8Char(std::ostream &t,const char *s);
/** Returns true iff the last character in input is a multibyte character.
*
* @param input the UTF8 encoded string to inspect.
* @return true if the final character of \a input is a multibyte character,
*         false otherwise.
*
* NOTE(review): the result for an empty \a input is presumably false; confirm
* against the implementation.
*/
bool lastUTF8CharIsMultibyte(const std::string &input);
/** Returns true iff the input string at byte position pos holds an upper case character.
*
* @param input the UTF8 encoded string to inspect.
* @param pos   offset into \a input counted in bytes, not in characters.
* @return true if the character starting at \a pos is upper case, false otherwise.
*
* NOTE(review): the result for an out-of-range \a pos is not visible from this
* declaration; check the implementation.
*/
bool isUTF8CharUpperCase(const std::string &input,size_t pos);
/** Checks if the first character pointed at by input is a non-breakable whitespace character.
* Returns the byte size of the character if there is a match or 0 if not.
*
* Note that despite the `is` prefix the result is a byte count rather than a
* bool; it is non-zero exactly when a match was found.
*
* @param input pointer to the first byte of the character to test.
* @return the size in bytes of the matched character, or 0 when the character
*         is not a non-breakable whitespace character.
*
* NOTE(review): which code points count as non-breakable whitespace, and
* whether \a input may be null, is not visible from this declaration.
*/
int isUTF8NonBreakableSpace(const char *input);
#endif
|