diff options
Diffstat (limited to 'Utilities/cmlibarchive/libarchive/archive_string.c')
-rw-r--r-- | Utilities/cmlibarchive/libarchive/archive_string.c | 91 |
1 files changed, 37 insertions, 54 deletions
diff --git a/Utilities/cmlibarchive/libarchive/archive_string.c b/Utilities/cmlibarchive/libarchive/archive_string.c index 87f9288..3d4be82 100644 --- a/Utilities/cmlibarchive/libarchive/archive_string.c +++ b/Utilities/cmlibarchive/libarchive/archive_string.c @@ -71,6 +71,10 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_string.c 201095 2009-12-28 02:33 #define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t)) #endif +#if !defined(HAVE_WMEMMOVE) && !defined(wmemmove) +#define wmemmove(a,b,i) (wchar_t *)memmove((a), (b), (i) * sizeof(wchar_t)) +#endif + struct archive_string_conv { struct archive_string_conv *next; char *from_charset; @@ -127,12 +131,7 @@ struct archive_string_conv { #define UNICODE_MAX 0x10FFFF #define UNICODE_R_CHAR 0xFFFD /* Replacement character. */ /* Set U+FFFD(Replacement character) in UTF-8. */ -#define UTF8_SET_R_CHAR(outp) do { \ - (outp)[0] = 0xef; \ - (outp)[1] = 0xbf; \ - (outp)[2] = 0xbd; \ -} while (0) -#define UTF8_R_CHAR_SIZE 3 +static const char utf8_replacement_char[] = {0xef, 0xbf, 0xbd}; static struct archive_string_conv *find_sconv_object(struct archive *, const char *, const char *); @@ -203,7 +202,7 @@ archive_string_append(struct archive_string *as, const char *p, size_t s) { if (archive_string_ensure(as, as->length + s + 1) == NULL) return (NULL); - memcpy(as->s + as->length, p, s); + memmove(as->s + as->length, p, s); as->length += s; as->s[as->length] = 0; return (as); @@ -214,7 +213,7 @@ archive_wstring_append(struct archive_wstring *as, const wchar_t *p, size_t s) { if (archive_wstring_ensure(as, as->length + s + 1) == NULL) return (NULL); - wmemcpy(as->s + as->length, p, s); + wmemmove(as->s + as->length, p, s); as->length += s; as->s[as->length] = 0; return (as); @@ -2037,7 +2036,7 @@ iconv_strncat_in_locale(struct archive_string *as, const void *_p, if (sc->flag & (SCONV_TO_UTF8 | SCONV_TO_UTF16)) { size_t rbytes; if (sc->flag & SCONV_TO_UTF8) - rbytes = UTF8_R_CHAR_SIZE; + rbytes = sizeof(utf8_replacement_char); else rbytes = 2; @@ -2053,7 +2052,7 @@ iconv_strncat_in_locale(struct archive_string *as, const void *_p, - as->length - to_size; } if (sc->flag & SCONV_TO_UTF8) - UTF8_SET_R_CHAR(outp); + memcpy(outp, utf8_replacement_char, sizeof(utf8_replacement_char)); else if (sc->flag & SCONV_TO_UTF16BE) archive_be16enc(outp, UNICODE_R_CHAR); else @@ -2202,9 +2201,7 @@ best_effort_strncat_in_locale(struct archive_string *as, const void *_p, size_t length, struct archive_string_conv *sc) { size_t remaining; - char *otp; const uint8_t *itp; - size_t avail; int return_value = 0; /* success */ /* @@ -2223,46 +2220,25 @@ best_effort_strncat_in_locale(struct archive_string *as, const void *_p, * byte sequence 0xEF 0xBD 0xBD, which are code point U+FFFD, * a Replacement Character in Unicode. */ - if (archive_string_ensure(as, as->length + length + 1) == NULL) - return (-1); remaining = length; itp = (const uint8_t *)_p; - otp = as->s + as->length; - avail = as->buffer_length - as->length -1; while (*itp && remaining > 0) { - if (*itp > 127 && (sc->flag & SCONV_TO_UTF8)) { - if (avail < UTF8_R_CHAR_SIZE) { - as->length = otp - as->s; - if (NULL == archive_string_ensure(as, - as->buffer_length + remaining + - UTF8_R_CHAR_SIZE)) - return (-1); - otp = as->s + as->length; - avail = as->buffer_length - as->length -1; + if (*itp > 127) { + // Non-ASCII: Substitute with suitable replacement + if (sc->flag & SCONV_TO_UTF8) { + if (archive_string_append(as, utf8_replacement_char, sizeof(utf8_replacement_char)) == NULL) { + __archive_errx(1, "Out of memory"); + } + } else { + archive_strappend_char(as, '?'); } - /* - * When coping a string in UTF-8, unknown character - * should be U+FFFD (replacement character). - */ - UTF8_SET_R_CHAR(otp); - otp += UTF8_R_CHAR_SIZE; - avail -= UTF8_R_CHAR_SIZE; - itp++; - remaining--; - return_value = -1; - } else if (*itp > 127) { - *otp++ = '?'; - itp++; - remaining--; return_value = -1; } else { - *otp++ = (char)*itp++; - remaining--; + archive_strappend_char(as, *itp); } + ++itp; } - as->length = otp - as->s; - as->s[as->length] = '\0'; return (return_value); } @@ -2488,6 +2464,9 @@ unicode_to_utf8(char *p, size_t remaining, uint32_t uc) { char *_p = p; + /* Invalid Unicode char maps to Replacement character */ + if (uc > UNICODE_MAX) + uc = UNICODE_R_CHAR; /* Translate code point to UTF8 */ if (uc <= 0x7f) { if (remaining == 0) @@ -2504,22 +2483,13 @@ unicode_to_utf8(char *p, size_t remaining, uint32_t uc) *p++ = 0xe0 | ((uc >> 12) & 0x0f); *p++ = 0x80 | ((uc >> 6) & 0x3f); *p++ = 0x80 | (uc & 0x3f); - } else if (uc <= UNICODE_MAX) { + } else { if (remaining < 4) return (0); *p++ = 0xf0 | ((uc >> 18) & 0x07); *p++ = 0x80 | ((uc >> 12) & 0x3f); *p++ = 0x80 | ((uc >> 6) & 0x3f); *p++ = 0x80 | (uc & 0x3f); - } else { - /* - * Undescribed code point should be U+FFFD - * (replacement character). - */ - if (remaining < UTF8_R_CHAR_SIZE) - return (0); - UTF8_SET_R_CHAR(p); - p += UTF8_R_CHAR_SIZE; } return (p - _p); } @@ -3887,7 +3857,7 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes, sc = archive_string_conversion_to_charset(a, "UTF-8", 1); if (sc == NULL) return (-1);/* Couldn't allocate memory for sc. */ - r = archive_strncpy_l(&(aes->aes_mbs), aes->aes_mbs.s, + r = archive_strncpy_l(&(aes->aes_utf8), aes->aes_mbs.s, aes->aes_mbs.length, sc); if (a == NULL) free_sconv_object(sc); @@ -4062,6 +4032,19 @@ archive_mstring_copy_wcs(struct archive_mstring *aes, const wchar_t *wcs) } int +archive_mstring_copy_utf8(struct archive_mstring *aes, const char *utf8) +{ + if (utf8 == NULL) { + aes->aes_set = 0; + } + aes->aes_set = AES_SET_UTF8; + archive_string_empty(&(aes->aes_mbs)); + archive_string_empty(&(aes->aes_wcs)); + archive_strncpy(&(aes->aes_utf8), utf8, strlen(utf8)); + return (int)strlen(utf8); +} + +int archive_mstring_copy_wcs_len(struct archive_mstring *aes, const wchar_t *wcs, size_t len) { |