summary refs log tree commit diff stats
path: root/Utilities/cmlibarchive/libarchive/archive_string.c
diff options
context:
space:
mode:
Diffstat (limited to 'Utilities/cmlibarchive/libarchive/archive_string.c')
-rw-r--r-- Utilities/cmlibarchive/libarchive/archive_string.c 91
1 files changed, 37 insertions, 54 deletions
diff --git a/Utilities/cmlibarchive/libarchive/archive_string.c b/Utilities/cmlibarchive/libarchive/archive_string.c
index 87f9288..3d4be82 100644
--- a/Utilities/cmlibarchive/libarchive/archive_string.c
+++ b/Utilities/cmlibarchive/libarchive/archive_string.c
@@ -71,6 +71,10 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_string.c 201095 2009-12-28 02:33
#define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t))
#endif
+#if !defined(HAVE_WMEMMOVE) && !defined(wmemmove)
+#define wmemmove(a,b,i) (wchar_t *)memmove((a), (b), (i) * sizeof(wchar_t))
+#endif
+
struct archive_string_conv {
struct archive_string_conv *next;
char *from_charset;
@@ -127,12 +131,7 @@ struct archive_string_conv {
#define UNICODE_MAX 0x10FFFF
#define UNICODE_R_CHAR 0xFFFD /* Replacement character. */
/* U+FFFD (Replacement character) encoded in UTF-8. */
-#define UTF8_SET_R_CHAR(outp) do { \
- (outp)[0] = 0xef; \
- (outp)[1] = 0xbf; \
- (outp)[2] = 0xbd; \
-} while (0)
-#define UTF8_R_CHAR_SIZE 3
+/*
+ * The three bytes 0xEF 0xBF 0xBD, written as character escapes so that
+ * every initializer is representable even where plain char is signed.
+ * Not NUL-terminated: always use sizeof(utf8_replacement_char) as length.
+ */
+static const char utf8_replacement_char[] = {'\xef', '\xbf', '\xbd'};
static struct archive_string_conv *find_sconv_object(struct archive *,
const char *, const char *);
@@ -203,7 +202,7 @@ archive_string_append(struct archive_string *as, const char *p, size_t s)
{
if (archive_string_ensure(as, as->length + s + 1) == NULL)
return (NULL);
- memcpy(as->s + as->length, p, s);
+ memmove(as->s + as->length, p, s);
as->length += s;
as->s[as->length] = 0;
return (as);
@@ -214,7 +213,7 @@ archive_wstring_append(struct archive_wstring *as, const wchar_t *p, size_t s)
{
if (archive_wstring_ensure(as, as->length + s + 1) == NULL)
return (NULL);
- wmemcpy(as->s + as->length, p, s);
+ wmemmove(as->s + as->length, p, s);
as->length += s;
as->s[as->length] = 0;
return (as);
@@ -2037,7 +2036,7 @@ iconv_strncat_in_locale(struct archive_string *as, const void *_p,
if (sc->flag & (SCONV_TO_UTF8 | SCONV_TO_UTF16)) {
size_t rbytes;
if (sc->flag & SCONV_TO_UTF8)
- rbytes = UTF8_R_CHAR_SIZE;
+ rbytes = sizeof(utf8_replacement_char);
else
rbytes = 2;
@@ -2053,7 +2052,7 @@ iconv_strncat_in_locale(struct archive_string *as, const void *_p,
- as->length - to_size;
}
if (sc->flag & SCONV_TO_UTF8)
- UTF8_SET_R_CHAR(outp);
+ memcpy(outp, utf8_replacement_char, sizeof(utf8_replacement_char));
else if (sc->flag & SCONV_TO_UTF16BE)
archive_be16enc(outp, UNICODE_R_CHAR);
else
@@ -2202,9 +2201,7 @@ best_effort_strncat_in_locale(struct archive_string *as, const void *_p,
size_t length, struct archive_string_conv *sc)
{
size_t remaining;
- char *otp;
const uint8_t *itp;
- size_t avail;
int return_value = 0; /* success */
/*
@@ -2223,46 +2220,25 @@ best_effort_strncat_in_locale(struct archive_string *as, const void *_p,
* byte sequence 0xEF 0xBF 0xBD, which encodes code point U+FFFD,
* a Replacement Character in Unicode.
*/
- if (archive_string_ensure(as, as->length + length + 1) == NULL)
- return (-1);
remaining = length;
itp = (const uint8_t *)_p;
- otp = as->s + as->length;
- avail = as->buffer_length - as->length -1;
while (*itp && remaining > 0) {
- if (*itp > 127 && (sc->flag & SCONV_TO_UTF8)) {
- if (avail < UTF8_R_CHAR_SIZE) {
- as->length = otp - as->s;
- if (NULL == archive_string_ensure(as,
- as->buffer_length + remaining +
- UTF8_R_CHAR_SIZE))
- return (-1);
- otp = as->s + as->length;
- avail = as->buffer_length - as->length -1;
+ if (*itp > 127) {
+ // Non-ASCII: Substitute with suitable replacement
+ if (sc->flag & SCONV_TO_UTF8) {
+ if (archive_string_append(as, utf8_replacement_char, sizeof(utf8_replacement_char)) == NULL) {
+ __archive_errx(1, "Out of memory");
+ }
+ } else {
+ archive_strappend_char(as, '?');
}
- /*
- * When coping a string in UTF-8, unknown character
- * should be U+FFFD (replacement character).
- */
- UTF8_SET_R_CHAR(otp);
- otp += UTF8_R_CHAR_SIZE;
- avail -= UTF8_R_CHAR_SIZE;
- itp++;
- remaining--;
- return_value = -1;
- } else if (*itp > 127) {
- *otp++ = '?';
- itp++;
- remaining--;
return_value = -1;
} else {
- *otp++ = (char)*itp++;
- remaining--;
+ archive_strappend_char(as, *itp);
}
+ ++itp;
}
- as->length = otp - as->s;
- as->s[as->length] = '\0';
return (return_value);
}
@@ -2488,6 +2464,9 @@ unicode_to_utf8(char *p, size_t remaining, uint32_t uc)
{
char *_p = p;
+ /* Invalid Unicode char maps to Replacement character */
+ if (uc > UNICODE_MAX)
+ uc = UNICODE_R_CHAR;
/* Translate code point to UTF8 */
if (uc <= 0x7f) {
if (remaining == 0)
@@ -2504,22 +2483,13 @@ unicode_to_utf8(char *p, size_t remaining, uint32_t uc)
*p++ = 0xe0 | ((uc >> 12) & 0x0f);
*p++ = 0x80 | ((uc >> 6) & 0x3f);
*p++ = 0x80 | (uc & 0x3f);
- } else if (uc <= UNICODE_MAX) {
+ } else {
if (remaining < 4)
return (0);
*p++ = 0xf0 | ((uc >> 18) & 0x07);
*p++ = 0x80 | ((uc >> 12) & 0x3f);
*p++ = 0x80 | ((uc >> 6) & 0x3f);
*p++ = 0x80 | (uc & 0x3f);
- } else {
- /*
- * Undescribed code point should be U+FFFD
- * (replacement character).
- */
- if (remaining < UTF8_R_CHAR_SIZE)
- return (0);
- UTF8_SET_R_CHAR(p);
- p += UTF8_R_CHAR_SIZE;
}
return (p - _p);
}
@@ -3887,7 +3857,7 @@ archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes,
sc = archive_string_conversion_to_charset(a, "UTF-8", 1);
if (sc == NULL)
return (-1);/* Couldn't allocate memory for sc. */
- r = archive_strncpy_l(&(aes->aes_mbs), aes->aes_mbs.s,
+ r = archive_strncpy_l(&(aes->aes_utf8), aes->aes_mbs.s,
aes->aes_mbs.length, sc);
if (a == NULL)
free_sconv_object(sc);
@@ -4062,6 +4032,19 @@ archive_mstring_copy_wcs(struct archive_mstring *aes, const wchar_t *wcs)
}
int
+archive_mstring_copy_utf8(struct archive_mstring *aes, const char *utf8)
+{
+ size_t len;
+
+ /*
+  * Store a copy of the NUL-terminated string `utf8` as the UTF-8 form
+  * of `aes`, emptying the MBS and WCS forms.  Returns the number of
+  * bytes copied, or 0 when `utf8` is NULL.
+  */
+ if (utf8 == NULL) {
+  /*
+   * NULL marks the string as unset.  Return here: falling through
+   * would overwrite aes_set and pass NULL to strlen().
+   */
+  aes->aes_set = 0;
+  return (0);
+ }
+ /* Measure once; the length is used for both the copy and the result. */
+ len = strlen(utf8);
+ aes->aes_set = AES_SET_UTF8;
+ archive_string_empty(&(aes->aes_mbs));
+ archive_string_empty(&(aes->aes_wcs));
+ archive_strncpy(&(aes->aes_utf8), utf8, len);
+ return (int)len;
+}
+
+int
archive_mstring_copy_wcs_len(struct archive_mstring *aes, const wchar_t *wcs,
size_t len)
{