summaryrefslogtreecommitdiffstats
path: root/libarchive/archive_read_support_format_lha.c
diff options
context:
space:
mode:
Diffstat (limited to 'libarchive/archive_read_support_format_lha.c')
-rw-r--r--libarchive/archive_read_support_format_lha.c206
1 files changed, 149 insertions, 57 deletions
diff --git a/libarchive/archive_read_support_format_lha.c b/libarchive/archive_read_support_format_lha.c
index 95c99bb..bff0f01 100644
--- a/libarchive/archive_read_support_format_lha.c
+++ b/libarchive/archive_read_support_format_lha.c
@@ -175,7 +175,9 @@ struct lha {
struct archive_string gname;
uint16_t header_crc;
uint16_t crc;
- struct archive_string_conv *sconv;
+ /* dirname and filename could be in different codepages */
+ struct archive_string_conv *sconv_dir;
+ struct archive_string_conv *sconv_fname;
struct archive_string_conv *opt_sconv;
struct archive_string dirname;
@@ -232,8 +234,8 @@ static time_t lha_dos_time(const unsigned char *);
static time_t lha_win_time(uint64_t, long *);
static unsigned char lha_calcsum(unsigned char, const void *,
int, size_t);
-static int lha_parse_linkname(struct archive_string *,
- struct archive_string *);
+static int lha_parse_linkname(struct archive_wstring *,
+ struct archive_wstring *);
static int lha_read_data_none(struct archive_read *, const void **,
size_t *, int64_t *);
static int lha_read_data_lzh(struct archive_read *, const void **,
@@ -473,13 +475,15 @@ static int
archive_read_format_lha_read_header(struct archive_read *a,
struct archive_entry *entry)
{
- struct archive_string linkname;
- struct archive_string pathname;
+ struct archive_wstring linkname;
+ struct archive_wstring pathname;
struct lha *lha;
const unsigned char *p;
const char *signature;
int err;
-
+ struct archive_mstring conv_buffer;
+ const wchar_t *conv_buffer_p;
+
lha_crc16_init();
a->archive.archive_format = ARCHIVE_FORMAT_LHA;
@@ -561,10 +565,13 @@ archive_read_format_lha_read_header(struct archive_read *a,
archive_string_empty(&lha->dirname);
archive_string_empty(&lha->filename);
lha->dos_attr = 0;
- if (lha->opt_sconv != NULL)
- lha->sconv = lha->opt_sconv;
- else
- lha->sconv = NULL;
+ if (lha->opt_sconv != NULL) {
+ lha->sconv_dir = lha->opt_sconv;
+ lha->sconv_fname = lha->opt_sconv;
+ } else {
+ lha->sconv_dir = NULL;
+ lha->sconv_fname = NULL;
+ }
switch (p[H_LEVEL_OFFSET]) {
case 0:
@@ -594,12 +601,54 @@ archive_read_format_lha_read_header(struct archive_read *a,
return (truncated_error(a));
/*
- * Make a pathname from a dirname and a filename.
- */
- archive_string_concat(&lha->dirname, &lha->filename);
+ * Make a pathname from a dirname and a filename, after converting to Unicode.
+ * This is because codepages might differ between dirname and filename.
+ */
archive_string_init(&pathname);
archive_string_init(&linkname);
- archive_string_copy(&pathname, &lha->dirname);
+ archive_string_init(&conv_buffer.aes_mbs);
+ archive_string_init(&conv_buffer.aes_mbs_in_locale);
+ archive_string_init(&conv_buffer.aes_utf8);
+ archive_string_init(&conv_buffer.aes_wcs);
+ if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Pathname cannot be converted "
+ "from %s to Unicode.",
+ archive_string_conversion_charset_name(lha->sconv_dir));
+ err = ARCHIVE_FATAL;
+ } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
+ err = ARCHIVE_FATAL;
+ if (err == ARCHIVE_FATAL) {
+ archive_mstring_clean(&conv_buffer);
+ archive_wstring_free(&pathname);
+ archive_wstring_free(&linkname);
+ return (err);
+ }
+ archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
+
+ archive_string_empty(&conv_buffer.aes_mbs);
+ archive_string_empty(&conv_buffer.aes_mbs_in_locale);
+ archive_string_empty(&conv_buffer.aes_utf8);
+ archive_wstring_empty(&conv_buffer.aes_wcs);
+ if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Pathname cannot be converted "
+ "from %s to Unicode.",
+ archive_string_conversion_charset_name(lha->sconv_fname));
+ err = ARCHIVE_FATAL;
+ }
+ else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
+ err = ARCHIVE_FATAL;
+ if (err == ARCHIVE_FATAL) {
+ archive_mstring_clean(&conv_buffer);
+ archive_wstring_free(&pathname);
+ archive_wstring_free(&linkname);
+ return (err);
+ }
+ archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
+ archive_mstring_clean(&conv_buffer);
if ((lha->mode & AE_IFMT) == AE_IFLNK) {
/*
@@ -610,8 +659,8 @@ archive_read_format_lha_read_header(struct archive_read *a,
archive_set_error(&a->archive,
ARCHIVE_ERRNO_FILE_FORMAT,
"Unknown symlink-name");
- archive_string_free(&pathname);
- archive_string_free(&linkname);
+ archive_wstring_free(&pathname);
+ archive_wstring_free(&linkname);
return (ARCHIVE_FAILED);
}
} else {
@@ -629,39 +678,13 @@ archive_read_format_lha_read_header(struct archive_read *a,
/*
* Set basic file parameters.
*/
- if (archive_entry_copy_pathname_l(entry, pathname.s,
- pathname.length, lha->sconv) != 0) {
- if (errno == ENOMEM) {
- archive_set_error(&a->archive, ENOMEM,
- "Can't allocate memory for Pathname");
- return (ARCHIVE_FATAL);
- }
- archive_set_error(&a->archive,
- ARCHIVE_ERRNO_FILE_FORMAT,
- "Pathname cannot be converted "
- "from %s to current locale.",
- archive_string_conversion_charset_name(lha->sconv));
- err = ARCHIVE_WARN;
- }
- archive_string_free(&pathname);
+ archive_entry_copy_pathname_w(entry, pathname.s);
+ archive_wstring_free(&pathname);
if (archive_strlen(&linkname) > 0) {
- if (archive_entry_copy_symlink_l(entry, linkname.s,
- linkname.length, lha->sconv) != 0) {
- if (errno == ENOMEM) {
- archive_set_error(&a->archive, ENOMEM,
- "Can't allocate memory for Linkname");
- return (ARCHIVE_FATAL);
- }
- archive_set_error(&a->archive,
- ARCHIVE_ERRNO_FILE_FORMAT,
- "Linkname cannot be converted "
- "from %s to current locale.",
- archive_string_conversion_charset_name(lha->sconv));
- err = ARCHIVE_WARN;
- }
+ archive_entry_copy_symlink_w(entry, linkname.s);
} else
archive_entry_set_symlink(entry, NULL);
- archive_string_free(&linkname);
+ archive_wstring_free(&linkname);
/*
* When a header level is 0, there is a possibility that
* a pathname and a symlink has '\' character, a directory
@@ -1208,6 +1231,27 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
archive_strncpy(&lha->filename,
(const char *)extdheader, datasize);
break;
+ case EXT_UTF16_FILENAME:
+ if (datasize == 0) {
+ /* maybe directory header */
+ archive_string_empty(&lha->filename);
+ break;
+ } else if (datasize & 1) {
+ /* UTF-16 characters take always 2 or 4 bytes */
+ goto invalid;
+ }
+ if (extdheader[0] == '\0')
+ goto invalid;
+ archive_string_empty(&lha->filename);
+ archive_array_append(&lha->filename,
+ (const char *)extdheader, datasize);
+ /* Setup a string conversion for a filename. */
+ lha->sconv_fname =
+ archive_string_conversion_from_charset(&a->archive,
+ "UTF-16LE", 1);
+ if (lha->sconv_fname == NULL)
+ return (ARCHIVE_FATAL);
+ break;
case EXT_DIRECTORY:
if (datasize == 0 || extdheader[0] == '\0')
/* no directory name data. exit this case. */
@@ -1228,6 +1272,50 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
/* invalid directory data */
goto invalid;
break;
+ case EXT_UTF16_DIRECTORY:
+ /* UTF-16 characters take always 2 or 4 bytes */
+ if (datasize == 0 || (datasize & 1) ||
+ extdheader[0] == '\0') {
+ /* no directory name data. exit this case. */
+ goto invalid;
+ }
+
+ archive_string_empty(&lha->dirname);
+ archive_array_append(&lha->dirname,
+ (const char *)extdheader, datasize);
+ lha->sconv_dir =
+ archive_string_conversion_from_charset(&a->archive,
+ "UTF-16LE", 1);
+ if (lha->sconv_dir == NULL)
+ return (ARCHIVE_FATAL);
+ else {
+ /*
+ * Convert directory delimiter from 0xFFFF
+ * to '/' for local system.
+ */
+ uint16_t dirSep;
+ uint16_t d = 1;
+ if (archive_be16dec(&d) == 1)
+ dirSep = 0x2F00;
+ else
+ dirSep = 0x002F;
+
+ /* UTF-16LE character */
+ uint16_t *utf16name =
+ (uint16_t *)lha->dirname.s;
+ for (i = 0; i < lha->dirname.length / 2; i++) {
+ if (utf16name[i] == 0xFFFF) {
+ utf16name[i] = dirSep;
+ }
+ }
+ /* Is last character directory separator? */
+ if (utf16name[lha->dirname.length / 2 - 1] !=
+ dirSep) {
+ /* invalid directory data */
+ goto invalid;
+ }
+ }
+ break;
case EXT_DOS_ATTR:
if (datasize == 2)
lha->dos_attr = (unsigned char)
@@ -1276,11 +1364,16 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
charset = cp.s;
break;
}
- lha->sconv =
+ lha->sconv_dir =
+ archive_string_conversion_from_charset(
+ &(a->archive), charset, 1);
+ lha->sconv_fname =
archive_string_conversion_from_charset(
&(a->archive), charset, 1);
archive_string_free(&cp);
- if (lha->sconv == NULL)
+ if (lha->sconv_dir == NULL)
+ return (ARCHIVE_FATAL);
+ if (lha->sconv_fname == NULL)
return (ARCHIVE_FATAL);
}
break;
@@ -1336,8 +1429,7 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
}
break;
case EXT_TIMEZONE: /* Not supported */
- case EXT_UTF16_FILENAME: /* Not supported */
- case EXT_UTF16_DIRECTORY: /* Not supported */
+ break;
default:
break;
}
@@ -1600,19 +1692,19 @@ archive_read_format_lha_cleanup(struct archive_read *a)
* then a archived pathname is 'xxx/bbb|aaa/bb/cc'
*/
static int
-lha_parse_linkname(struct archive_string *linkname,
- struct archive_string *pathname)
+lha_parse_linkname(struct archive_wstring *linkname,
+ struct archive_wstring *pathname)
{
- char * linkptr;
+ wchar_t * linkptr;
size_t symlen;
- linkptr = strchr(pathname->s, '|');
+ linkptr = wcschr(pathname->s, L'|');
if (linkptr != NULL) {
- symlen = strlen(linkptr + 1);
- archive_strncpy(linkname, linkptr+1, symlen);
+ symlen = wcslen(linkptr + 1);
+ archive_wstrncpy(linkname, linkptr+1, symlen);
*linkptr = 0;
- pathname->length = strlen(pathname->s);
+ pathname->length = wcslen(pathname->s);
return (1);
}