Diffstat (limited to 'Utilities/cmliblzma/liblzma/common')
42 files changed, 2235 insertions, 342 deletions
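Most of the hunks below repeat one refactoring across the whole directory: the shared opaque type struct lzma_coder_s is replaced by a per-file typedef (lzma_alone_coder, lzma_block_coder, lzma_index_coder, ...), the coder callbacks now take void *coder_ptr together with a const lzma_allocator *, each callback starts by casting the pointer back to its concrete type, and a memlimit of 0 is clamped to 1 instead of being rejected with LZMA_PROG_ERROR. The standalone C sketch below shows only that shape; the demo_* names are illustrative and are not identifiers from liblzma:

    /* Standalone sketch (not liblzma source) of the coder-callback pattern
     * used throughout the hunks below. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    typedef struct {
    	int sequence;          /* stands in for the SEQ_* state in the real coders */
    	uint64_t memlimit;
    } demo_coder;              /* analogous to lzma_alone_coder, lzma_block_coder, ... */

    /* Analogous to alone_decode(void *coder_ptr, const lzma_allocator *allocator, ...) */
    static int
    demo_code(void *coder_ptr)
    {
    	demo_coder *coder = coder_ptr;  /* the cast added at the top of each callback */
    	return coder->sequence;
    }

    int
    main(void)
    {
    	demo_coder *coder = malloc(sizeof(demo_coder));
    	if (coder == NULL)
    		return 1;

    	coder->sequence = 0;
    	/* A memlimit of 0 is no longer an error; the decoders clamp it,
    	 * mirroring the my_max(1, memlimit) change in the diff. */
    	coder->memlimit = 1;

    	printf("sequence = %d, memlimit = %llu\n", demo_code(coder),
    			(unsigned long long)coder->memlimit);
    	free(coder);
    	return 0;
    }

Passing the coder as void * and casting inside each file lets every coder keep its own struct layout, presumably to avoid defining the single lzma_coder_s tag differently in each translation unit; the cost is one cast per callback, as sketched above.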
diff --git a/Utilities/cmliblzma/liblzma/common/alone_decoder.c b/Utilities/cmliblzma/liblzma/common/alone_decoder.c index c25112e..77d0a9b 100644 --- a/Utilities/cmliblzma/liblzma/common/alone_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/alone_decoder.c @@ -15,7 +15,7 @@ #include "lz_decoder.h" -struct lzma_coder_s { +typedef struct { lzma_next_coder next; enum { @@ -46,17 +46,19 @@ struct lzma_coder_s { /// Options decoded from the header needed to initialize /// the LZMA decoder lzma_options_lzma options; -}; +} lzma_alone_coder; static lzma_ret -alone_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), +alone_decode(void *coder_ptr, + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_alone_coder *coder = coder_ptr; + while (*out_pos < out_size && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { @@ -166,8 +168,9 @@ alone_decode(lzma_coder *coder, static void -alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +alone_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_alone_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; @@ -175,9 +178,11 @@ alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, +alone_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { + lzma_alone_coder *coder = coder_ptr; + *memusage = coder->memusage; *old_memlimit = coder->memlimit; @@ -193,34 +198,34 @@ alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, extern lzma_ret -lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, bool picky) { lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); - if (memlimit == 0) - return LZMA_PROG_ERROR; + lzma_alone_coder *coder = next->coder; - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_alone_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &alone_decode; next->end = &alone_decoder_end; next->memconfig = &alone_decoder_memconfig; - next->coder->next = LZMA_NEXT_CODER_INIT; + coder->next = LZMA_NEXT_CODER_INIT; } - next->coder->sequence = SEQ_PROPERTIES; - next->coder->picky = picky; - next->coder->pos = 0; - next->coder->options.dict_size = 0; - next->coder->options.preset_dict = NULL; - next->coder->options.preset_dict_size = 0; - next->coder->uncompressed_size = 0; - next->coder->memlimit = memlimit; - next->coder->memusage = LZMA_MEMUSAGE_BASE; + coder->sequence = SEQ_PROPERTIES; + coder->picky = picky; + coder->pos = 0; + coder->options.dict_size = 0; + coder->options.preset_dict = NULL; + coder->options.preset_dict_size = 0; + coder->uncompressed_size = 0; + coder->memlimit = my_max(1, memlimit); + coder->memusage = LZMA_MEMUSAGE_BASE; return LZMA_OK; } diff --git a/Utilities/cmliblzma/liblzma/common/alone_decoder.h b/Utilities/cmliblzma/liblzma/common/alone_decoder.h index f666fc3..dfa031a 100644 --- a/Utilities/cmliblzma/liblzma/common/alone_decoder.h +++ 
b/Utilities/cmliblzma/liblzma/common/alone_decoder.h @@ -17,7 +17,7 @@ extern lzma_ret lzma_alone_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, bool picky); #endif diff --git a/Utilities/cmliblzma/liblzma/common/alone_encoder.c b/Utilities/cmliblzma/liblzma/common/alone_encoder.c index eb1697e..4853cfd 100644 --- a/Utilities/cmliblzma/liblzma/common/alone_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/alone_encoder.c @@ -17,7 +17,7 @@ #define ALONE_HEADER_SIZE (1 + 4 + 8) -struct lzma_coder_s { +typedef struct { lzma_next_coder next; enum { @@ -27,17 +27,19 @@ struct lzma_coder_s { size_t header_pos; uint8_t header[ALONE_HEADER_SIZE]; -}; +} lzma_alone_coder; static lzma_ret -alone_encode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), +alone_encode(void *coder_ptr, + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_alone_coder *coder = coder_ptr; + while (*out_pos < out_size) switch (coder->sequence) { case SEQ_HEADER: @@ -65,8 +67,9 @@ alone_encode(lzma_coder *coder, static void -alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +alone_encoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_alone_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; @@ -75,28 +78,31 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) // At least for now, this is not used by any internal function. static lzma_ret -alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_options_lzma *options) { lzma_next_coder_init(&alone_encoder_init, next, allocator); - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + lzma_alone_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_alone_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &alone_encode; next->end = &alone_encoder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; + coder->next = LZMA_NEXT_CODER_INIT; } // Basic initializations - next->coder->sequence = SEQ_HEADER; - next->coder->header_pos = 0; + coder->sequence = SEQ_HEADER; + coder->header_pos = 0; // Encode the header: // - Properties (1 byte) - if (lzma_lzma_lclppb_encode(options, next->coder->header)) + if (lzma_lzma_lclppb_encode(options, coder->header)) return LZMA_OPTIONS_ERROR; // - Dictionary size (4 bytes) @@ -116,10 +122,10 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, if (d != UINT32_MAX) ++d; - unaligned_write32le(next->coder->header + 1, d); + unaligned_write32le(coder->header + 1, d); // - Uncompressed size (always unknown and using EOPM) - memset(next->coder->header + 1 + 4, 0xFF, 8); + memset(coder->header + 1 + 4, 0xFF, 8); // Initialize the LZMA encoder. 
const lzma_filter_info filters[2] = { @@ -131,13 +137,13 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, } }; - return lzma_next_filter_init(&next->coder->next, allocator, filters); + return lzma_next_filter_init(&coder->next, allocator, filters); } /* extern lzma_ret -lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_options_alone *options) { lzma_next_coder_init(&alone_encoder_init, next, allocator, options); diff --git a/Utilities/cmliblzma/liblzma/common/auto_decoder.c b/Utilities/cmliblzma/liblzma/common/auto_decoder.c index 35c895f..6895c7c 100644 --- a/Utilities/cmliblzma/liblzma/common/auto_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/auto_decoder.c @@ -14,7 +14,7 @@ #include "alone_decoder.h" -struct lzma_coder_s { +typedef struct { /// Stream decoder or LZMA_Alone decoder lzma_next_coder next; @@ -26,15 +26,17 @@ struct lzma_coder_s { SEQ_CODE, SEQ_FINISH, } sequence; -}; +} lzma_auto_coder; static lzma_ret -auto_decode(lzma_coder *coder, lzma_allocator *allocator, +auto_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_auto_coder *coder = coder_ptr; + switch (coder->sequence) { case SEQ_INIT: if (*in_pos >= in_size) @@ -100,8 +102,9 @@ auto_decode(lzma_coder *coder, lzma_allocator *allocator, static void -auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +auto_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_auto_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; @@ -109,8 +112,10 @@ auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_check -auto_decoder_get_check(const lzma_coder *coder) +auto_decoder_get_check(const void *coder_ptr) { + const lzma_auto_coder *coder = coder_ptr; + // It is LZMA_Alone if get_check is NULL. return coder->next.get_check == NULL ? LZMA_CHECK_NONE : coder->next.get_check(coder->next.coder); @@ -118,9 +123,11 @@ auto_decoder_get_check(const lzma_coder *coder) static lzma_ret -auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, +auto_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { + lzma_auto_coder *coder = coder_ptr; + lzma_ret ret; if (coder->next.memconfig != NULL) { @@ -132,7 +139,10 @@ auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, // the current memory usage. 
*memusage = LZMA_MEMUSAGE_BASE; *old_memlimit = coder->memlimit; + ret = LZMA_OK; + if (new_memlimit != 0 && new_memlimit < *memusage) + ret = LZMA_MEMLIMIT_ERROR; } if (ret == LZMA_OK && new_memlimit != 0) @@ -143,32 +153,31 @@ auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, static lzma_ret -auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +auto_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&auto_decoder_init, next, allocator); - if (memlimit == 0) - return LZMA_PROG_ERROR; - if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + lzma_auto_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_auto_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &auto_decode; next->end = &auto_decoder_end; next->get_check = &auto_decoder_get_check; next->memconfig = &auto_decoder_memconfig; - next->coder->next = LZMA_NEXT_CODER_INIT; + coder->next = LZMA_NEXT_CODER_INIT; } - next->coder->memlimit = memlimit; - next->coder->flags = flags; - next->coder->sequence = SEQ_INIT; + coder->memlimit = my_max(1, memlimit); + coder->flags = flags; + coder->sequence = SEQ_INIT; return LZMA_OK; } diff --git a/Utilities/cmliblzma/liblzma/common/block_buffer_decoder.c b/Utilities/cmliblzma/liblzma/common/block_buffer_decoder.c index ff27a11..b0ded90 100644 --- a/Utilities/cmliblzma/liblzma/common/block_buffer_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/block_buffer_decoder.c @@ -14,7 +14,7 @@ extern LZMA_API(lzma_ret) -lzma_block_buffer_decode(lzma_block *block, lzma_allocator *allocator, +lzma_block_buffer_decode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { diff --git a/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.c b/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.c index 519c6a6..39e263a 100644 --- a/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.c @@ -10,6 +10,7 @@ // /////////////////////////////////////////////////////////////////////////////// +#include "block_buffer_encoder.h" #include "block_encoder.h" #include "filter_encoder.h" #include "lzma2_encoder.h" @@ -28,8 +29,8 @@ + LZMA_CHECK_SIZE_MAX + 3) & ~3) -static lzma_vli -lzma2_bound(lzma_vli uncompressed_size) +static uint64_t +lzma2_bound(uint64_t uncompressed_size) { // Prevent integer overflow in overhead calculation. if (uncompressed_size > COMPRESSED_SIZE_MAX) @@ -39,7 +40,7 @@ lzma2_bound(lzma_vli uncompressed_size) // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, // multiply by the size of per-chunk header, and add one byte for // the end marker. - const lzma_vli overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) + const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) / LZMA2_CHUNK_MAX) * LZMA2_HEADER_UNCOMPRESSED + 1; @@ -51,30 +52,36 @@ lzma2_bound(lzma_vli uncompressed_size) } -extern LZMA_API(size_t) -lzma_block_buffer_bound(size_t uncompressed_size) +extern uint64_t +lzma_block_buffer_bound64(uint64_t uncompressed_size) { - // For now, if the data doesn't compress, we always use uncompressed - // chunks of LZMA2. 
In future we may use Subblock filter too, but - // but for simplicity we probably will still use the same bound - // calculation even though Subblock filter would have slightly less - // overhead. - lzma_vli lzma2_size = lzma2_bound(uncompressed_size); + // If the data doesn't compress, we always use uncompressed + // LZMA2 chunks. + uint64_t lzma2_size = lzma2_bound(uncompressed_size); if (lzma2_size == 0) return 0; // Take Block Padding into account. - lzma2_size = (lzma2_size + 3) & ~LZMA_VLI_C(3); + lzma2_size = (lzma2_size + 3) & ~UINT64_C(3); -#if SIZE_MAX < LZMA_VLI_MAX - // Catch the possible integer overflow on 32-bit systems. There's no - // overflow on 64-bit systems, because lzma2_bound() already takes + // No risk of integer overflow because lzma2_bound() already takes // into account the size of the headers in the Block. - if (SIZE_MAX - HEADERS_BOUND < lzma2_size) + return HEADERS_BOUND + lzma2_size; +} + + +extern LZMA_API(size_t) +lzma_block_buffer_bound(size_t uncompressed_size) +{ + uint64_t ret = lzma_block_buffer_bound64(uncompressed_size); + +#if SIZE_MAX < UINT64_MAX + // Catch the possible integer overflow on 32-bit systems. + if (ret > SIZE_MAX) return 0; #endif - return HEADERS_BOUND + lzma2_size; + return ret; } @@ -82,9 +89,6 @@ static lzma_ret block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { - // TODO: Figure out if the last filter is LZMA2 or Subblock and use - // that filter to encode the uncompressed chunks. - // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at // all, but LZMA2 always requires a dictionary, so use the minimum // value to minimize memory usage of the decoder. @@ -160,16 +164,11 @@ block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, static lzma_ret -block_encode_normal(lzma_block *block, lzma_allocator *allocator, +block_encode_normal(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // Find out the size of the Block Header. - block->compressed_size = lzma2_bound(in_size); - if (block->compressed_size == 0) - return LZMA_DATA_ERROR; - - block->uncompressed_size = in_size; return_if_error(lzma_block_header_size(block)); // Reserve space for the Block Header and skip it for now. @@ -221,10 +220,11 @@ block_encode_normal(lzma_block *block, lzma_allocator *allocator, } -extern LZMA_API(lzma_ret) -lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, +static lzma_ret +block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, - uint8_t *out, size_t *out_pos, size_t out_size) + uint8_t *out, size_t *out_pos, size_t out_size, + bool try_to_compress) { // Validate the arguments. if (block == NULL || (in == NULL && in_size != 0) || out == NULL @@ -233,11 +233,11 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, // The contents of the structure may depend on the version so // check the version before validating the contents of *block. 
- if (block->version != 0) + if (block->version > 1) return LZMA_OPTIONS_ERROR; if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX - || block->filters == NULL) + || (try_to_compress && block->filters == NULL)) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(block->check)) @@ -258,9 +258,19 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, out_size -= check_size; + // Initialize block->uncompressed_size and calculate the worst-case + // value for block->compressed_size. + block->uncompressed_size = in_size; + block->compressed_size = lzma2_bound(in_size); + if (block->compressed_size == 0) + return LZMA_DATA_ERROR; + // Do the actual compression. - const lzma_ret ret = block_encode_normal(block, allocator, - in, in_size, out, out_pos, out_size); + lzma_ret ret = LZMA_BUF_ERROR; + if (try_to_compress) + ret = block_encode_normal(block, allocator, + in, in_size, out, out_pos, out_size); + if (ret != LZMA_OK) { // If the error was something else than output buffer // becoming full, return the error now. @@ -303,3 +313,25 @@ lzma_block_buffer_encode(lzma_block *block, lzma_allocator *allocator, return LZMA_OK; } + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + return block_buffer_encode(block, allocator, + in, in_size, out, out_pos, out_size, true); +} + + +extern LZMA_API(lzma_ret) +lzma_block_uncomp_encode(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // It won't allocate any memory from heap so no need + // for lzma_allocator. + return block_buffer_encode(block, NULL, + in, in_size, out, out_pos, out_size, false); +} diff --git a/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.h b/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.h new file mode 100644 index 0000000..653207f --- /dev/null +++ b/Utilities/cmliblzma/liblzma/common/block_buffer_encoder.h @@ -0,0 +1,24 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_encoder.h +/// \brief Single-call .xz Block encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_BUFFER_ENCODER_H +#define LZMA_BLOCK_BUFFER_ENCODER_H + +#include "common.h" + + +/// uint64_t version of lzma_block_buffer_bound(). It is used by +/// stream_encoder_mt.c. Probably the original lzma_block_buffer_bound() +/// should have been 64-bit, but fixing it would break the ABI. +extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size); + +#endif diff --git a/Utilities/cmliblzma/liblzma/common/block_decoder.c b/Utilities/cmliblzma/liblzma/common/block_decoder.c index a3ce6f4..075bd27 100644 --- a/Utilities/cmliblzma/liblzma/common/block_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/block_decoder.c @@ -15,7 +15,7 @@ #include "check.h" -struct lzma_coder_s { +typedef struct { enum { SEQ_CODE, SEQ_PADDING, @@ -45,7 +45,10 @@ struct lzma_coder_s { /// Check of the uncompressed data lzma_check_state check; -}; + + /// True if the integrity check won't be calculated and verified. 
+ bool ignore_check; +} lzma_block_coder; static inline bool @@ -71,11 +74,13 @@ is_size_valid(lzma_vli size, lzma_vli reference) static lzma_ret -block_decode(lzma_coder *coder, lzma_allocator *allocator, +block_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_block_coder *coder = coder_ptr; + switch (coder->sequence) { case SEQ_CODE: { const size_t in_start = *in_pos; @@ -97,8 +102,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, coder->block->uncompressed_size)) return LZMA_DATA_ERROR; - lzma_check_update(&coder->check, coder->block->check, - out + out_start, out_used); + if (!coder->ignore_check) + lzma_check_update(&coder->check, coder->block->check, + out + out_start, out_used); if (ret != LZMA_STREAM_END) return ret; @@ -140,7 +146,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (coder->block->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; - lzma_check_finish(&coder->check, coder->block->check); + if (!coder->ignore_check) + lzma_check_finish(&coder->check, coder->block->check); + coder->sequence = SEQ_CHECK; // Fall through @@ -155,7 +163,8 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, // Validate the Check only if we support it. // coder->check.buffer may be uninitialized // when the Check ID is not supported. - if (lzma_check_is_supported(coder->block->check) + if (!coder->ignore_check + && lzma_check_is_supported(coder->block->check) && memcmp(coder->block->raw_check, coder->check.buffer.u8, check_size) != 0) @@ -170,8 +179,9 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, static void -block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +block_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_block_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; @@ -179,7 +189,7 @@ block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) extern lzma_ret -lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_block *block) { lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); @@ -191,27 +201,29 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, || !lzma_vli_is_valid(block->uncompressed_size)) return LZMA_PROG_ERROR; - // Allocate and initialize *next->coder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + // Allocate *next->coder if needed. + lzma_block_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_block_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &block_decode; next->end = &block_decoder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; + coder->next = LZMA_NEXT_CODER_INIT; } // Basic initializations - next->coder->sequence = SEQ_CODE; - next->coder->block = block; - next->coder->compressed_size = 0; - next->coder->uncompressed_size = 0; + coder->sequence = SEQ_CODE; + coder->block = block; + coder->compressed_size = 0; + coder->uncompressed_size = 0; // If Compressed Size is not known, we calculate the maximum allowed // value so that encoded size of the Block (including Block Padding) // is still a valid VLI and a multiple of four. 
- next->coder->compressed_limit + coder->compressed_limit = block->compressed_size == LZMA_VLI_UNKNOWN ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) - block->header_size @@ -221,11 +233,14 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, // Initialize the check. It's caller's problem if the Check ID is not // supported, and the Block decoder cannot verify the Check field. // Caller can test lzma_check_is_supported(block->check). - next->coder->check_pos = 0; - lzma_check_init(&next->coder->check, block->check); + coder->check_pos = 0; + lzma_check_init(&coder->check, block->check); + + coder->ignore_check = block->version >= 1 + ? block->ignore_check : false; // Initialize the filter chain. - return lzma_raw_decoder_init(&next->coder->next, allocator, + return lzma_raw_decoder_init(&coder->next, allocator, block->filters); } diff --git a/Utilities/cmliblzma/liblzma/common/block_decoder.h b/Utilities/cmliblzma/liblzma/common/block_decoder.h index 7da9df6..718c5ce 100644 --- a/Utilities/cmliblzma/liblzma/common/block_decoder.h +++ b/Utilities/cmliblzma/liblzma/common/block_decoder.h @@ -17,6 +17,6 @@ extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_block *block); + const lzma_allocator *allocator, lzma_block *block); #endif diff --git a/Utilities/cmliblzma/liblzma/common/block_encoder.c b/Utilities/cmliblzma/liblzma/common/block_encoder.c index 1eeb502..168846a 100644 --- a/Utilities/cmliblzma/liblzma/common/block_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/block_encoder.c @@ -15,7 +15,7 @@ #include "check.h" -struct lzma_coder_s { +typedef struct { /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; @@ -41,15 +41,17 @@ struct lzma_coder_s { /// Check of the uncompressed data lzma_check_state check; -}; +} lzma_block_coder; static lzma_ret -block_encode(lzma_coder *coder, lzma_allocator *allocator, +block_encode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_block_coder *coder = coder_ptr; + // Check that our amount of input stays in proper limits. 
if (LZMA_VLI_MAX - coder->uncompressed_size < in_size - *in_pos) return LZMA_DATA_ERROR; @@ -134,8 +136,9 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, static void -block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +block_encoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_block_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; @@ -143,10 +146,12 @@ block_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -block_encoder_update(lzma_coder *coder, lzma_allocator *allocator, +block_encoder_update(void *coder_ptr, const lzma_allocator *allocator, const lzma_filter *filters lzma_attribute((__unused__)), const lzma_filter *reversed_filters) { + lzma_block_coder *coder = coder_ptr; + if (coder->sequence != SEQ_CODE) return LZMA_PROG_ERROR; @@ -156,7 +161,7 @@ block_encoder_update(lzma_coder *coder, lzma_allocator *allocator, extern lzma_ret -lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_block_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_block *block) { lzma_next_coder_init(&lzma_block_encoder_init, next, allocator); @@ -166,7 +171,7 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, // The contents of the structure may depend on the version so // check the version first. - if (block->version != 0) + if (block->version > 1) return LZMA_OPTIONS_ERROR; // If the Check ID is not supported, we cannot calculate the check and @@ -178,30 +183,31 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, return LZMA_UNSUPPORTED_CHECK; // Allocate and initialize *next->coder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + lzma_block_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_block_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &block_encode; next->end = &block_encoder_end; next->update = &block_encoder_update; - next->coder->next = LZMA_NEXT_CODER_INIT; + coder->next = LZMA_NEXT_CODER_INIT; } // Basic initializations - next->coder->sequence = SEQ_CODE; - next->coder->block = block; - next->coder->compressed_size = 0; - next->coder->uncompressed_size = 0; - next->coder->pos = 0; + coder->sequence = SEQ_CODE; + coder->block = block; + coder->compressed_size = 0; + coder->uncompressed_size = 0; + coder->pos = 0; // Initialize the check - lzma_check_init(&next->coder->check, block->check); + lzma_check_init(&coder->check, block->check); // Initialize the requested filters. 
- return lzma_raw_encoder_init(&next->coder->next, allocator, - block->filters); + return lzma_raw_encoder_init(&coder->next, allocator, block->filters); } diff --git a/Utilities/cmliblzma/liblzma/common/block_encoder.h b/Utilities/cmliblzma/liblzma/common/block_encoder.h index b9eff0b..bd97c18 100644 --- a/Utilities/cmliblzma/liblzma/common/block_encoder.h +++ b/Utilities/cmliblzma/liblzma/common/block_encoder.h @@ -42,6 +42,6 @@ extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_block *block); + const lzma_allocator *allocator, lzma_block *block); #endif diff --git a/Utilities/cmliblzma/liblzma/common/block_header_decoder.c b/Utilities/cmliblzma/liblzma/common/block_header_decoder.c index 2c9573e..1dd982f 100644 --- a/Utilities/cmliblzma/liblzma/common/block_header_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/block_header_decoder.c @@ -15,7 +15,7 @@ static void -free_properties(lzma_block *block, lzma_allocator *allocator) +free_properties(lzma_block *block, const lzma_allocator *allocator) { // Free allocated filter options. The last array member is not // touched after the initialization in the beginning of @@ -32,7 +32,7 @@ free_properties(lzma_block *block, lzma_allocator *allocator) extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, - lzma_allocator *allocator, const uint8_t *in) + const lzma_allocator *allocator, const uint8_t *in) { // NOTE: We consider the header to be corrupt not only when the // CRC32 doesn't match, but also when variable-length integers @@ -46,8 +46,16 @@ lzma_block_header_decode(lzma_block *block, block->filters[i].options = NULL; } - // Always zero for now. - block->version = 0; + // Versions 0 and 1 are supported. If a newer version was specified, + // we need to downgrade it. + if (block->version > 1) + block->version = 1; + + // This isn't a Block Header option, but since the decompressor will + // read it if version >= 1, it's better to initialize it here than + // to expect the caller to do it since in almost all cases this + // should be false. + block->ignore_check = false; // Validate Block Header Size and Check type. The caller must have // already set these, so it is a programming error if this test fails. diff --git a/Utilities/cmliblzma/liblzma/common/block_header_encoder.c b/Utilities/cmliblzma/liblzma/common/block_header_encoder.c index 707dd0c..5c5f542 100644 --- a/Utilities/cmliblzma/liblzma/common/block_header_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/block_header_encoder.c @@ -17,7 +17,7 @@ extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) { - if (block->version != 0) + if (block->version > 1) return LZMA_OPTIONS_ERROR; // Block Header Size + Block Flags + CRC32. diff --git a/Utilities/cmliblzma/liblzma/common/block_util.c b/Utilities/cmliblzma/liblzma/common/block_util.c index 62c9345..00c7fe8 100644 --- a/Utilities/cmliblzma/liblzma/common/block_util.c +++ b/Utilities/cmliblzma/liblzma/common/block_util.c @@ -51,7 +51,7 @@ lzma_block_unpadded_size(const lzma_block *block) // NOTE: This function is used for validation too, so it is // essential that these checks are always done even if // Compressed Size is unknown. 
- if (block == NULL || block->version != 0 + if (block == NULL || block->version > 1 || block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN || block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX || (block->header_size & 3) diff --git a/Utilities/cmliblzma/liblzma/common/common.c b/Utilities/cmliblzma/liblzma/common/common.c index b9e3860..57e3f8e 100644 --- a/Utilities/cmliblzma/liblzma/common/common.c +++ b/Utilities/cmliblzma/liblzma/common/common.c @@ -36,7 +36,7 @@ lzma_version_string(void) /////////////////////// extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) -lzma_alloc(size_t size, lzma_allocator *allocator) +lzma_alloc(size_t size, const lzma_allocator *allocator) { // Some malloc() variants return NULL if called with size == 0. if (size == 0) @@ -53,8 +53,29 @@ lzma_alloc(size_t size, lzma_allocator *allocator) } +extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) +lzma_alloc_zero(size_t size, const lzma_allocator *allocator) +{ + // Some calloc() variants return NULL if called with size == 0. + if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) { + ptr = allocator->alloc(allocator->opaque, 1, size); + if (ptr != NULL) + memzero(ptr, size); + } else { + ptr = calloc(1, size); + } + + return ptr; +} + + extern void -lzma_free(void *ptr, lzma_allocator *allocator) +lzma_free(void *ptr, const lzma_allocator *allocator) { if (allocator != NULL && allocator->free != NULL) allocator->free(allocator->opaque, ptr); @@ -88,7 +109,7 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, extern lzma_ret -lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { lzma_next_coder_init(filters[0].init, next, allocator); @@ -99,7 +120,7 @@ lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, extern lzma_ret -lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator, +lzma_next_filter_update(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters) { // Check that the application isn't trying to change the Filter ID. 
@@ -117,7 +138,7 @@ lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator, extern void -lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator) +lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator) { if (next->init != (uintptr_t)(NULL)) { // To avoid tiny end functions that simply call @@ -156,10 +177,8 @@ lzma_strm_init(lzma_stream *strm) strm->internal->next = LZMA_NEXT_CODER_INIT; } - strm->internal->supported_actions[LZMA_RUN] = false; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = false; - strm->internal->supported_actions[LZMA_FINISH] = false; + memzero(strm->internal->supported_actions, + sizeof(strm->internal->supported_actions)); strm->internal->sequence = ISEQ_RUN; strm->internal->allow_buf_error = false; @@ -178,7 +197,7 @@ lzma_code(lzma_stream *strm, lzma_action action) || (strm->next_out == NULL && strm->avail_out != 0) || strm->internal == NULL || strm->internal->next.code == NULL - || (unsigned int)(action) > LZMA_FINISH + || (unsigned int)(action) > LZMA_ACTION_MAX || !strm->internal->supported_actions[action]) return LZMA_PROG_ERROR; @@ -213,6 +232,10 @@ lzma_code(lzma_stream *strm, lzma_action action) case LZMA_FINISH: strm->internal->sequence = ISEQ_FINISH; break; + + case LZMA_FULL_BARRIER: + strm->internal->sequence = ISEQ_FULL_BARRIER; + break; } break; @@ -240,6 +263,13 @@ lzma_code(lzma_stream *strm, lzma_action action) break; + case ISEQ_FULL_BARRIER: + if (action != LZMA_FULL_BARRIER + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + case ISEQ_END: return LZMA_STREAM_END; @@ -265,7 +295,9 @@ lzma_code(lzma_stream *strm, lzma_action action) strm->internal->avail_in = strm->avail_in; - switch (ret) { + // Cast is needed to silence a warning about LZMA_TIMED_OUT, which + // isn't part of lzma_ret enumeration. + switch ((unsigned int)(ret)) { case LZMA_OK: // Don't return LZMA_BUF_ERROR when it happens the first time. // This is to avoid returning LZMA_BUF_ERROR when avail_out @@ -281,9 +313,16 @@ lzma_code(lzma_stream *strm, lzma_action action) } break; + case LZMA_TIMED_OUT: + strm->internal->allow_buf_error = false; + ret = LZMA_OK; + break; + case LZMA_STREAM_END: if (strm->internal->sequence == ISEQ_SYNC_FLUSH - || strm->internal->sequence == ISEQ_FULL_FLUSH) + || strm->internal->sequence == ISEQ_FULL_FLUSH + || strm->internal->sequence + == ISEQ_FULL_BARRIER) strm->internal->sequence = ISEQ_RUN; else strm->internal->sequence = ISEQ_END; @@ -323,6 +362,22 @@ lzma_end(lzma_stream *strm) } +extern LZMA_API(void) +lzma_get_progress(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) +{ + if (strm->internal->next.get_progress != NULL) { + strm->internal->next.get_progress(strm->internal->next.coder, + progress_in, progress_out); + } else { + *progress_in = strm->total_in; + *progress_out = strm->total_out; + } + + return; +} + + extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) { @@ -380,8 +435,10 @@ lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit) || strm->internal->next.memconfig == NULL) return LZMA_PROG_ERROR; - if (new_memlimit != 0 && new_memlimit < LZMA_MEMUSAGE_BASE) - return LZMA_MEMLIMIT_ERROR; + // Zero is a special value that cannot be used as an actual limit. + // If 0 was specified, use 1 instead. 
+ if (new_memlimit == 0) + new_memlimit = 1; return strm->internal->next.memconfig(strm->internal->next.coder, &memusage, &old_memlimit, new_memlimit); diff --git a/Utilities/cmliblzma/liblzma/common/common.h b/Utilities/cmliblzma/liblzma/common/common.h index 6d7412f..dde3ae0 100644 --- a/Utilities/cmliblzma/liblzma/common/common.h +++ b/Utilities/cmliblzma/liblzma/common/common.h @@ -48,6 +48,13 @@ #define LZMA_BUFFER_SIZE 4096 +/// Maximum number of worker threads within one multithreaded component. +/// The limit exists solely to make it simpler to prevent integer overflows +/// when allocating structures etc. This should be big enough for now... +/// the code won't scale anywhere close to this number anyway. +#define LZMA_THREADS_MAX 16384 + + /// Starting value for memory usage estimates. Instead of calculating size /// of _every_ structure and taking into account malloc() overhead etc., we /// add a base size to all memory usage estimates. It's not very accurate @@ -65,12 +72,20 @@ ( LZMA_TELL_NO_CHECK \ | LZMA_TELL_UNSUPPORTED_CHECK \ | LZMA_TELL_ANY_CHECK \ + | LZMA_IGNORE_CHECK \ | LZMA_CONCATENATED ) -/// Type of encoder/decoder specific data; the actual structure is defined -/// differently in different coders. -typedef struct lzma_coder_s lzma_coder; +/// Largest valid lzma_action value as unsigned integer. +#define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER)) + + +/// Special return value (lzma_ret) to indicate that a timeout was reached +/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to +/// LZMA_OK in lzma_code(). This is not in the lzma_ret enumeration because +/// there's no need to have it in the public API. +#define LZMA_TIMED_OUT 32 + typedef struct lzma_next_coder_s lzma_next_coder; @@ -79,7 +94,7 @@ typedef struct lzma_filter_info_s lzma_filter_info; /// Type of a function used to initialize a filter encoder or decoder typedef lzma_ret (*lzma_init_function)( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); /// Type of a function to do some kind of coding work (filters, Stream, @@ -87,7 +102,7 @@ typedef lzma_ret (*lzma_init_function)( /// input and output buffers, but for simplicity they still use this same /// function prototype. typedef lzma_ret (*lzma_code_function)( - lzma_coder *coder, lzma_allocator *allocator, + void *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, @@ -95,7 +110,7 @@ typedef lzma_ret (*lzma_code_function)( /// Type of a function to free the memory allocated for the coder typedef void (*lzma_end_function)( - lzma_coder *coder, lzma_allocator *allocator); + void *coder, const lzma_allocator *allocator); /// Raw coder validates and converts an array of lzma_filter structures to @@ -118,7 +133,7 @@ struct lzma_filter_info_s { /// Hold data and function pointers of the next filter in the chain. struct lzma_next_coder_s { /// Pointer to coder-specific data - lzma_coder *coder; + void *coder; /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't /// point to a filter coder. @@ -138,18 +153,23 @@ struct lzma_next_coder_s { /// lzma_next_coder.coder. lzma_end_function end; + /// Pointer to a function to get progress information. If this is NULL, + /// lzma_stream.total_in and .total_out are used instead. 
+ void (*get_progress)(void *coder, + uint64_t *progress_in, uint64_t *progress_out); + /// Pointer to function to return the type of the integrity check. /// Most coders won't support this. - lzma_check (*get_check)(const lzma_coder *coder); + lzma_check (*get_check)(const void *coder); /// Pointer to function to get and/or change the memory usage limit. /// If new_memlimit == 0, the limit is not changed. - lzma_ret (*memconfig)(lzma_coder *coder, uint64_t *memusage, + lzma_ret (*memconfig)(void *coder, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit); /// Update the filter-specific options or the whole filter chain /// in the encoder. - lzma_ret (*update)(lzma_coder *coder, lzma_allocator *allocator, + lzma_ret (*update)(void *coder, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters); }; @@ -163,6 +183,7 @@ struct lzma_next_coder_s { .id = LZMA_VLI_UNKNOWN, \ .code = NULL, \ .end = NULL, \ + .get_progress = NULL, \ .get_check = NULL, \ .memconfig = NULL, \ .update = NULL, \ @@ -184,6 +205,7 @@ struct lzma_internal_s { ISEQ_SYNC_FLUSH, ISEQ_FULL_FLUSH, ISEQ_FINISH, + ISEQ_FULL_BARRIER, ISEQ_END, ISEQ_ERROR, } sequence; @@ -194,7 +216,7 @@ struct lzma_internal_s { size_t avail_in; /// Indicates which lzma_action values are allowed by next.code. - bool supported_actions[4]; + bool supported_actions[LZMA_ACTION_MAX + 1]; /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was /// made (no input consumed and no output produced by next.code). @@ -203,11 +225,17 @@ struct lzma_internal_s { /// Allocates memory -extern void *lzma_alloc(size_t size, lzma_allocator *allocator) +extern void *lzma_alloc(size_t size, const lzma_allocator *allocator) lzma_attribute((__malloc__)) lzma_attr_alloc_size(1); +/// Allocates memory and zeroes it (like calloc()). This can be faster +/// than lzma_alloc() + memzero() while being backward compatible with +/// custom allocators. +extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) + lzma_alloc_zero(size_t size, const lzma_allocator *allocator); + /// Frees memory -extern void lzma_free(void *ptr, lzma_allocator *allocator); +extern void lzma_free(void *ptr, const lzma_allocator *allocator); /// Allocates strm->internal if it is NULL, and initializes *strm and @@ -219,17 +247,19 @@ extern lzma_ret lzma_strm_init(lzma_stream *strm); /// than the filter being initialized now. This way the actual filter /// initialization functions don't need to use lzma_next_coder_init macro. extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); + const lzma_allocator *allocator, + const lzma_filter_info *filters); /// Update the next filter in the chain, if any. This checks that /// the application is not trying to change the Filter IDs. extern lzma_ret lzma_next_filter_update( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters); /// Frees the memory allocated for next->coder either using next->end or, /// if next->end is NULL, using lzma_free. 
-extern void lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator); +extern void lzma_next_end(lzma_next_coder *next, + const lzma_allocator *allocator); /// Copy as much data as possible from in[] to out[] and update *in_pos diff --git a/Utilities/cmliblzma/liblzma/common/easy_buffer_encoder.c b/Utilities/cmliblzma/liblzma/common/easy_buffer_encoder.c index c4be34c..48eb56f 100644 --- a/Utilities/cmliblzma/liblzma/common/easy_buffer_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/easy_buffer_encoder.c @@ -15,8 +15,8 @@ extern LZMA_API(lzma_ret) lzma_easy_buffer_encode(uint32_t preset, lzma_check check, - lzma_allocator *allocator, const uint8_t *in, size_t in_size, - uint8_t *out, size_t *out_pos, size_t out_size) + const lzma_allocator *allocator, const uint8_t *in, + size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { lzma_options_easy opt_easy; if (lzma_easy_preset(&opt_easy, preset)) diff --git a/Utilities/cmliblzma/liblzma/common/easy_encoder.c b/Utilities/cmliblzma/liblzma/common/easy_encoder.c index d13ccd7..5cb492d 100644 --- a/Utilities/cmliblzma/liblzma/common/easy_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/easy_encoder.c @@ -11,7 +11,6 @@ /////////////////////////////////////////////////////////////////////////////// #include "easy_preset.h" -#include "stream_encoder.h" extern LZMA_API(lzma_ret) diff --git a/Utilities/cmliblzma/liblzma/common/filter_buffer_decoder.c b/Utilities/cmliblzma/liblzma/common/filter_buffer_decoder.c index 2d35ef8..6620986 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_buffer_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/filter_buffer_decoder.c @@ -14,7 +14,8 @@ extern LZMA_API(lzma_ret) -lzma_raw_buffer_decode(const lzma_filter *filters, lzma_allocator *allocator, +lzma_raw_buffer_decode( + const lzma_filter *filters, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { diff --git a/Utilities/cmliblzma/liblzma/common/filter_buffer_encoder.c b/Utilities/cmliblzma/liblzma/common/filter_buffer_encoder.c index 646e1b3..dda18e3 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_buffer_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/filter_buffer_encoder.c @@ -14,9 +14,10 @@ extern LZMA_API(lzma_ret) -lzma_raw_buffer_encode(const lzma_filter *filters, lzma_allocator *allocator, - const uint8_t *in, size_t in_size, uint8_t *out, - size_t *out_pos, size_t out_size) +lzma_raw_buffer_encode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) { // Validate what isn't validated later in filter_common.c. 
if ((in == NULL && in_size != 0) || out == NULL diff --git a/Utilities/cmliblzma/liblzma/common/filter_common.c b/Utilities/cmliblzma/liblzma/common/filter_common.c index 7c95b05..9ad5d5d 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_common.c +++ b/Utilities/cmliblzma/liblzma/common/filter_common.c @@ -123,7 +123,7 @@ static const struct { extern LZMA_API(lzma_ret) lzma_filters_copy(const lzma_filter *src, lzma_filter *dest, - lzma_allocator *allocator) + const lzma_allocator *allocator) { if (src == NULL || dest == NULL) return LZMA_PROG_ERROR; @@ -239,7 +239,7 @@ validate_chain(const lzma_filter *filters, size_t *count) extern lzma_ret -lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options, lzma_filter_find coder_find, bool is_encoder) { diff --git a/Utilities/cmliblzma/liblzma/common/filter_common.h b/Utilities/cmliblzma/liblzma/common/filter_common.h index cd61fc0..42a26a2 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_common.h +++ b/Utilities/cmliblzma/liblzma/common/filter_common.h @@ -36,7 +36,7 @@ typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id); extern lzma_ret lzma_raw_coder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters, lzma_filter_find coder_find, bool is_encoder); diff --git a/Utilities/cmliblzma/liblzma/common/filter_decoder.c b/Utilities/cmliblzma/liblzma/common/filter_decoder.c index 1ebbe2a..c75b0a8 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/filter_decoder.c @@ -35,7 +35,8 @@ typedef struct { /// \return - LZMA_OK: Properties decoded successfully. /// - LZMA_OPTIONS_ERROR: Unsupported properties /// - LZMA_MEM_ERROR: Memory allocation failed. - lzma_ret (*props_decode)(void **options, lzma_allocator *allocator, + lzma_ret (*props_decode)( + void **options, const lzma_allocator *allocator, const uint8_t *props, size_t props_size); } lzma_filter_decoder; @@ -136,7 +137,7 @@ lzma_filter_decoder_is_supported(lzma_vli id) extern lzma_ret -lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_raw_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options) { return lzma_raw_coder_init(next, allocator, @@ -165,7 +166,7 @@ lzma_raw_decoder_memusage(const lzma_filter *filters) extern LZMA_API(lzma_ret) -lzma_properties_decode(lzma_filter *filter, lzma_allocator *allocator, +lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) { // Make it always NULL so that the caller can always safely free() it. 
diff --git a/Utilities/cmliblzma/liblzma/common/filter_decoder.h b/Utilities/cmliblzma/liblzma/common/filter_decoder.h index d5c68bd..a2e255f 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_decoder.h +++ b/Utilities/cmliblzma/liblzma/common/filter_decoder.h @@ -17,7 +17,7 @@ extern lzma_ret lzma_raw_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options); #endif diff --git a/Utilities/cmliblzma/liblzma/common/filter_encoder.c b/Utilities/cmliblzma/liblzma/common/filter_encoder.c index 635d812..c5d8f39 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/filter_encoder.c @@ -30,11 +30,11 @@ typedef struct { /// invalid, UINT64_MAX is returned. uint64_t (*memusage)(const void *options); - /// Calculates the minimum sane size for Blocks (or other types of - /// chunks) to which the input data can be split to make - /// multithreaded encoding possible. If this is NULL, it is assumed - /// that the encoder is fast enough with single thread. - lzma_vli (*chunk_size)(const void *options); + /// Calculates the recommended Uncompressed Size for .xz Blocks to + /// which the input data can be split to make multithreaded + /// encoding possible. If this is NULL, it is assumed that + /// the encoder is fast enough with single thread. + uint64_t (*block_size)(const void *options); /// Tells the size of the Filter Properties field. If options are /// invalid, UINT32_MAX is returned. If this is NULL, props_size_fixed @@ -59,7 +59,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_LZMA1, .init = &lzma_lzma_encoder_init, .memusage = &lzma_lzma_encoder_memusage, - .chunk_size = NULL, // FIXME + .block_size = NULL, // FIXME .props_size_get = NULL, .props_size_fixed = 5, .props_encode = &lzma_lzma_props_encode, @@ -70,7 +70,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_LZMA2, .init = &lzma_lzma2_encoder_init, .memusage = &lzma_lzma2_encoder_memusage, - .chunk_size = NULL, // FIXME + .block_size = &lzma_lzma2_block_size, // FIXME .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_lzma2_props_encode, @@ -81,7 +81,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_X86, .init = &lzma_simple_x86_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -91,7 +91,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_POWERPC, .init = &lzma_simple_powerpc_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -101,7 +101,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_IA64, .init = &lzma_simple_ia64_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -111,7 +111,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_ARM, .init = &lzma_simple_arm_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -121,7 +121,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_ARMTHUMB, .init = &lzma_simple_armthumb_encoder_init, .memusage = NULL, - .chunk_size = NULL, + 
.block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -131,7 +131,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_SPARC, .init = &lzma_simple_sparc_encoder_init, .memusage = NULL, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = &lzma_simple_props_size, .props_encode = &lzma_simple_props_encode, }, @@ -141,7 +141,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_DELTA, .init = &lzma_delta_encoder_init, .memusage = &lzma_delta_coder_memusage, - .chunk_size = NULL, + .block_size = NULL, .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_delta_props_encode, @@ -196,7 +196,7 @@ lzma_filters_update(lzma_stream *strm, const lzma_filter *filters) extern lzma_ret -lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options) { return lzma_raw_coder_init(next, allocator, @@ -226,20 +226,19 @@ lzma_raw_encoder_memusage(const lzma_filter *filters) } -/* -extern LZMA_API(lzma_vli) -lzma_chunk_size(const lzma_filter *filters) +extern uint64_t +lzma_mt_block_size(const lzma_filter *filters) { - lzma_vli max = 0; + uint64_t max = 0; for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { const lzma_filter_encoder *const fe = encoder_find(filters[i].id); - if (fe->chunk_size != NULL) { - const lzma_vli size - = fe->chunk_size(filters[i].options); - if (size == LZMA_VLI_UNKNOWN) - return LZMA_VLI_UNKNOWN; + if (fe->block_size != NULL) { + const uint64_t size + = fe->block_size(filters[i].options); + if (size == 0) + return 0; if (size > max) max = size; @@ -248,7 +247,6 @@ lzma_chunk_size(const lzma_filter *filters) return max; } -*/ extern LZMA_API(lzma_ret) diff --git a/Utilities/cmliblzma/liblzma/common/filter_encoder.h b/Utilities/cmliblzma/liblzma/common/filter_encoder.h index 5bc137f..f1d5683 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_encoder.h +++ b/Utilities/cmliblzma/liblzma/common/filter_encoder.h @@ -16,12 +16,12 @@ #include "common.h" -// FIXME: Might become a part of the public API once finished. -// extern lzma_vli lzma_chunk_size(const lzma_filter *filters); +// FIXME: Might become a part of the public API. +extern uint64_t lzma_mt_block_size(const lzma_filter *filters); extern lzma_ret lzma_raw_encoder_init( - lzma_next_coder *next, lzma_allocator *allocator, + lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters); #endif diff --git a/Utilities/cmliblzma/liblzma/common/filter_flags_decoder.c b/Utilities/cmliblzma/liblzma/common/filter_flags_decoder.c index caae10c..ddfb085 100644 --- a/Utilities/cmliblzma/liblzma/common/filter_flags_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/filter_flags_decoder.c @@ -15,7 +15,7 @@ extern LZMA_API(lzma_ret) lzma_filter_flags_decode( - lzma_filter *filter, lzma_allocator *allocator, + lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) { // Set the pointer to NULL so the caller can always safely free it. 
diff --git a/Utilities/cmliblzma/liblzma/common/stream_encoder.h b/Utilities/cmliblzma/liblzma/common/hardware_cputhreads.c index 46a7aed..f468366 100644 --- a/Utilities/cmliblzma/liblzma/common/stream_encoder.h +++ b/Utilities/cmliblzma/liblzma/common/hardware_cputhreads.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file stream_encoder.h -/// \brief Encodes .xz Streams +/// \file hardware_cputhreads.c +/// \brief Get the number of CPU threads or cores // // Author: Lasse Collin // @@ -10,14 +10,13 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_STREAM_ENCODER_H -#define LZMA_STREAM_ENCODER_H - #include "common.h" +#include "tuklib_cpucores.h" -extern lzma_ret lzma_stream_encoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter *filters, lzma_check check); -#endif +extern LZMA_API(uint32_t) +lzma_cputhreads(void) +{ + return tuklib_cpucores(); +} diff --git a/Utilities/cmliblzma/liblzma/common/index.c b/Utilities/cmliblzma/liblzma/common/index.c index 9af4bc1..26e4e51 100644 --- a/Utilities/cmliblzma/liblzma/common/index.c +++ b/Utilities/cmliblzma/liblzma/common/index.c @@ -191,8 +191,8 @@ index_tree_init(index_tree *tree) /// Helper for index_tree_end() static void -index_tree_node_end(index_tree_node *node, lzma_allocator *allocator, - void (*free_func)(void *node, lzma_allocator *allocator)) +index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator, + void (*free_func)(void *node, const lzma_allocator *allocator)) { // The tree won't ever be very huge, so recursion should be fine. // 20 levels in the tree is likely quite a lot already in practice. @@ -202,22 +202,21 @@ index_tree_node_end(index_tree_node *node, lzma_allocator *allocator, if (node->right != NULL) index_tree_node_end(node->right, allocator, free_func); - if (free_func != NULL) - free_func(node, allocator); - - lzma_free(node, allocator); + free_func(node, allocator); return; } -/// Free the meory allocated for a tree. If free_func is not NULL, -/// it is called on each node before freeing the node. This is used -/// to free the Record groups from each index_stream before freeing -/// the index_stream itself. +/// Free the memory allocated for a tree. Each node is freed using the +/// given free_func which is either &lzma_free or &index_stream_end. +/// The latter is used to free the Record groups from each index_stream +/// before freeing the index_stream itself. static void -index_tree_end(index_tree *tree, lzma_allocator *allocator, - void (*free_func)(void *node, lzma_allocator *allocator)) +index_tree_end(index_tree *tree, const lzma_allocator *allocator, + void (*free_func)(void *node, const lzma_allocator *allocator)) { + assert(free_func != NULL); + if (tree->root != NULL) index_tree_node_end(tree->root, allocator, free_func); @@ -339,8 +338,8 @@ index_tree_locate(const index_tree *tree, lzma_vli target) /// Allocate and initialize a new Stream using the given base offsets. 
static index_stream * index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, - lzma_vli stream_number, lzma_vli block_number_base, - lzma_allocator *allocator) + uint32_t stream_number, lzma_vli block_number_base, + const lzma_allocator *allocator) { index_stream *s = lzma_alloc(sizeof(index_stream), allocator); if (s == NULL) @@ -368,16 +367,17 @@ index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, /// Free the memory allocated for a Stream and its Record groups. static void -index_stream_end(void *node, lzma_allocator *allocator) +index_stream_end(void *node, const lzma_allocator *allocator) { index_stream *s = node; - index_tree_end(&s->groups, allocator, NULL); + index_tree_end(&s->groups, allocator, &lzma_free); + lzma_free(s, allocator); return; } static lzma_index * -index_init_plain(lzma_allocator *allocator) +index_init_plain(const lzma_allocator *allocator) { lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); if (i != NULL) { @@ -395,7 +395,7 @@ index_init_plain(lzma_allocator *allocator) extern LZMA_API(lzma_index *) -lzma_index_init(lzma_allocator *allocator) +lzma_index_init(const lzma_allocator *allocator) { lzma_index *i = index_init_plain(allocator); if (i == NULL) @@ -414,7 +414,7 @@ lzma_index_init(lzma_allocator *allocator) extern LZMA_API(void) -lzma_index_end(lzma_index *i, lzma_allocator *allocator) +lzma_index_end(lzma_index *i, const lzma_allocator *allocator) { // NOTE: If you modify this function, check also the bottom // of lzma_index_cat(). @@ -637,7 +637,7 @@ lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) extern LZMA_API(lzma_ret) -lzma_index_append(lzma_index *i, lzma_allocator *allocator, +lzma_index_append(lzma_index *i, const lzma_allocator *allocator, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate. @@ -765,7 +765,7 @@ index_cat_helper(const index_cat_info *info, index_stream *this) extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, - lzma_allocator *allocator) + const lzma_allocator *allocator) { const lzma_vli dest_file_size = lzma_index_file_size(dest); @@ -829,6 +829,9 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, s->groups.rightmost = &newg->node; lzma_free(g, allocator); + + // NOTE: newg isn't leaked here because + // newg == (void *)&newg->node. } } @@ -859,7 +862,7 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, /// Duplicate an index_stream. static index_stream * -index_dup_stream(const index_stream *src, lzma_allocator *allocator) +index_dup_stream(const index_stream *src, const lzma_allocator *allocator) { // Catch a somewhat theoretical integer overflow. if (src->record_count > PREALLOC_MAX) @@ -869,11 +872,8 @@ index_dup_stream(const index_stream *src, lzma_allocator *allocator) index_stream *dest = index_stream_init(src->node.compressed_base, src->node.uncompressed_base, src->number, src->block_number_base, allocator); - - // Return immediately if allocation failed or if there are - // no groups to duplicate. - if (dest == NULL || src->groups.leftmost == NULL) - return dest; + if (dest == NULL) + return NULL; // Copy the overall information. dest->record_count = src->record_count; @@ -881,6 +881,10 @@ index_dup_stream(const index_stream *src, lzma_allocator *allocator) dest->stream_flags = src->stream_flags; dest->stream_padding = src->stream_padding; + // Return if there are no groups to duplicate. 
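/*
 * [Editor's illustration: not part of the patch above.]  Minimal sketch of
 * the public lzma_index API whose allocator parameters become const above:
 * create an Index, append one Block per (unpadded size, uncompressed size)
 * pair, and free it.  The sizes used here are made up for the example.
 */
#include <lzma.h>

static lzma_ret
build_index_example(void)
{
	lzma_index *idx = lzma_index_init(NULL);
	if (idx == NULL)
		return LZMA_MEM_ERROR;

	// Record a single Block: 4096 unpadded (compressed) bytes that
	// hold 16384 bytes of uncompressed data.
	const lzma_ret ret = lzma_index_append(idx, NULL, 4096, 16384);

	lzma_index_end(idx, NULL);
	return ret;
}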
+ if (src->groups.leftmost == NULL) + return dest; + // Allocate memory for the Records. We put all the Records into // a single group. It's simplest and also tends to make // lzma_index_locate() a little bit faster with very big Indexes. @@ -919,7 +923,7 @@ index_dup_stream(const index_stream *src, lzma_allocator *allocator) extern LZMA_API(lzma_index *) -lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator) { // Allocate the base structure (no initial Stream). lzma_index *dest = index_init_plain(allocator); @@ -1008,6 +1012,8 @@ iter_set_info(lzma_index_iter *iter) iter->internal[ITER_GROUP].p = NULL; } + // NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t + // internally. iter->stream.number = stream->number; iter->stream.block_count = stream->record_count; iter->stream.compressed_offset = stream->node.compressed_base; diff --git a/Utilities/cmliblzma/liblzma/common/index_decoder.c b/Utilities/cmliblzma/liblzma/common/index_decoder.c index 83c8a3a..cc07a1b 100644 --- a/Utilities/cmliblzma/liblzma/common/index_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/index_decoder.c @@ -14,7 +14,7 @@ #include "check.h" -struct lzma_coder_s { +typedef struct { enum { SEQ_INDICATOR, SEQ_COUNT, @@ -50,11 +50,11 @@ struct lzma_coder_s { /// CRC32 of the List of Records field uint32_t crc32; -}; +} lzma_index_coder; static lzma_ret -index_decode(lzma_coder *coder, lzma_allocator *allocator, +index_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out lzma_attribute((__unused__)), @@ -62,6 +62,8 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, size_t out_size lzma_attribute((__unused__)), lzma_action action lzma_attribute((__unused__))) { + lzma_index_coder *coder = coder_ptr; + // Similar optimization as in index_encoder.c const size_t in_start = *in_pos; lzma_ret ret = LZMA_OK; @@ -207,8 +209,9 @@ out: static void -index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +index_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_index_coder *coder = coder_ptr; lzma_index_end(coder->index, allocator); lzma_free(coder, allocator); return; @@ -216,9 +219,11 @@ index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, +index_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { + lzma_index_coder *coder = coder_ptr; + *memusage = lzma_index_memusage(1, coder->count); *old_memlimit = coder->memlimit; @@ -234,7 +239,7 @@ index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, static lzma_ret -index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, +index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit) { // Remember the pointer given by the application. We will set it @@ -251,7 +256,7 @@ index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, // Initialize the rest. coder->sequence = SEQ_INDICATOR; - coder->memlimit = memlimit; + coder->memlimit = my_max(1, memlimit); coder->count = 0; // Needs to be initialized due to _memconfig(). 
coder->pos = 0; coder->crc32 = 0; @@ -261,28 +266,30 @@ index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, static lzma_ret -index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_index **i, uint64_t memlimit) { lzma_next_coder_init(&index_decoder_init, next, allocator); - if (i == NULL || memlimit == 0) + if (i == NULL) return LZMA_PROG_ERROR; - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + lzma_index_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_index_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &index_decode; next->end = &index_decoder_end; next->memconfig = &index_decoder_memconfig; - next->coder->index = NULL; + coder->index = NULL; } else { - lzma_index_end(next->coder->index, allocator); + lzma_index_end(coder->index, allocator); } - return index_decoder_reset(next->coder, allocator, i, memlimit); + return index_decoder_reset(coder, allocator, i, memlimit); } @@ -299,8 +306,8 @@ lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) extern LZMA_API(lzma_ret) -lzma_index_buffer_decode( - lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, +lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, + const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) { // Sanity checks @@ -309,7 +316,7 @@ lzma_index_buffer_decode( return LZMA_PROG_ERROR; // Initialize the decoder. - lzma_coder coder; + lzma_index_coder coder; return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); // Store the input start position so that we can restore it in case diff --git a/Utilities/cmliblzma/liblzma/common/index_encoder.c b/Utilities/cmliblzma/liblzma/common/index_encoder.c index 45919f0..ac97d0c 100644 --- a/Utilities/cmliblzma/liblzma/common/index_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/index_encoder.c @@ -15,7 +15,7 @@ #include "check.h" -struct lzma_coder_s { +typedef struct { enum { SEQ_INDICATOR, SEQ_COUNT, @@ -37,12 +37,12 @@ struct lzma_coder_s { /// CRC32 of the List of Records field uint32_t crc32; -}; +} lzma_index_coder; static lzma_ret -index_encode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((__unused__)), +index_encode(void *coder_ptr, + const lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in lzma_attribute((__unused__)), size_t *restrict in_pos lzma_attribute((__unused__)), size_t in_size lzma_attribute((__unused__)), @@ -50,6 +50,8 @@ index_encode(lzma_coder *coder, size_t out_size, lzma_action action lzma_attribute((__unused__))) { + lzma_index_coder *coder = coder_ptr; + // Position where to start calculating CRC32. The idea is that we // need to call lzma_crc32() only once per call to index_encode(). 
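/*
 * [Editor's illustration: not part of the patch above.]  Hedged sketch of
 * calling lzma_index_buffer_decode(), whose allocator parameter is
 * const-qualified by the change above.  "buf" and "size" are assumed to
 * hold a complete encoded Index field; UINT64_MAX means an effectively
 * unlimited memory usage limit.
 */
#include <lzma.h>

static lzma_ret
decode_index_example(const uint8_t *buf, size_t size, lzma_index **idx)
{
	uint64_t memlimit = UINT64_MAX;
	size_t in_pos = 0;

	return lzma_index_buffer_decode(idx, &memlimit, NULL,
			buf, &in_pos, size);
}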
const size_t out_start = *out_pos; @@ -159,7 +161,7 @@ out: static void -index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +index_encoder_end(void *coder, const lzma_allocator *allocator) { lzma_free(coder, allocator); return; @@ -167,7 +169,7 @@ index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static void -index_encoder_reset(lzma_coder *coder, const lzma_index *i) +index_encoder_reset(lzma_index_coder *coder, const lzma_index *i) { lzma_index_iter_init(&coder->iter, i); @@ -181,7 +183,7 @@ index_encoder_reset(lzma_coder *coder, const lzma_index *i) extern lzma_ret -lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_index *i) { lzma_next_coder_init(&lzma_index_encoder_init, next, allocator); @@ -190,7 +192,7 @@ lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, return LZMA_PROG_ERROR; if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + next->coder = lzma_alloc(sizeof(lzma_index_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; @@ -230,7 +232,7 @@ lzma_index_buffer_encode(const lzma_index *i, // The Index encoder needs just one small data structure so we can // allocate it on stack. - lzma_coder coder; + lzma_index_coder coder; index_encoder_reset(&coder, i); // Do the actual encoding. This should never fail, but store diff --git a/Utilities/cmliblzma/liblzma/common/index_encoder.h b/Utilities/cmliblzma/liblzma/common/index_encoder.h index a13c94d..4d55cd1 100644 --- a/Utilities/cmliblzma/liblzma/common/index_encoder.h +++ b/Utilities/cmliblzma/liblzma/common/index_encoder.h @@ -17,7 +17,7 @@ extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_index *i); + const lzma_allocator *allocator, const lzma_index *i); #endif diff --git a/Utilities/cmliblzma/liblzma/common/index_hash.c b/Utilities/cmliblzma/liblzma/common/index_hash.c index e3e9386..d7a0344 100644 --- a/Utilities/cmliblzma/liblzma/common/index_hash.c +++ b/Utilities/cmliblzma/liblzma/common/index_hash.c @@ -70,7 +70,8 @@ struct lzma_index_hash_s { extern LZMA_API(lzma_index_hash *) -lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +lzma_index_hash_init(lzma_index_hash *index_hash, + const lzma_allocator *allocator) { if (index_hash == NULL) { index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); @@ -101,7 +102,8 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) extern LZMA_API(void) -lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +lzma_index_hash_end(lzma_index_hash *index_hash, + const lzma_allocator *allocator) { lzma_free(index_hash, allocator); return; diff --git a/Utilities/cmliblzma/liblzma/common/memcmplen.h b/Utilities/cmliblzma/liblzma/common/memcmplen.h new file mode 100644 index 0000000..c1efc9e --- /dev/null +++ b/Utilities/cmliblzma/liblzma/common/memcmplen.h @@ -0,0 +1,175 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file memcmplen.h +/// \brief Optimized comparison of two buffers +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. 
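/*
 * [Editor's illustration: not part of the patch above.]  Counterpart sketch
 * for lzma_index_buffer_encode() shown above: the output buffer must hold
 * at least lzma_index_size(i) bytes.  Error handling is abbreviated and the
 * helper name is invented for this example.
 */
#include <stdlib.h>
#include <lzma.h>

static uint8_t *
encode_index_example(const lzma_index *idx, size_t *encoded_size)
{
	const lzma_vli size = lzma_index_size(idx);
	uint8_t *buf = malloc((size_t)size);
	if (buf == NULL)
		return NULL;

	size_t out_pos = 0;
	if (lzma_index_buffer_encode(idx, buf, &out_pos, (size_t)size)
			!= LZMA_OK) {
		free(buf);
		return NULL;
	}

	*encoded_size = out_pos;
	return buf;
}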
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_MEMCMPLEN_H +#define LZMA_MEMCMPLEN_H + +#include "common.h" + +#ifdef HAVE_IMMINTRIN_H +# include <immintrin.h> +#endif + + +/// Find out how many equal bytes the two buffers have. +/// +/// \param buf1 First buffer +/// \param buf2 Second buffer +/// \param len How many bytes have already been compared and will +/// be assumed to match +/// \param limit How many bytes to compare at most, including the +/// already-compared bytes. This must be significantly +/// smaller than UINT32_MAX to avoid integer overflows. +/// Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past +/// the specified limit from both buf1 and buf2. +/// +/// \return Number of equal bytes in the buffers is returned. +/// This is always at least len and at most limit. +/// +/// \note LZMA_MEMCMPLEN_EXTRA defines how many extra bytes may be read. +/// It's rounded up to 2^n. This extra amount needs to be +/// allocated in the buffers being used. It needs to be +/// initialized too to keep Valgrind quiet. +static inline uint32_t lzma_attribute((__always_inline__)) +lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, + uint32_t len, uint32_t limit) +{ + assert(len <= limit); + assert(limit <= UINT32_MAX / 2); + +#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \ + || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ + || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ + || (defined(_MSC_VER) && defined(_M_X64))) + // NOTE: This will use 64-bit unaligned access which + // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but + // it's convenient here at least as long as it's x86-64 only. + // + // I keep this x86-64 only for now since that's where I know this + // to be a good method. This may be fine on other 64-bit CPUs too. + // On big endian one should use xor instead of subtraction and switch + // to __builtin_clzll(). +#define LZMA_MEMCMPLEN_EXTRA 8 + while (len < limit) { + const uint64_t x = *(const uint64_t *)(buf1 + len) + - *(const uint64_t *)(buf2 + len); + if (x != 0) { +# if defined(_M_X64) // MSVC or Intel C compiler on Windows + unsigned long tmp; + _BitScanForward64(&tmp, x); + len += (uint32_t)tmp >> 3; +# else // GCC, clang, or Intel C compiler + len += (uint32_t)__builtin_ctzll(x) >> 3; +# endif + return my_min(len, limit); + } + + len += 8; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ + && defined(HAVE__MM_MOVEMASK_EPI8) \ + && ((defined(__GNUC__) && defined(__SSE2_MATH__)) \ + || (defined(__INTEL_COMPILER) && defined(__SSE2__)) \ + || (defined(_MSC_VER) && defined(_M_IX86_FP) \ + && _M_IX86_FP >= 2)) + // NOTE: Like above, this will use 128-bit unaligned access which + // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit. + // + // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above + // version is sometimes significantly faster and sometimes + // slightly slower than this SSE2 version, so this SSE2 + // version isn't used on x86-64. 
+# define LZMA_MEMCMPLEN_EXTRA 16 + while (len < limit) { + const uint32_t x = 0xFFFF ^ _mm_movemask_epi8(_mm_cmpeq_epi8( + _mm_loadu_si128((const __m128i *)(buf1 + len)), + _mm_loadu_si128((const __m128i *)(buf2 + len)))); + + if (x != 0) { +# if defined(__INTEL_COMPILER) + len += _bit_scan_forward(x); +# elif defined(_MSC_VER) + unsigned long tmp; + _BitScanForward(&tmp, x); + len += tmp; +# else + len += __builtin_ctz(x); +# endif + return my_min(len, limit); + } + + len += 16; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN) + // Generic 32-bit little endian method +# define LZMA_MEMCMPLEN_EXTRA 4 + while (len < limit) { + uint32_t x = *(const uint32_t *)(buf1 + len) + - *(const uint32_t *)(buf2 + len); + if (x != 0) { + if ((x & 0xFFFF) == 0) { + len += 2; + x >>= 16; + } + + if ((x & 0xFF) == 0) + ++len; + + return my_min(len, limit); + } + + len += 4; + } + + return limit; + +#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN) + // Generic 32-bit big endian method +# define LZMA_MEMCMPLEN_EXTRA 4 + while (len < limit) { + uint32_t x = *(const uint32_t *)(buf1 + len) + ^ *(const uint32_t *)(buf2 + len); + if (x != 0) { + if ((x & 0xFFFF0000) == 0) { + len += 2; + x <<= 16; + } + + if ((x & 0xFF000000) == 0) + ++len; + + return my_min(len, limit); + } + + len += 4; + } + + return limit; + +#else + // Simple portable version that doesn't use unaligned access. +# define LZMA_MEMCMPLEN_EXTRA 0 + while (len < limit && buf1[len] == buf2[len]) + ++len; + + return len; +#endif +} + +#endif diff --git a/Utilities/cmliblzma/liblzma/common/outqueue.c b/Utilities/cmliblzma/liblzma/common/outqueue.c new file mode 100644 index 0000000..2dc8a38 --- /dev/null +++ b/Utilities/cmliblzma/liblzma/common/outqueue.c @@ -0,0 +1,184 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file outqueue.c +/// \brief Output queue handling in multithreaded coding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "outqueue.h" + + +/// This is to ease integer overflow checking: We may allocate up to +/// 2 * LZMA_THREADS_MAX buffers and we need some extra memory for other +/// data structures (that's the second /2). +#define BUF_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX / 2 / 2) + + +static lzma_ret +get_options(uint64_t *bufs_alloc_size, uint32_t *bufs_count, + uint64_t buf_size_max, uint32_t threads) +{ + if (threads > LZMA_THREADS_MAX || buf_size_max > BUF_SIZE_MAX) + return LZMA_OPTIONS_ERROR; + + // The number of buffers is twice the number of threads. + // This wastes RAM but keeps the threads busy when buffers + // finish out of order. + // + // NOTE: If this is changed, update BUF_SIZE_MAX too. 
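/*
 * [Editor's illustration: not part of the patch above.]  How a match finder
 * inside liblzma might call the new lzma_memcmplen() helper added in
 * memcmplen.h.  Both pointers are assumed to point into a dictionary buffer
 * with LZMA_MEMCMPLEN_EXTRA initialized bytes past "limit", as the header
 * requires, and the caller guarantees len_start <= limit.
 */
#include "memcmplen.h"

static uint32_t
match_length(const uint8_t *cur, const uint8_t *match,
		uint32_t len_start, uint32_t limit)
{
	// Returns a value in [len_start, limit]: the total number of
	// matching bytes, counting the len_start bytes already known
	// to be equal.
	return lzma_memcmplen(cur, match, len_start, limit);
}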
+ *bufs_count = threads * 2; + *bufs_alloc_size = *bufs_count * buf_size_max; + + return LZMA_OK; +} + + +extern uint64_t +lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads) +{ + uint64_t bufs_alloc_size; + uint32_t bufs_count; + + if (get_options(&bufs_alloc_size, &bufs_count, buf_size_max, threads) + != LZMA_OK) + return UINT64_MAX; + + return sizeof(lzma_outq) + bufs_count * sizeof(lzma_outbuf) + + bufs_alloc_size; +} + + +extern lzma_ret +lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator, + uint64_t buf_size_max, uint32_t threads) +{ + uint64_t bufs_alloc_size; + uint32_t bufs_count; + + // Set bufs_count and bufs_alloc_size. + return_if_error(get_options(&bufs_alloc_size, &bufs_count, + buf_size_max, threads)); + + // Allocate memory if needed. + if (outq->buf_size_max != buf_size_max + || outq->bufs_allocated != bufs_count) { + lzma_outq_end(outq, allocator); + +#if SIZE_MAX < UINT64_MAX + if (bufs_alloc_size > SIZE_MAX) + return LZMA_MEM_ERROR; +#endif + + outq->bufs = lzma_alloc(bufs_count * sizeof(lzma_outbuf), + allocator); + outq->bufs_mem = lzma_alloc((size_t)(bufs_alloc_size), + allocator); + + if (outq->bufs == NULL || outq->bufs_mem == NULL) { + lzma_outq_end(outq, allocator); + return LZMA_MEM_ERROR; + } + } + + // Initialize the rest of the main structure. Initialization of + // outq->bufs[] is done when they are actually needed. + outq->buf_size_max = (size_t)(buf_size_max); + outq->bufs_allocated = bufs_count; + outq->bufs_pos = 0; + outq->bufs_used = 0; + outq->read_pos = 0; + + return LZMA_OK; +} + + +extern void +lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator) +{ + lzma_free(outq->bufs, allocator); + outq->bufs = NULL; + + lzma_free(outq->bufs_mem, allocator); + outq->bufs_mem = NULL; + + return; +} + + +extern lzma_outbuf * +lzma_outq_get_buf(lzma_outq *outq) +{ + // Caller must have checked it with lzma_outq_has_buf(). + assert(outq->bufs_used < outq->bufs_allocated); + + // Initialize the new buffer. + lzma_outbuf *buf = &outq->bufs[outq->bufs_pos]; + buf->buf = outq->bufs_mem + outq->bufs_pos * outq->buf_size_max; + buf->size = 0; + buf->finished = false; + + // Update the queue state. + if (++outq->bufs_pos == outq->bufs_allocated) + outq->bufs_pos = 0; + + ++outq->bufs_used; + + return buf; +} + + +extern bool +lzma_outq_is_readable(const lzma_outq *outq) +{ + uint32_t i = outq->bufs_pos - outq->bufs_used; + if (outq->bufs_pos < outq->bufs_used) + i += outq->bufs_allocated; + + return outq->bufs[i].finished; +} + + +extern lzma_ret +lzma_outq_read(lzma_outq *restrict outq, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_vli *restrict unpadded_size, + lzma_vli *restrict uncompressed_size) +{ + // There must be at least one buffer from which to read. + if (outq->bufs_used == 0) + return LZMA_OK; + + // Get the buffer. + uint32_t i = outq->bufs_pos - outq->bufs_used; + if (outq->bufs_pos < outq->bufs_used) + i += outq->bufs_allocated; + + lzma_outbuf *buf = &outq->bufs[i]; + + // If it isn't finished yet, we cannot read from it. + if (!buf->finished) + return LZMA_OK; + + // Copy from the buffer to output. + lzma_bufcpy(buf->buf, &outq->read_pos, buf->size, + out, out_pos, out_size); + + // Return if we didn't get all the data from the buffer. + if (outq->read_pos < buf->size) + return LZMA_OK; + + // The buffer was finished. Tell the caller its size information. + *unpadded_size = buf->unpadded_size; + *uncompressed_size = buf->uncompressed_size; + + // Free this buffer for further use. 
+ --outq->bufs_used; + outq->read_pos = 0; + + return LZMA_STREAM_END; +} diff --git a/Utilities/cmliblzma/liblzma/common/outqueue.h b/Utilities/cmliblzma/liblzma/common/outqueue.h new file mode 100644 index 0000000..079634d --- /dev/null +++ b/Utilities/cmliblzma/liblzma/common/outqueue.h @@ -0,0 +1,156 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file outqueue.h +/// \brief Output queue handling in multithreaded coding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +/// Output buffer for a single thread +typedef struct { + /// Pointer to the output buffer of lzma_outq.buf_size_max bytes + uint8_t *buf; + + /// Amount of data written to buf + size_t size; + + /// Additional size information + lzma_vli unpadded_size; + lzma_vli uncompressed_size; + + /// True when no more data will be written into this buffer. + /// + /// \note This is read by another thread and thus access + /// to this variable needs a mutex. + bool finished; + +} lzma_outbuf; + + +typedef struct { + /// Array of buffers that are used cyclically. + lzma_outbuf *bufs; + + /// Memory allocated for all the buffers + uint8_t *bufs_mem; + + /// Amount of buffer space available in each buffer + size_t buf_size_max; + + /// Number of buffers allocated + uint32_t bufs_allocated; + + /// Position in the bufs array. The next buffer to be taken + /// into use is bufs[bufs_pos]. + uint32_t bufs_pos; + + /// Number of buffers in use + uint32_t bufs_used; + + /// Position in the buffer in lzma_outq_read() + size_t read_pos; + +} lzma_outq; + + +/** + * \brief Calculate the memory usage of an output queue + * + * \return Approximate memory usage in bytes or UINT64_MAX on error. + */ +extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads); + + +/// \brief Initialize an output queue +/// +/// \param outq Pointer to an output queue. Before calling +/// this function the first time, *outq should +/// have been zeroed with memzero() so that this +/// function knows that there are no previous +/// allocations to free. +/// \param allocator Pointer to allocator or NULL +/// \param buf_size_max Maximum amount of data that a single buffer +/// in the queue may need to store. +/// \param threads Number of buffers that may be in use +/// concurrently. Note that more than this number +/// of buffers will actually get allocated to +/// improve performance when buffers finish +/// out of order. +/// +/// \return - LZMA_OK +/// - LZMA_MEM_ERROR +/// +extern lzma_ret lzma_outq_init( + lzma_outq *outq, const lzma_allocator *allocator, + uint64_t buf_size_max, uint32_t threads); + + +/// \brief Free the memory associated with the output queue +extern void lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator); + + +/// \brief Get a new buffer +/// +/// lzma_outq_has_buf() must be used to check that there is a buffer +/// available before calling lzma_outq_get_buf(). +/// +extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq); + + +/// \brief Test if there is data ready to be read +/// +/// Call to this function must be protected with the same mutex that +/// is used to protect lzma_outbuf.finished. 
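/*
 * [Editor's illustration: not part of the patch above.]  A hedged,
 * single-threaded walk through the lzma_outq lifecycle declared in
 * outqueue.h.  Real users (the threaded encoder) protect lzma_outbuf.finished
 * and the read side with a mutex; that locking is omitted here for brevity.
 */
#include "outqueue.h"

static lzma_ret
outq_lifecycle_example(void)
{
	lzma_outq outq;
	memzero(&outq, sizeof(outq));   // required before the first init

	// threads = 1 allocates two buffers of up to 4096 bytes each.
	return_if_error(lzma_outq_init(&outq, NULL, 4096, 1));

	if (lzma_outq_has_buf(&outq)) {
		lzma_outbuf *buf = lzma_outq_get_buf(&outq);

		// A worker would fill buf->buf, set buf->size,
		// buf->unpadded_size, and buf->uncompressed_size,
		// and only then mark the buffer finished.
		buf->finished = true;
	}

	lzma_outq_end(&outq, NULL);
	return LZMA_OK;
}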
+/// +extern bool lzma_outq_is_readable(const lzma_outq *outq); + + +/// \brief Read finished data +/// +/// \param outq Pointer to an output queue +/// \param out Beginning of the output buffer +/// \param out_pos The next byte will be written to +/// out[*out_pos]. +/// \param out_size Size of the out buffer; the first byte into +/// which no data is written to is out[out_size]. +/// \param unpadded_size Unpadded Size from the Block encoder +/// \param uncompressed_size Uncompressed Size from the Block encoder +/// +/// \return - LZMA: All OK. Either no data was available or the buffer +/// being read didn't become empty yet. +/// - LZMA_STREAM_END: The buffer being read was finished. +/// *unpadded_size and *uncompressed_size were set. +/// +/// \note This reads lzma_outbuf.finished variables and thus call +/// to this function needs to be protected with a mutex. +/// +extern lzma_ret lzma_outq_read(lzma_outq *restrict outq, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_vli *restrict unpadded_size, + lzma_vli *restrict uncompressed_size); + + +/// \brief Test if there is at least one buffer free +/// +/// This must be used before getting a new buffer with lzma_outq_get_buf(). +/// +static inline bool +lzma_outq_has_buf(const lzma_outq *outq) +{ + return outq->bufs_used < outq->bufs_allocated; +} + + +/// \brief Test if the queue is completely empty +static inline bool +lzma_outq_is_empty(const lzma_outq *outq) +{ + return outq->bufs_used == 0; +} diff --git a/Utilities/cmliblzma/liblzma/common/stream_buffer_decoder.c b/Utilities/cmliblzma/liblzma/common/stream_buffer_decoder.c index ae75315..b9745b5 100644 --- a/Utilities/cmliblzma/liblzma/common/stream_buffer_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/stream_buffer_decoder.c @@ -15,7 +15,7 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags, - lzma_allocator *allocator, + const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { diff --git a/Utilities/cmliblzma/liblzma/common/stream_buffer_encoder.c b/Utilities/cmliblzma/liblzma/common/stream_buffer_encoder.c index 2450ee2..af49554 100644 --- a/Utilities/cmliblzma/liblzma/common/stream_buffer_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/stream_buffer_encoder.c @@ -42,7 +42,8 @@ lzma_stream_buffer_bound(size_t uncompressed_size) extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check, - lzma_allocator *allocator, const uint8_t *in, size_t in_size, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos_ptr, size_t out_size) { // Sanity checks diff --git a/Utilities/cmliblzma/liblzma/common/stream_decoder.c b/Utilities/cmliblzma/liblzma/common/stream_decoder.c index 37ea71e..fdd8ff2 100644 --- a/Utilities/cmliblzma/liblzma/common/stream_decoder.c +++ b/Utilities/cmliblzma/liblzma/common/stream_decoder.c @@ -14,7 +14,7 @@ #include "block_decoder.h" -struct lzma_coder_s { +typedef struct { enum { SEQ_STREAM_HEADER, SEQ_BLOCK_HEADER, @@ -57,6 +57,10 @@ struct lzma_coder_s { /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. bool tell_any_check; + /// If true, we will tell the Block decoder to skip calculating + /// and verifying the integrity check. + bool ignore_check; + /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. 
LZMA_STREAM_END is returned /// once the application isn't giving us any new input, and we aren't @@ -76,11 +80,11 @@ struct lzma_coder_s { /// Buffer to hold Stream Header, Block Header, and Stream Footer. /// Block Header has biggest maximum size. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; -}; +} lzma_stream_coder; static lzma_ret -stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator) +stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator) { // Initialize the Index hash used to verify the Index. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); @@ -96,11 +100,13 @@ stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -stream_decode(lzma_coder *coder, lzma_allocator *allocator, +stream_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_stream_coder *coder = coder_ptr; + // When decoding the actual Block, it may be able to produce more // output even if we don't give it any new input. while (true) @@ -182,8 +188,8 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, coder->pos = 0; - // Version 0 is currently the only possible version. - coder->block_options.version = 0; + // Version 1 is needed to support the .ignore_check option. + coder->block_options.version = 1; // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so @@ -195,6 +201,11 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, return_if_error(lzma_block_header_decode(&coder->block_options, allocator, coder->buffer)); + // If LZMA_IGNORE_CHECK was used, this flag needs to be set. + // It has to be set after lzma_block_header_decode() because + // it always resets this to false. + coder->block_options.ignore_check = coder->ignore_check; + // Check the memory usage limit. 
const uint64_t memusage = lzma_raw_decoder_memusage(filters); lzma_ret ret; @@ -366,8 +377,9 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void -stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_stream_coder *coder = coder_ptr; lzma_next_end(&coder->block_decoder, allocator); lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); @@ -376,16 +388,19 @@ stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_check -stream_decoder_get_check(const lzma_coder *coder) +stream_decoder_get_check(const void *coder_ptr) { + const lzma_stream_coder *coder = coder_ptr; return coder->stream_flags.check; } static lzma_ret -stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, +stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { + lzma_stream_coder *coder = coder_ptr; + *memusage = coder->memusage; *old_memlimit = coder->memlimit; @@ -401,41 +416,42 @@ stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, +lzma_stream_decoder_init( + lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); - if (memlimit == 0) - return LZMA_PROG_ERROR; - if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + lzma_stream_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &stream_decode; next->end = &stream_decoder_end; next->get_check = &stream_decoder_get_check; next->memconfig = &stream_decoder_memconfig; - next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - next->coder->index_hash = NULL; + coder->block_decoder = LZMA_NEXT_CODER_INIT; + coder->index_hash = NULL; } - next->coder->memlimit = memlimit; - next->coder->memusage = LZMA_MEMUSAGE_BASE; - next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; - next->coder->tell_unsupported_check + coder->memlimit = my_max(1, memlimit); + coder->memusage = LZMA_MEMUSAGE_BASE; + coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; + coder->tell_unsupported_check = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; - next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; - next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0; - next->coder->first_stream = true; + coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; + coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; + coder->concatenated = (flags & LZMA_CONCATENATED) != 0; + coder->first_stream = true; - return stream_decoder_reset(next->coder, allocator); + return stream_decoder_reset(coder, allocator); } diff --git a/Utilities/cmliblzma/liblzma/common/stream_decoder.h b/Utilities/cmliblzma/liblzma/common/stream_decoder.h index e54ac28..c13c6ba 100644 --- a/Utilities/cmliblzma/liblzma/common/stream_decoder.h +++ b/Utilities/cmliblzma/liblzma/common/stream_decoder.h @@ -15,7 +15,8 @@ #include "common.h" -extern lzma_ret lzma_stream_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, uint64_t memlimit, uint32_t flags); +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, const 
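/*
 * [Editor's illustration: not part of the patch above.]  With the change
 * above, a memlimit of 0 is no longer rejected with LZMA_PROG_ERROR but is
 * treated as a 1-byte limit, so callers that want "no limit" should keep
 * passing UINT64_MAX.  Sketch of initializing the decoder with the newly
 * honored LZMA_IGNORE_CHECK flag together with LZMA_CONCATENATED:
 */
#include <lzma.h>

static lzma_ret
init_decoder_example(lzma_stream *strm)
{
	return lzma_stream_decoder(strm, UINT64_MAX,
			LZMA_CONCATENATED | LZMA_IGNORE_CHECK);
}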
lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags); #endif diff --git a/Utilities/cmliblzma/liblzma/common/stream_encoder.c b/Utilities/cmliblzma/liblzma/common/stream_encoder.c index 97a7a23..858cba4 100644 --- a/Utilities/cmliblzma/liblzma/common/stream_encoder.c +++ b/Utilities/cmliblzma/liblzma/common/stream_encoder.c @@ -10,12 +10,11 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "stream_encoder.h" #include "block_encoder.h" #include "index_encoder.h" -struct lzma_coder_s { +typedef struct { enum { SEQ_STREAM_HEADER, SEQ_BLOCK_INIT, @@ -26,7 +25,7 @@ struct lzma_coder_s { } sequence; /// True if Block encoder has been initialized by - /// lzma_stream_encoder_init() or stream_encoder_update() + /// stream_encoder_init() or stream_encoder_update() /// and thus doesn't need to be initialized in stream_encode(). bool block_encoder_is_initialized; @@ -56,11 +55,11 @@ struct lzma_coder_s { /// Buffer to hold Stream Header, Block Header, and Stream Footer. /// Block Header has biggest maximum size. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; -}; +} lzma_stream_coder; static lzma_ret -block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +block_encoder_init(lzma_stream_coder *coder, const lzma_allocator *allocator) { // Prepare the Block options. Even though Block encoder doesn't need // compressed_size, uncompressed_size, and header_size to be @@ -79,11 +78,13 @@ block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, +stream_encode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + lzma_stream_coder *coder = coder_ptr; + // Main loop while (*out_pos < out_size) switch (coder->sequence) { @@ -126,7 +127,7 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, } // Initialize the Block encoder unless it was already - // initialized by lzma_stream_encoder_init() or + // initialized by stream_encoder_init() or // stream_encoder_update(). if (!coder->block_encoder_is_initialized) return_if_error(block_encoder_init(coder, allocator)); @@ -147,11 +148,12 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, } case SEQ_BLOCK_ENCODE: { - static const lzma_action convert[4] = { + static const lzma_action convert[LZMA_ACTION_MAX + 1] = { LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FINISH, LZMA_FINISH, + LZMA_FINISH, }; const lzma_ret ret = coder->block_encoder.code( @@ -209,8 +211,10 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +stream_encoder_end(void *coder_ptr, const lzma_allocator *allocator) { + lzma_stream_coder *coder = coder_ptr; + lzma_next_end(&coder->block_encoder, allocator); lzma_next_end(&coder->index_encoder, allocator); lzma_index_end(coder->index, allocator); @@ -224,10 +228,12 @@ stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator, +stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters) { + lzma_stream_coder *coder = coder_ptr; + if (coder->sequence <= SEQ_BLOCK_INIT) { // There is no incomplete Block waiting to be finished, // thus we can change the whole filter chain. 
Start by @@ -262,39 +268,42 @@ stream_encoder_update(lzma_coder *coder, lzma_allocator *allocator, } -extern lzma_ret -lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, +static lzma_ret +stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *filters, lzma_check check) { - lzma_next_coder_init(&lzma_stream_encoder_init, next, allocator); + lzma_next_coder_init(&stream_encoder_init, next, allocator); if (filters == NULL) return LZMA_PROG_ERROR; - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) + lzma_stream_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); + if (coder == NULL) return LZMA_MEM_ERROR; + next->coder = coder; next->code = &stream_encode; next->end = &stream_encoder_end; next->update = &stream_encoder_update; - next->coder->filters[0].id = LZMA_VLI_UNKNOWN; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - next->coder->index_encoder = LZMA_NEXT_CODER_INIT; - next->coder->index = NULL; + coder->filters[0].id = LZMA_VLI_UNKNOWN; + coder->block_encoder = LZMA_NEXT_CODER_INIT; + coder->index_encoder = LZMA_NEXT_CODER_INIT; + coder->index = NULL; } // Basic initializations - next->coder->sequence = SEQ_STREAM_HEADER; - next->coder->block_options.version = 0; - next->coder->block_options.check = check; + coder->sequence = SEQ_STREAM_HEADER; + coder->block_options.version = 0; + coder->block_options.check = check; // Initialize the Index - lzma_index_end(next->coder->index, allocator); - next->coder->index = lzma_index_init(allocator); - if (next->coder->index == NULL) + lzma_index_end(coder->index, allocator); + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) return LZMA_MEM_ERROR; // Encode the Stream Header @@ -303,16 +312,15 @@ lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, .check = check, }; return_if_error(lzma_stream_header_encode( - &stream_flags, next->coder->buffer)); + &stream_flags, coder->buffer)); - next->coder->buffer_pos = 0; - next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->buffer_pos = 0; + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; // Initialize the Block encoder. This way we detect unsupported // filter chains when initializing the Stream encoder instead of // giving an error after Stream Header has already written out. 
- return stream_encoder_update( - next->coder, allocator, filters, NULL); + return stream_encoder_update(coder, allocator, filters, NULL); } @@ -320,11 +328,12 @@ extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, const lzma_filter *filters, lzma_check check) { - lzma_next_strm_init(lzma_stream_encoder_init, strm, filters, check); + lzma_next_strm_init(stream_encoder_init, strm, filters, check); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; diff --git a/Utilities/cmliblzma/liblzma/common/stream_encoder_mt.c b/Utilities/cmliblzma/liblzma/common/stream_encoder_mt.c new file mode 100644 index 0000000..2efe44c --- /dev/null +++ b/Utilities/cmliblzma/liblzma/common/stream_encoder_mt.c @@ -0,0 +1,1143 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder_mt.c +/// \brief Multithreaded .xz Stream encoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_encoder.h" +#include "easy_preset.h" +#include "block_encoder.h" +#include "block_buffer_encoder.h" +#include "index_encoder.h" +#include "outqueue.h" + + +/// Maximum supported block size. This makes it simpler to prevent integer +/// overflows if we are given unusually large block size. +#define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX) + + +typedef enum { + /// Waiting for work. + THR_IDLE, + + /// Encoding is in progress. + THR_RUN, + + /// Encoding is in progress but no more input data will + /// be read. + THR_FINISH, + + /// The main thread wants the thread to stop whatever it was doing + /// but not exit. + THR_STOP, + + /// The main thread wants the thread to exit. We could use + /// cancellation but since there's stopped anyway, this is lazier. + THR_EXIT, + +} worker_state; + +typedef struct lzma_stream_coder_s lzma_stream_coder; + +typedef struct worker_thread_s worker_thread; +struct worker_thread_s { + worker_state state; + + /// Input buffer of coder->block_size bytes. The main thread will + /// put new input into this and update in_size accordingly. Once + /// no more input is coming, state will be set to THR_FINISH. + uint8_t *in; + + /// Amount of data available in the input buffer. This is modified + /// only by the main thread. + size_t in_size; + + /// Output buffer for this thread. This is set by the main + /// thread every time a new Block is started with this thread + /// structure. + lzma_outbuf *outbuf; + + /// Pointer to the main structure is needed when putting this + /// thread back to the stack of free threads. + lzma_stream_coder *coder; + + /// The allocator is set by the main thread. Since a copy of the + /// pointer is kept here, the application must not change the + /// allocator before calling lzma_end(). + const lzma_allocator *allocator; + + /// Amount of uncompressed data that has already been compressed. + uint64_t progress_in; + + /// Amount of compressed data that is ready. + uint64_t progress_out; + + /// Block encoder + lzma_next_coder block_encoder; + + /// Compression options for this Block + lzma_block block_options; + + /// Next structure in the stack of free worker threads. 
+ worker_thread *next; + + mythread_mutex mutex; + mythread_cond cond; + + /// The ID of this thread is used to join the thread + /// when it's not needed anymore. + mythread thread_id; +}; + + +struct lzma_stream_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + } sequence; + + /// Start a new Block every block_size bytes of input unless + /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier. + size_t block_size; + + /// The filter chain currently in use + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Index encoder + lzma_next_coder index_encoder; + + + /// Stream Flags for encoding the Stream Header and Stream Footer. + lzma_stream_flags stream_flags; + + /// Buffer to hold Stream Header and Stream Footer. + uint8_t header[LZMA_STREAM_HEADER_SIZE]; + + /// Read position in header[] + size_t header_pos; + + + /// Output buffer queue for compressed data + lzma_outq outq; + + + /// Maximum wait time if cannot use all the input and cannot + /// fill the output buffer. This is in milliseconds. + uint32_t timeout; + + + /// Error code from a worker thread + lzma_ret thread_error; + + /// Array of allocated thread-specific structures + worker_thread *threads; + + /// Number of structures in "threads" above. This is also the + /// number of threads that will be created at maximum. + uint32_t threads_max; + + /// Number of thread structures that have been initialized, and + /// thus the number of worker threads actually created so far. + uint32_t threads_initialized; + + /// Stack of free threads. When a thread finishes, it puts itself + /// back into this stack. This starts as empty because threads + /// are created only when actually needed. + worker_thread *threads_free; + + /// The most recent worker thread to which the main thread writes + /// the new input from the application. + worker_thread *thr; + + + /// Amount of uncompressed data in Blocks that have already + /// been finished. + uint64_t progress_in; + + /// Amount of compressed data in Stream Header + Blocks that + /// have already been finished. + uint64_t progress_out; + + + mythread_mutex mutex; + mythread_cond cond; +}; + + +/// Tell the main thread that something has gone wrong. +static void +worker_error(worker_thread *thr, lzma_ret ret) +{ + assert(ret != LZMA_OK); + assert(ret != LZMA_STREAM_END); + + mythread_sync(thr->coder->mutex) { + if (thr->coder->thread_error == LZMA_OK) + thr->coder->thread_error = ret; + + mythread_cond_signal(&thr->coder->cond); + } + + return; +} + + +static worker_state +worker_encode(worker_thread *thr, worker_state state) +{ + assert(thr->progress_in == 0); + assert(thr->progress_out == 0); + + // Set the Block options. + thr->block_options = (lzma_block){ + .version = 0, + .check = thr->coder->stream_flags.check, + .compressed_size = thr->coder->outq.buf_size_max, + .uncompressed_size = thr->coder->block_size, + + // TODO: To allow changing the filter chain, the filters + // array must be copied to each worker_thread. + .filters = thr->coder->filters, + }; + + // Calculate maximum size of the Block Header. This amount is + // reserved in the beginning of the buffer so that Block Header + // along with Compressed Size and Uncompressed Size can be + // written there. + lzma_ret ret = lzma_block_header_size(&thr->block_options); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + // Initialize the Block encoder. 
+ ret = lzma_block_encoder_init(&thr->block_encoder, + thr->allocator, &thr->block_options); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + size_t in_pos = 0; + size_t in_size = 0; + + thr->outbuf->size = thr->block_options.header_size; + const size_t out_size = thr->coder->outq.buf_size_max; + + do { + mythread_sync(thr->mutex) { + // Store in_pos and out_pos into *thr so that + // an application may read them via + // lzma_get_progress() to get progress information. + // + // NOTE: These aren't updated when the encoding + // finishes. Instead, the final values are taken + // later from thr->outbuf. + thr->progress_in = in_pos; + thr->progress_out = thr->outbuf->size; + + while (in_size == thr->in_size + && thr->state == THR_RUN) + mythread_cond_wait(&thr->cond, &thr->mutex); + + state = thr->state; + in_size = thr->in_size; + } + + // Return if we were asked to stop or exit. + if (state >= THR_STOP) + return state; + + lzma_action action = state == THR_FINISH + ? LZMA_FINISH : LZMA_RUN; + + // Limit the amount of input given to the Block encoder + // at once. This way this thread can react fairly quickly + // if the main thread wants us to stop or exit. + static const size_t in_chunk_max = 16384; + size_t in_limit = in_size; + if (in_size - in_pos > in_chunk_max) { + in_limit = in_pos + in_chunk_max; + action = LZMA_RUN; + } + + ret = thr->block_encoder.code( + thr->block_encoder.coder, thr->allocator, + thr->in, &in_pos, in_limit, thr->outbuf->buf, + &thr->outbuf->size, out_size, action); + } while (ret == LZMA_OK && thr->outbuf->size < out_size); + + switch (ret) { + case LZMA_STREAM_END: + assert(state == THR_FINISH); + + // Encode the Block Header. By doing it after + // the compression, we can store the Compressed Size + // and Uncompressed Size fields. + ret = lzma_block_header_encode(&thr->block_options, + thr->outbuf->buf); + if (ret != LZMA_OK) { + worker_error(thr, ret); + return THR_STOP; + } + + break; + + case LZMA_OK: + // The data was incompressible. Encode it using uncompressed + // LZMA2 chunks. + // + // First wait that we have gotten all the input. + mythread_sync(thr->mutex) { + while (thr->state == THR_RUN) + mythread_cond_wait(&thr->cond, &thr->mutex); + + state = thr->state; + in_size = thr->in_size; + } + + if (state >= THR_STOP) + return state; + + // Do the encoding. This takes care of the Block Header too. + thr->outbuf->size = 0; + ret = lzma_block_uncomp_encode(&thr->block_options, + thr->in, in_size, thr->outbuf->buf, + &thr->outbuf->size, out_size); + + // It shouldn't fail. + if (ret != LZMA_OK) { + worker_error(thr, LZMA_PROG_ERROR); + return THR_STOP; + } + + break; + + default: + worker_error(thr, ret); + return THR_STOP; + } + + // Set the size information that will be read by the main thread + // to write the Index field. + thr->outbuf->unpadded_size + = lzma_block_unpadded_size(&thr->block_options); + assert(thr->outbuf->unpadded_size != 0); + thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size; + + return THR_FINISH; +} + + +static MYTHREAD_RET_TYPE +worker_start(void *thr_ptr) +{ + worker_thread *thr = thr_ptr; + worker_state state = THR_IDLE; // Init to silence a warning + + while (true) { + // Wait for work. + mythread_sync(thr->mutex) { + while (true) { + // The thread is already idle so if we are + // requested to stop, just set the state. 
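/*
 * [Editor's illustration: not part of the patch above.]  The progress_in and
 * progress_out counters maintained by the worker threads above feed the
 * public lzma_get_progress() API, which an application can poll from the
 * thread that owns the lzma_stream:
 */
#include <stdio.h>
#include <lzma.h>

static void
report_progress(lzma_stream *strm)
{
	uint64_t in_done;
	uint64_t out_done;

	lzma_get_progress(strm, &in_done, &out_done);
	fprintf(stderr, "in: %llu bytes, out: %llu bytes\n",
			(unsigned long long)in_done,
			(unsigned long long)out_done);
}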
+ if (thr->state == THR_STOP) { + thr->state = THR_IDLE; + mythread_cond_signal(&thr->cond); + } + + state = thr->state; + if (state != THR_IDLE) + break; + + mythread_cond_wait(&thr->cond, &thr->mutex); + } + } + + assert(state != THR_IDLE); + assert(state != THR_STOP); + + if (state <= THR_FINISH) + state = worker_encode(thr, state); + + if (state == THR_EXIT) + break; + + // Mark the thread as idle unless the main thread has + // told us to exit. Signal is needed for the case + // where the main thread is waiting for the threads to stop. + mythread_sync(thr->mutex) { + if (thr->state != THR_EXIT) { + thr->state = THR_IDLE; + mythread_cond_signal(&thr->cond); + } + } + + mythread_sync(thr->coder->mutex) { + // Mark the output buffer as finished if + // no errors occurred. + thr->outbuf->finished = state == THR_FINISH; + + // Update the main progress info. + thr->coder->progress_in + += thr->outbuf->uncompressed_size; + thr->coder->progress_out += thr->outbuf->size; + thr->progress_in = 0; + thr->progress_out = 0; + + // Return this thread to the stack of free threads. + thr->next = thr->coder->threads_free; + thr->coder->threads_free = thr; + + mythread_cond_signal(&thr->coder->cond); + } + } + + // Exiting, free the resources. + mythread_mutex_destroy(&thr->mutex); + mythread_cond_destroy(&thr->cond); + + lzma_next_end(&thr->block_encoder, thr->allocator); + lzma_free(thr->in, thr->allocator); + return MYTHREAD_RET_VALUE; +} + + +/// Make the threads stop but not exit. Optionally wait for them to stop. +static void +threads_stop(lzma_stream_coder *coder, bool wait_for_threads) +{ + // Tell the threads to stop. + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_STOP; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + if (!wait_for_threads) + return; + + // Wait for the threads to settle in the idle state. + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + while (coder->threads[i].state != THR_IDLE) + mythread_cond_wait(&coder->threads[i].cond, + &coder->threads[i].mutex); + } + } + + return; +} + + +/// Stop the threads and free the resources associated with them. +/// Wait until the threads have exited. +static void +threads_end(lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + mythread_sync(coder->threads[i].mutex) { + coder->threads[i].state = THR_EXIT; + mythread_cond_signal(&coder->threads[i].cond); + } + } + + for (uint32_t i = 0; i < coder->threads_initialized; ++i) { + int ret = mythread_join(coder->threads[i].thread_id); + assert(ret == 0); + (void)ret; + } + + lzma_free(coder->threads, allocator); + return; +} + + +/// Initialize a new worker_thread structure and create a new thread. 
+static lzma_ret +initialize_new_thread(lzma_stream_coder *coder, + const lzma_allocator *allocator) +{ + worker_thread *thr = &coder->threads[coder->threads_initialized]; + + thr->in = lzma_alloc(coder->block_size, allocator); + if (thr->in == NULL) + return LZMA_MEM_ERROR; + + if (mythread_mutex_init(&thr->mutex)) + goto error_mutex; + + if (mythread_cond_init(&thr->cond)) + goto error_cond; + + thr->state = THR_IDLE; + thr->allocator = allocator; + thr->coder = coder; + thr->progress_in = 0; + thr->progress_out = 0; + thr->block_encoder = LZMA_NEXT_CODER_INIT; + + if (mythread_create(&thr->thread_id, &worker_start, thr)) + goto error_thread; + + ++coder->threads_initialized; + coder->thr = thr; + + return LZMA_OK; + +error_thread: + mythread_cond_destroy(&thr->cond); + +error_cond: + mythread_mutex_destroy(&thr->mutex); + +error_mutex: + lzma_free(thr->in, allocator); + return LZMA_MEM_ERROR; +} + + +static lzma_ret +get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator) +{ + // If there are no free output subqueues, there is no + // point to try getting a thread. + if (!lzma_outq_has_buf(&coder->outq)) + return LZMA_OK; + + // If there is a free structure on the stack, use it. + mythread_sync(coder->mutex) { + if (coder->threads_free != NULL) { + coder->thr = coder->threads_free; + coder->threads_free = coder->threads_free->next; + } + } + + if (coder->thr == NULL) { + // If there are no uninitialized structures left, return. + if (coder->threads_initialized == coder->threads_max) + return LZMA_OK; + + // Initialize a new thread. + return_if_error(initialize_new_thread(coder, allocator)); + } + + // Reset the parts of the thread state that have to be done + // in the main thread. + mythread_sync(coder->thr->mutex) { + coder->thr->state = THR_RUN; + coder->thr->in_size = 0; + coder->thr->outbuf = lzma_outq_get_buf(&coder->outq); + mythread_cond_signal(&coder->thr->cond); + } + + return LZMA_OK; +} + + +static lzma_ret +stream_encode_in(lzma_stream_coder *coder, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, lzma_action action) +{ + while (*in_pos < in_size + || (coder->thr != NULL && action != LZMA_RUN)) { + if (coder->thr == NULL) { + // Get a new thread. + const lzma_ret ret = get_thread(coder, allocator); + if (coder->thr == NULL) + return ret; + } + + // Copy the input data to thread's buffer. + size_t thr_in_size = coder->thr->in_size; + lzma_bufcpy(in, in_pos, in_size, coder->thr->in, + &thr_in_size, coder->block_size); + + // Tell the Block encoder to finish if + // - it has got block_size bytes of input; or + // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH, + // or LZMA_FULL_BARRIER was used. + // + // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. + const bool finish = thr_in_size == coder->block_size + || (*in_pos == in_size && action != LZMA_RUN); + + bool block_error = false; + + mythread_sync(coder->thr->mutex) { + if (coder->thr->state == THR_IDLE) { + // Something has gone wrong with the Block + // encoder. It has set coder->thread_error + // which we will read a few lines later. + block_error = true; + } else { + // Tell the Block encoder its new amount + // of input and update the state if needed. 
+ coder->thr->in_size = thr_in_size;
+
+ if (finish)
+ coder->thr->state = THR_FINISH;
+
+ mythread_cond_signal(&coder->thr->cond);
+ }
+ }
+
+ if (block_error) {
+ lzma_ret ret;
+
+ mythread_sync(coder->mutex) {
+ ret = coder->thread_error;
+ }
+
+ return ret;
+ }
+
+ if (finish)
+ coder->thr = NULL;
+ }
+
+ return LZMA_OK;
+}
+
+
+/// Wait until more input can be consumed, more output can be read, or
+/// an optional timeout is reached.
+static bool
+wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs,
+ bool *has_blocked, bool has_input)
+{
+ if (coder->timeout != 0 && !*has_blocked) {
+ // Every time stream_encode_mt() is called via
+ // lzma_code(), *has_blocked starts as false. We set it
+ // to true here and calculate the absolute time when
+ // we must return if there's nothing to do.
+ //
+ // The idea of *has_blocked is to avoid unneeded calls
+ // to mythread_condtime_set(), which may do a syscall
+ // depending on the operating system.
+ *has_blocked = true;
+ mythread_condtime_set(wait_abs, &coder->cond, coder->timeout);
+ }
+
+ bool timed_out = false;
+
+ mythread_sync(coder->mutex) {
+ // There are four things that we wait for. If one of them
+ // becomes possible, we return.
+ // - If there is input left, we need to get a free
+ // worker thread and an output buffer for it.
+ // - Data ready to be read from the output queue.
+ // - A worker thread indicates an error.
+ // - A timeout occurs.
+ while ((!has_input || coder->threads_free == NULL
+ || !lzma_outq_has_buf(&coder->outq))
+ && !lzma_outq_is_readable(&coder->outq)
+ && coder->thread_error == LZMA_OK
+ && !timed_out) {
+ if (coder->timeout != 0)
+ timed_out = mythread_cond_timedwait(
+ &coder->cond, &coder->mutex,
+ wait_abs) != 0;
+ else
+ mythread_cond_wait(&coder->cond,
+ &coder->mutex);
+ }
+ }
+
+ return timed_out;
+}
+
+
+static lzma_ret
+stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER:
+ lzma_bufcpy(coder->header, &coder->header_pos,
+ sizeof(coder->header),
+ out, out_pos, out_size);
+ if (coder->header_pos < sizeof(coder->header))
+ return LZMA_OK;
+
+ coder->header_pos = 0;
+ coder->sequence = SEQ_BLOCK;
+
+ // Fall through
+
+ case SEQ_BLOCK: {
+ // Initialized to silence warnings.
+ lzma_vli unpadded_size = 0;
+ lzma_vli uncompressed_size = 0;
+ lzma_ret ret = LZMA_OK;
+
+ // These are for wait_for_work().
+ bool has_blocked = false;
+ mythread_condtime wait_abs;
+
+ while (true) {
+ mythread_sync(coder->mutex) {
+ // Check for Block encoder errors.
+ ret = coder->thread_error;
+ if (ret != LZMA_OK) {
+ assert(ret != LZMA_STREAM_END);
+ break;
+ }
+
+ // Try to read compressed data to out[].
+ ret = lzma_outq_read(&coder->outq,
+ out, out_pos, out_size,
+ &unpadded_size,
+ &uncompressed_size);
+ }
+
+ if (ret == LZMA_STREAM_END) {
+ // End of Block. Add it to the Index.
+ ret = lzma_index_append(coder->index,
+ allocator, unpadded_size,
+ uncompressed_size);
+
+ // If we didn't fill the output buffer yet,
+ // try to read more data. Maybe the next
+ // outbuf has been finished already too.
+ if (*out_pos < out_size)
+ continue;
+ }
+
+ if (ret != LZMA_OK) {
+ // coder->thread_error was set or
+ // lzma_index_append() failed.
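
wait_for_work() above builds its timed wait from the mythread_* wrappers: an absolute deadline is computed once per lzma_code() call and the wait loop returns when work becomes possible or the deadline passes. The sketch below shows the plain POSIX pattern these wrappers are assumed to correspond to; the helper names are invented, error checking is omitted, and CLOCK_REALTIME is used for brevity where the real wrappers may prefer a monotonic clock.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

// Absolute deadline "timeout_ms" milliseconds from now; roughly what
// mythread_condtime_set() computes.
static struct timespec
deadline_from_now(uint32_t timeout_ms)
{
	struct timespec ts;
	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += timeout_ms / 1000;
	ts.tv_nsec += (long)(timeout_ms % 1000) * 1000000L;
	if (ts.tv_nsec >= 1000000000L) {
		ts.tv_nsec -= 1000000000L;
		++ts.tv_sec;
	}
	return ts;
}

// Wait until *ready becomes true or the deadline passes.
// Returns true on timeout, mirroring wait_for_work().
static bool
wait_ready_or_timeout(pthread_mutex_t *mutex, pthread_cond_t *cond,
		const bool *ready, const struct timespec *deadline)
{
	bool timed_out = false;

	pthread_mutex_lock(mutex);
	while (!*ready && !timed_out)
		timed_out = pthread_cond_timedwait(cond, mutex, deadline)
				== ETIMEDOUT;
	pthread_mutex_unlock(mutex);

	return timed_out;
}
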
+ threads_stop(coder, false); + return ret; + } + + // Try to give uncompressed data to a worker thread. + ret = stream_encode_in(coder, allocator, + in, in_pos, in_size, action); + if (ret != LZMA_OK) { + threads_stop(coder, false); + return ret; + } + + // See if we should wait or return. + // + // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. + if (*in_pos == in_size) { + // LZMA_RUN: More data is probably coming + // so return to let the caller fill the + // input buffer. + if (action == LZMA_RUN) + return LZMA_OK; + + // LZMA_FULL_BARRIER: The same as with + // LZMA_RUN but tell the caller that the + // barrier was completed. + if (action == LZMA_FULL_BARRIER) + return LZMA_STREAM_END; + + // Finishing or flushing isn't completed until + // all input data has been encoded and copied + // to the output buffer. + if (lzma_outq_is_empty(&coder->outq)) { + // LZMA_FINISH: Continue to encode + // the Index field. + if (action == LZMA_FINISH) + break; + + // LZMA_FULL_FLUSH: Return to tell + // the caller that flushing was + // completed. + if (action == LZMA_FULL_FLUSH) + return LZMA_STREAM_END; + } + } + + // Return if there is no output space left. + // This check must be done after testing the input + // buffer, because we might want to use a different + // return code. + if (*out_pos == out_size) + return LZMA_OK; + + // Neither in nor out has been used completely. + // Wait until there's something we can do. + if (wait_for_work(coder, &wait_abs, &has_blocked, + *in_pos < in_size)) + return LZMA_TIMED_OUT; + } + + // All Blocks have been encoded and the threads have stopped. + // Prepare to encode the Index field. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX; + + // Update the progress info to take the Index and + // Stream Footer into account. Those are very fast to encode + // so in terms of progress information they can be thought + // to be ready to be copied out. + coder->progress_out += lzma_index_size(coder->index) + + LZMA_STREAM_HEADER_SIZE; + } + + // Fall through + + case SEQ_INDEX: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. + coder->stream_flags.backward_size + = lzma_index_size(coder->index); + if (lzma_stream_footer_encode(&coder->stream_flags, + coder->header) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_STREAM_FOOTER; + } + + // Fall through + + case SEQ_STREAM_FOOTER: + lzma_bufcpy(coder->header, &coder->header_pos, + sizeof(coder->header), + out, out_pos, out_size); + return coder->header_pos < sizeof(coder->header) + ? LZMA_OK : LZMA_STREAM_END; + } + + assert(0); + return LZMA_PROG_ERROR; +} + + +static void +stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_stream_coder *coder = coder_ptr; + + // Threads must be killed before the output queue can be freed. 
+ threads_end(coder, allocator);
+ lzma_outq_end(&coder->outq, allocator);
+
+ for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+ lzma_free(coder->filters[i].options, allocator);
+
+ lzma_next_end(&coder->index_encoder, allocator);
+ lzma_index_end(coder->index, allocator);
+
+ mythread_cond_destroy(&coder->cond);
+ mythread_mutex_destroy(&coder->mutex);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+/// Options handling for lzma_stream_encoder_mt_init() and
+/// lzma_stream_encoder_mt_memusage()
+static lzma_ret
+get_options(const lzma_mt *options, lzma_options_easy *opt_easy,
+ const lzma_filter **filters, uint64_t *block_size,
+ uint64_t *outbuf_size_max)
+{
+ // Validate some of the options.
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
+ if (options->flags != 0 || options->threads == 0
+ || options->threads > LZMA_THREADS_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ if (options->filters != NULL) {
+ // Filter chain was given, use it as is.
+ *filters = options->filters;
+ } else {
+ // Use a preset.
+ if (lzma_easy_preset(opt_easy, options->preset))
+ return LZMA_OPTIONS_ERROR;
+
+ *filters = opt_easy->filters;
+ }
+
+ // Block size
+ if (options->block_size > 0) {
+ if (options->block_size > BLOCK_SIZE_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ *block_size = options->block_size;
+ } else {
+ // Determine the Block size from the filter chain.
+ *block_size = lzma_mt_block_size(*filters);
+ if (*block_size == 0)
+ return LZMA_OPTIONS_ERROR;
+
+ assert(*block_size <= BLOCK_SIZE_MAX);
+ }
+
+ // Calculate the maximum amount of output that a single output buffer
+ // may need to hold. This is the same as the maximum total size of
+ // a Block.
+ *outbuf_size_max = lzma_block_buffer_bound64(*block_size);
+ if (*outbuf_size_max == 0)
+ return LZMA_MEM_ERROR;
+
+ return LZMA_OK;
+}
+
+
+static void
+get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ // Lock coder->mutex to prevent finishing threads from moving their
+ // progress info from the worker_thread structure to lzma_stream_coder.
+ mythread_sync(coder->mutex) {
+ *progress_in = coder->progress_in;
+ *progress_out = coder->progress_out;
+
+ for (size_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ *progress_in += coder->threads[i].progress_in;
+ *progress_out += coder->threads[i]
+ .progress_out;
+ }
+ }
+ }
+
+ return;
+}
+
+
+static lzma_ret
+stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_mt *options)
+{
+ lzma_next_coder_init(&stream_encoder_mt_init, next, allocator);
+
+ // Get the filter chain.
+ lzma_options_easy easy;
+ const lzma_filter *filters;
+ uint64_t block_size;
+ uint64_t outbuf_size_max;
+ return_if_error(get_options(options, &easy, &filters,
+ &block_size, &outbuf_size_max));
+
+#if SIZE_MAX < UINT64_MAX
+ if (block_size > SIZE_MAX)
+ return LZMA_MEM_ERROR;
+#endif
+
+ // Validate the filter chain so that we can give an error in this
+ // function instead of delaying it to the first call to lzma_code().
+ // The memory usage calculation verifies the filter chain as
+ // a side effect, so we take advantage of that.
+ if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ // Validate the Check ID.
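
get_progress() above is the hook that the public lzma_get_progress() call dispatches to when a coder provides one, summing the coder-level counters with the per-thread counters under their mutexes. A caller-side sketch of polling it between lzma_code() calls follows; report_progress() is a hypothetical helper, not part of this diff.

#include <inttypes.h>
#include <stdio.h>

#include <lzma.h>

// Print combined progress for an initialized multithreaded encoder.
// Call this between lzma_code() calls from the thread driving strm.
static void
report_progress(lzma_stream *strm)
{
	uint64_t in_so_far;
	uint64_t out_so_far;

	lzma_get_progress(strm, &in_so_far, &out_so_far);
	fprintf(stderr, "\rin: %" PRIu64 " B, out: %" PRIu64 " B",
			in_so_far, out_so_far);
}
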
+ if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (!lzma_check_is_supported(options->check)) + return LZMA_UNSUPPORTED_CHECK; + + // Allocate and initialize the base structure if needed. + lzma_stream_coder *coder = next->coder; + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + + // For the mutex and condition variable initializations + // the error handling has to be done here because + // stream_encoder_mt_end() doesn't know if they have + // already been initialized or not. + if (mythread_mutex_init(&coder->mutex)) { + lzma_free(coder, allocator); + next->coder = NULL; + return LZMA_MEM_ERROR; + } + + if (mythread_cond_init(&coder->cond)) { + mythread_mutex_destroy(&coder->mutex); + lzma_free(coder, allocator); + next->coder = NULL; + return LZMA_MEM_ERROR; + } + + next->code = &stream_encode_mt; + next->end = &stream_encoder_mt_end; + next->get_progress = &get_progress; +// next->update = &stream_encoder_mt_update; + + coder->filters[0].id = LZMA_VLI_UNKNOWN; + coder->index_encoder = LZMA_NEXT_CODER_INIT; + coder->index = NULL; + memzero(&coder->outq, sizeof(coder->outq)); + coder->threads = NULL; + coder->threads_max = 0; + coder->threads_initialized = 0; + } + + // Basic initializations + coder->sequence = SEQ_STREAM_HEADER; + coder->block_size = (size_t)(block_size); + coder->thread_error = LZMA_OK; + coder->thr = NULL; + + // Allocate the thread-specific base structures. + assert(options->threads > 0); + if (coder->threads_max != options->threads) { + threads_end(coder, allocator); + + coder->threads = NULL; + coder->threads_max = 0; + + coder->threads_initialized = 0; + coder->threads_free = NULL; + + coder->threads = lzma_alloc( + options->threads * sizeof(worker_thread), + allocator); + if (coder->threads == NULL) + return LZMA_MEM_ERROR; + + coder->threads_max = options->threads; + } else { + // Reuse the old structures and threads. Tell the running + // threads to stop and wait until they have stopped. + threads_stop(coder, true); + } + + // Output queue + return_if_error(lzma_outq_init(&coder->outq, allocator, + outbuf_size_max, options->threads)); + + // Timeout + coder->timeout = options->timeout; + + // Free the old filter chain and copy the new one. 
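
The initialization code above rejects an out-of-range Check ID with LZMA_PROG_ERROR and an unsupported one with LZMA_UNSUPPORTED_CHECK. A caller can avoid the latter by probing with lzma_check_is_supported() before filling in lzma_mt.check; the small helper below is an illustrative sketch that prefers CRC64 and falls back to CRC32.

#include <lzma.h>

// Pick an integrity check that this liblzma build supports,
// preferring CRC64 (the usual xz default) over CRC32.
static lzma_check
pick_check(void)
{
	return lzma_check_is_supported(LZMA_CHECK_CRC64)
			? LZMA_CHECK_CRC64 : LZMA_CHECK_CRC32;
}
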
+ for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) + lzma_free(coder->filters[i].options, allocator); + + return_if_error(lzma_filters_copy( + filters, coder->filters, allocator)); + + // Index + lzma_index_end(coder->index, allocator); + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Stream Header + coder->stream_flags.version = 0; + coder->stream_flags.check = options->check; + return_if_error(lzma_stream_header_encode( + &coder->stream_flags, coder->header)); + + coder->header_pos = 0; + + // Progress info + coder->progress_in = 0; + coder->progress_out = LZMA_STREAM_HEADER_SIZE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options) +{ + lzma_next_strm_init(stream_encoder_mt_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; +// strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +// This function name is a monster but it's consistent with the older +// monster names. :-( 31 chars is the max that C99 requires so in that +// sense it's not too long. ;-) +extern LZMA_API(uint64_t) +lzma_stream_encoder_mt_memusage(const lzma_mt *options) +{ + lzma_options_easy easy; + const lzma_filter *filters; + uint64_t block_size; + uint64_t outbuf_size_max; + + if (get_options(options, &easy, &filters, &block_size, + &outbuf_size_max) != LZMA_OK) + return UINT64_MAX; + + // Memory usage of the input buffers + const uint64_t inbuf_memusage = options->threads * block_size; + + // Memory usage of the filter encoders + uint64_t filters_memusage = lzma_raw_encoder_memusage(filters); + if (filters_memusage == UINT64_MAX) + return UINT64_MAX; + + filters_memusage *= options->threads; + + // Memory usage of the output queue + const uint64_t outq_memusage = lzma_outq_memusage( + outbuf_size_max, options->threads); + if (outq_memusage == UINT64_MAX) + return UINT64_MAX; + + // Sum them with overflow checking. + uint64_t total_memusage = LZMA_MEMUSAGE_BASE + + sizeof(lzma_stream_coder) + + options->threads * sizeof(worker_thread); + + if (UINT64_MAX - total_memusage < inbuf_memusage) + return UINT64_MAX; + + total_memusage += inbuf_memusage; + + if (UINT64_MAX - total_memusage < filters_memusage) + return UINT64_MAX; + + total_memusage += filters_memusage; + + if (UINT64_MAX - total_memusage < outq_memusage) + return UINT64_MAX; + + return total_memusage + outq_memusage; +} |
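
To put the new public entry points in context, here is a usage sketch of lzma_stream_encoder_mt() together with lzma_stream_encoder_mt_memusage(). It is not taken from this diff; the thread count, timeout, buffer sizes, and minimal error handling are simplifying assumptions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include <lzma.h>

static bool
compress_stdin_to_stdout(void)
{
	lzma_mt mt = {
		.flags = 0,              // no flags are currently supported
		.threads = 4,            // assumption; e.g. number of cores
		.block_size = 0,         // 0 = derive from the filter chain
		.timeout = 300,          // allow LZMA_TIMED_OUT every 300 ms
		.preset = LZMA_PRESET_DEFAULT,
		.filters = NULL,         // NULL = use the preset above
		.check = LZMA_CHECK_CRC64,
	};

	// Optional: inspect the expected memory usage up front.
	// A real caller might compare this against a memory budget.
	if (lzma_stream_encoder_mt_memusage(&mt) == UINT64_MAX)
		return false;

	lzma_stream strm = LZMA_STREAM_INIT;
	if (lzma_stream_encoder_mt(&strm, &mt) != LZMA_OK)
		return false;

	uint8_t inbuf[BUFSIZ];
	uint8_t outbuf[BUFSIZ];
	lzma_action action = LZMA_RUN;

	strm.next_out = outbuf;
	strm.avail_out = sizeof(outbuf);

	while (true) {
		if (strm.avail_in == 0 && action == LZMA_RUN) {
			strm.next_in = inbuf;
			strm.avail_in = fread(inbuf, 1, sizeof(inbuf), stdin);
			if (strm.avail_in < sizeof(inbuf))
				action = LZMA_FINISH;
		}

		const lzma_ret ret = lzma_code(&strm, action);

		if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
			fwrite(outbuf, 1, sizeof(outbuf) - strm.avail_out,
					stdout);
			strm.next_out = outbuf;
			strm.avail_out = sizeof(outbuf);
		}

		if (ret == LZMA_STREAM_END)
			break;

		// LZMA_TIMED_OUT is not an error; keep calling lzma_code().
		if (ret != LZMA_OK && ret != LZMA_TIMED_OUT) {
			lzma_end(&strm);
			return false;
		}
	}

	lzma_end(&strm);
	return true;
}

With a non-zero timeout, LZMA_TIMED_OUT only means that no progress was made within the time limit; it is informational rather than an error, so the loop above simply keeps calling lzma_code().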