summaryrefslogtreecommitdiffstats
path: root/liblzma/lzma/lzma2_encoder.c
blob: 992720ca6d5aca80e161863904bf5ea707a49d87 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
///////////////////////////////////////////////////////////////////////////////
//
/// \file       lzma2_encoder.c
/// \brief      LZMA2 encoder
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "lz_encoder.h"
#include "lzma_encoder.h"
#include "fastpos.h"
#include "lzma2_encoder.h"


struct lzma_coder_s {
	enum {
		SEQ_INIT,
		SEQ_LZMA_ENCODE,
		SEQ_LZMA_COPY,
		SEQ_UNCOMPRESSED_HEADER,
		SEQ_UNCOMPRESSED_COPY,
	} sequence;

	/// LZMA encoder
	lzma_coder *lzma;

	/// LZMA options currently in use.
	lzma_options_lzma opt_cur;

	bool need_properties;
	bool need_state_reset;
	bool need_dictionary_reset;

	/// Uncompressed size of a chunk
	size_t uncompressed_size;

	/// Compressed size of a chunk (excluding headers); this is also used
	/// to indicate the end of buf[] in SEQ_LZMA_COPY.
	size_t compressed_size;

	/// Read position in buf[]
	size_t buf_pos;

	/// Buffer to hold the chunk header and LZMA compressed data
	uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
};


static void
lzma2_header_lzma(lzma_coder *coder)
{
	assert(coder->uncompressed_size > 0);
	assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
	assert(coder->compressed_size > 0);
	assert(coder->compressed_size <= LZMA2_CHUNK_MAX);

	size_t pos;

	if (coder->need_properties) {
		pos = 0;

		if (coder->need_dictionary_reset)
			coder->buf[pos] = 0x80 + (3 << 5);
		else
			coder->buf[pos] = 0x80 + (2 << 5);
	} else {
		pos = 1;

		if (coder->need_state_reset)
			coder->buf[pos] = 0x80 + (1 << 5);
		else
			coder->buf[pos] = 0x80;
	}

	// Set the start position for copying.
	coder->buf_pos = pos;

	// Uncompressed size
	size_t size = coder->uncompressed_size - 1;
	coder->buf[pos++] += size >> 16;
	coder->buf[pos++] = (size >> 8) & 0xFF;
	coder->buf[pos++] = size & 0xFF;

	// Compressed size
	size = coder->compressed_size - 1;
	coder->buf[pos++] = size >> 8;
	coder->buf[pos++] = size & 0xFF;

	// Properties, if needed
	if (coder->need_properties)
		lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);

	coder->need_properties = false;
	coder->need_state_reset = false;
	coder->need_dictionary_reset = false;

	// The copying code uses coder->compressed_size to indicate the end
	// of coder->buf[], so we need add the maximum size of the header here.
	coder->compressed_size += LZMA2_HEADER_MAX;

	return;
}


static void
lzma2_header_uncompressed(lzma_coder *coder)
{
	assert(coder->uncompressed_size > 0);
	assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);

	// If this is the first chunk, we need to include dictionary
	// reset indicator.
	if (coder->need_dictionary_reset)
		coder->buf[0] = 1;
	else
		coder->buf[0] = 2;

	coder->need_dictionary_reset = false;

	// "Compressed" size
	coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
	coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;

	// Set the start position for copying.
	coder->buf_pos = 0;
	return;
}


static lzma_ret
lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
		uint8_t *restrict out, size_t *restrict out_pos,
		size_t out_size)
{
	while (*out_pos < out_size)
	switch (coder->sequence) {
	case SEQ_INIT:
		// If there's no input left and we are flushing or finishing,
		// don't start a new chunk.
		if (mf_unencoded(mf) == 0) {
			// Write end of payload marker if finishing.
			if (mf->action == LZMA_FINISH)
				out[(*out_pos)++] = 0;

			return mf->action == LZMA_RUN
					? LZMA_OK : LZMA_STREAM_END;
		}

		if (coder->need_state_reset)
			return_if_error(lzma_lzma_encoder_reset(
					coder->lzma, &coder->opt_cur));

		coder->uncompressed_size = 0;
		coder->compressed_size = 0;
		coder->sequence = SEQ_LZMA_ENCODE;

	// Fall through

	case SEQ_LZMA_ENCODE: {
		// Calculate how much more uncompressed data this chunk
		// could accept.
		const uint32_t left = LZMA2_UNCOMPRESSED_MAX
				- coder->uncompressed_size;
		uint32_t limit;

		if (left < mf->match_len_max) {
			// Must flush immediately since the next LZMA symbol
			// could make the uncompressed size of the chunk too
			// big.
			limit = 0;
		} else {
			// Calculate maximum read_limit that is OK from point
			// of view of LZMA2 chunk size.
			limit = mf->read_pos - mf->read_ahead
					+ left - mf->match_len_max;
		}

		// Save the start position so that we can update
		// coder->uncompressed_size.
		const uint32_t read_start = mf->read_pos - mf->read_ahead;

		// Call the LZMA encoder until the chunk is finished.
		const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
				coder->buf + LZMA2_HEADER_MAX,
				&coder->compressed_size,
				LZMA2_CHUNK_MAX, limit);

		coder->uncompressed_size += mf->read_pos - mf->read_ahead
				- read_start;

		assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
		assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);

		if (ret != LZMA_STREAM_END)
			return LZMA_OK;

		// See if the chunk compressed. If it didn't, we encode it
		// as uncompressed chunk. This saves a few bytes of space
		// and makes decoding faster.
		if (coder->compressed_size >= coder->uncompressed_size) {
			coder->uncompressed_size += mf->read_ahead;
			assert(coder->uncompressed_size
					<= LZMA2_UNCOMPRESSED_MAX);
			mf->read_ahead = 0;
			lzma2_header_uncompressed(coder);
			coder->need_state_reset = true;
			coder->sequence = SEQ_UNCOMPRESSED_HEADER;
			break;
		}

		// The chunk did compress at least by one byte, so we store
		// the chunk as LZMA.
		lzma2_header_lzma(coder);

		coder->sequence = SEQ_LZMA_COPY;
	}

	// Fall through

	case SEQ_LZMA_COPY:
		// Copy the compressed chunk along its headers to the
		// output buffer.
		lzma_bufcpy(coder->buf, &coder->buf_pos,
				coder->compressed_size,
				out, out_pos, out_size);
		if (coder->buf_pos != coder->compressed_size)
			return LZMA_OK;

		coder->sequence = SEQ_INIT;
		break;

	case SEQ_UNCOMPRESSED_HEADER:
		// Copy the three-byte header to indicate uncompressed chunk.
		lzma_bufcpy(coder->buf, &coder->buf_pos,
				LZMA2_HEADER_UNCOMPRESSED,
				out, out_pos, out_size);
		if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
			return LZMA_OK;

		coder->sequence = SEQ_UNCOMPRESSED_COPY;

	// Fall through

	case SEQ_UNCOMPRESSED_COPY:
		// Copy the uncompressed data as is from the dictionary
		// to the output buffer.
		mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
		if (coder->uncompressed_size != 0)
			return LZMA_OK;

		coder->sequence = SEQ_INIT;
		break;
	}

	return LZMA_OK;
}


static void
lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_free(coder->lzma, allocator);
	lzma_free(coder, allocator);
	return;
}


static lzma_ret
lzma2_encoder_options_update(lzma_coder *coder, const lzma_filter *filter)
{
	// New options can be set only when there is no incomplete chunk.
	// This is the case at the beginning of the raw stream and right
	// after LZMA_SYNC_FLUSH.
	if (filter->options == NULL || coder->sequence != SEQ_INIT)
		return LZMA_PROG_ERROR;

	// Look if there are new options. At least for now,
	// only lc/lp/pb can be changed.
	const lzma_options_lzma *opt = filter->options;
	if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp
			|| coder->opt_cur.pb != opt->pb) {
		// Validate the options.
		if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX
				|| opt->lc + opt->lp > LZMA_LCLP_MAX
				|| opt->pb > LZMA_PB_MAX)
			return LZMA_OPTIONS_ERROR;

		// The new options will be used when the encoder starts
		// a new LZMA2 chunk.
		coder->opt_cur.lc = opt->lc;
		coder->opt_cur.lp = opt->lp;
		coder->opt_cur.pb = opt->pb;
		coder->need_properties = true;
		coder->need_state_reset = true;
	}

	return LZMA_OK;
}


static lzma_ret
lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
		const void *options, lzma_lz_options *lz_options)
{
	if (options == NULL)
		return LZMA_PROG_ERROR;

	if (lz->coder == NULL) {
		lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (lz->coder == NULL)
			return LZMA_MEM_ERROR;

		lz->code = &lzma2_encode;
		lz->end = &lzma2_encoder_end;
		lz->options_update = &lzma2_encoder_options_update;

		lz->coder->lzma = NULL;
	}

	lz->coder->opt_cur = *(const lzma_options_lzma *)(options);

	lz->coder->sequence = SEQ_INIT;
	lz->coder->need_properties = true;
	lz->coder->need_state_reset = false;
	lz->coder->need_dictionary_reset
			= lz->coder->opt_cur.preset_dict == NULL
			|| lz->coder->opt_cur.preset_dict_size == 0;

	// Initialize LZMA encoder
	return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
			&lz->coder->opt_cur, lz_options));

	// Make sure that we will always have enough history available in
	// case we need to use uncompressed chunks. They are used when the
	// compressed size of a chunk is not smaller than the uncompressed
	// size, so we need to have at least LZMA2_COMPRESSED_MAX bytes
	// history available.
	if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX)
		lz_options->before_size
				= LZMA2_CHUNK_MAX - lz_options->dict_size;

	return LZMA_OK;
}


extern lzma_ret
lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	return lzma_lz_encoder_init(
			next, allocator, filters, &lzma2_encoder_init);
}


extern uint64_t
lzma_lzma2_encoder_memusage(const void *options)
{
	const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options);
	if (lzma_mem == UINT64_MAX)
		return UINT64_MAX;

	return sizeof(lzma_coder) + lzma_mem;
}


extern lzma_ret
lzma_lzma2_props_encode(const void *options, uint8_t *out)
{
	const lzma_options_lzma *const opt = options;
	uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN);

	// Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
	// on which one is the next:
	--d;
	d |= d >> 2;
	d |= d >> 3;
	d |= d >> 4;
	d |= d >> 8;
	d |= d >> 16;

	// Get the highest two bits using the proper encoding:
	if (d == UINT32_MAX)
		out[0] = 40;
	else
		out[0] = get_pos_slot(d + 1) - 24;

	return LZMA_OK;
}