summaryrefslogtreecommitdiffstats
path: root/doc/TclZlib.3
blob: a257a39a4fc10fcb8f98d7530fc7dfff15052313 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
'\"
'\" Copyright (c) 2008 Donal K. Fellows
'\"
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
.TH TclZlib 3 8.6 Tcl "Tcl Library Procedures"
.so man.macros
.BS
'\" Note:  do not modify the .SH NAME line immediately below!
.SH NAME
Tcl_ZlibAdler32, Tcl_ZlibCRC32, Tcl_ZlibDeflate, Tcl_ZlibInflate, Tcl_ZlibStreamChecksum, Tcl_ZlibStreamClose, Tcl_ZlibStreamEof, Tcl_ZlibStreamGet, Tcl_ZlibStreamGetCommandName, Tcl_ZlibStreamInit, Tcl_ZlibStreamPut \- compression and decompression functions
.SH SYNOPSIS
.nf
#include <tcl.h>
.sp
int
\fBTcl_ZlibDeflate\fR(\fIinterp, format, dataObj, level, dictObj\fR)
.sp
int
\fBTcl_ZlibInflate\fR(\fIinterp, format, dataObj, bufferSize, dictObj\fR)
.sp
unsigned int
\fBTcl_ZlibCRC32\fR(\fIinitValue, bytes, length\fR)
.sp
unsigned int
\fBTcl_ZlibAdler32\fR(\fIinitValue, bytes, length\fR)
.sp
int
\fBTcl_ZlibStreamInit\fR(\fIinterp, mode, format, level, dictObj, zshandlePtr\fR)
.sp
Tcl_Obj *
\fBTcl_ZlibStreamGetCommandName\fR(\fIzshandle\fR)
.sp
int
\fBTcl_ZlibStreamEof\fR(\fIzshandle\fR)
.sp
int
\fBTcl_ZlibStreamClose\fR(\fIzshandle\fR)
.sp
int
\fBTcl_ZlibStreamReset\fR(\fIzshandle\fR)
.sp
int
\fBTcl_ZlibStreamChecksum\fR(\fIzshandle\fR)
.sp
int
\fBTcl_ZlibStreamPut\fR(\fIzshandle, dataObj, flush\fR)
.sp
int
\fBTcl_ZlibStreamGet\fR(\fIzshandle, dataObj, count\fR)
.sp
\fBTcl_ZlibStreamSetCompressionDictionary\fR(\fIzshandle, compDict\fR)
.fi
.SH ARGUMENTS
.AS Tcl_ZlibStream zshandle in
.AP Tcl_Interp *interp in
The interpreter to store resulting compressed or uncompressed data in. Also
where any error messages are written. For \fBTcl_ZlibStreamInit\fR, this can
be NULL to create a stream that is not bound to a command.
.AP int format in
What format of compressed data to work with. Must be one of
\fBTCL_ZLIB_FORMAT_ZLIB\fR for zlib-format data, \fBTCL_ZLIB_FORMAT_GZIP\fR
for gzip-format data, or \fBTCL_ZLIB_FORMAT_RAW\fR for raw compressed data. In
addition, for decompression only, \fBTCL_ZLIB_FORMAT_AUTO\fR may also be
chosen which can automatically detect whether the compressed data was in zlib
or gzip format.
.AP Tcl_Obj *dataObj in/out
A byte-array value containing the data to be compressed or decompressed, or
to which the data extracted from the stream is appended when passed to
\fBTcl_ZlibStreamGet\fR.
.AP int level in
What level of compression to use. Should be a number from 0 to 9 or one of the
following: \fBTCL_ZLIB_COMPRESS_NONE\fR for no compression,
\fBTCL_ZLIB_COMPRESS_FAST\fR for fast but inefficient compression,
\fBTCL_ZLIB_COMPRESS_BEST\fR for slow but maximal compression, or
\fBTCL_ZLIB_COMPRESS_DEFAULT\fR for the level recommended by the zlib library.
.AP Tcl_Obj *dictObj in/out
A dictionary that contains, or which will be updated to contain, a description
of the gzip header associated with the compressed data. Only useful when the
\fIformat\fR is \fBTCL_ZLIB_FORMAT_GZIP\fR or \fBTCL_ZLIB_FORMAT_AUTO\fR. If
a NULL is passed, a default header will be used on compression and the header
will be ignored (apart from integrity checks) on decompression. See the
section \fBGZIP OPTIONS DICTIONARY\fR for details about the contents of this
dictionary.
.AP "unsigned int" initValue in
The initial value for the checksum algorithm.
.AP "Tcl_Size" bufferSize in
A hint as to what size of buffer is to be used to receive the data.
Use 0 to use a geric guess based on the input data.
.AP "unsigned char" *bytes in
An array of bytes to run the checksum algorithm over, or NULL to get the
recommended initial value for the checksum algorithm.
.AP int length in
The number of bytes in the array.
.AP int mode in
What mode to operate the stream in. Should be either
\fBTCL_ZLIB_STREAM_DEFLATE\fR for a compressing stream or
\fBTCL_ZLIB_STREAM_INFLATE\fR for a decompressing stream.
.AP Tcl_ZlibStream *zshandlePtr out
A pointer to a variable in which to write the abstract token for the stream
upon successful creation.
.AP Tcl_ZlibStream zshandle in
The abstract token for the stream to operate on.
.AP int flush in
Whether and how to flush the stream after writing the data to it. Must be one
of: \fBTCL_ZLIB_NO_FLUSH\fR if no flushing is to be done, \fBTCL_ZLIB_FLUSH\fR
if the currently compressed data must be made available for access using
\fBTcl_ZlibStreamGet\fR, \fBTCL_ZLIB_FULLFLUSH\fR if the stream must be put
into a state where the decompressor can recover from on corruption, or
\fBTCL_ZLIB_FINALIZE\fR to ensure that the stream is finished and that any
trailer demanded by the format is written.
.AP int count in
The maximum number of bytes to get from the stream, or -1 to get all remaining
bytes from the stream's buffers.
.AP Tcl_Obj *compDict in
A byte array value that is the compression dictionary to use with the stream.
Note that this is \fInot a Tcl dictionary\fR, and it is recommended that this
only ever be used with streams that were created with their \fIformat\fR set
to \fBTCL_ZLIB_FORMAT_ZLIB\fR because the other formats have no mechanism to
indicate whether a compression dictionary was present other than to fail on
decompression.
.BE
.SH DESCRIPTION
These functions form the interface from the Tcl library to the Zlib
library by Jean-loup Gailly and Mark Adler.
.PP
\fBTcl_ZlibDeflate\fR and \fBTcl_ZlibInflate\fR respectively compress and
decompress the data contained in the \fIdataObj\fR argument, according to the
\fIformat\fR and, for compression, \fIlevel\fR arguments. The dictionary in
the \fIdictObj\fR parameter is used to convey additional header information
about the compressed data when the compression format supports it; currently,
the dictionary is only used when the \fIformat\fR parameter is
\fBTCL_ZLIB_FORMAT_GZIP\fR or \fBTCL_ZLIB_FORMAT_AUTO\fR. For details of the
contents of the dictionary, see the \fBGZIP OPTIONS DICTIONARY\fR section
below. Upon success, both functions leave the resulting compressed or
decompressed data in a byte-array value that is the Tcl interpreter's result;
the returned value is a standard Tcl result code.
.PP
\fBTcl_ZlibAdler32\fR and \fBTcl_ZlibCRC32\fR compute checksums on arrays of
bytes, returning the computed checksum. Checksums are computed incrementally,
allowing data to be processed one block at a time, but this requires the
caller to maintain the current checksum and pass it in as the \fIinitValue\fR
parameter; the initial value to use for this can be obtained by using NULL for
the \fIbytes\fR parameter instead of a pointer to the array of bytes to
compute the checksum over. Thus, typical usage in the single data block case
is like this:
.PP
.CS
checksum = \fBTcl_ZlibCRC32\fR(\fBTcl_ZlibCRC32\fR(0,NULL,0), data, length);
.CE
.PP
Note that the Adler-32 algorithm is not a real checksum, but instead is a
related type of hash that works best on longer data.
.SS "ZLIB STREAMS"
.PP
\fBTcl_ZlibStreamInit\fR creates a compressing or decompressing stream that is
linked to a Tcl command, according to its arguments, and provides an abstract
token for the stream and returns a normal Tcl result code;
\fBTcl_ZlibStreamGetCommandName\fR returns the name of that command given the
stream token, or NULL if the stream has no command. Streams are not designed
to be thread-safe; each stream should only ever be used from the thread that
created it. When working with gzip streams, a dictionary (fields as given in
the \fBGZIP OPTIONS DICTIONARY\fR section below) can be given via the
\fIdictObj\fR parameter that on compression allows control over the generated
headers, and on decompression allows discovery of the existing headers. Note
that the dictionary will be written to on decompression once sufficient data
has been read to have a complete header. This means that the dictionary must
be an unshared value in that case; a blank value created with
\fBTcl_NewObj\fR is suggested.
.PP
Once a stream has been constructed, \fBTcl_ZlibStreamPut\fR is used to add
data to the stream and \fBTcl_ZlibStreamGet\fR is used to retrieve data from
the stream after processing. Both return normal Tcl result codes and leave an
error message in the result of the interpreter that the stream is registered
with in the error case (if such a registration has been performed). With
\fBTcl_ZlibStreamPut\fR, the data buffer value passed to it should not be
modified afterwards. With \fBTcl_ZlibStreamGet\fR, the data buffer value
passed to it will have the data bytes appended to it. Internally to the
stream, data is kept compressed so as to minimize the cost of buffer space.
.PP
\fBTcl_ZlibStreamChecksum\fR returns the checksum computed over the
uncompressed data according to the format, and \fBTcl_ZlibStreamEof\fR returns
a boolean value indicating whether the end of the uncompressed data has been
reached.
.PP
\fBTcl_ZlibStreamSetCompressionDictionary\fR is used to control the
compression dictionary used with the stream, a compression dictionary being an
array of bytes (such as might be created with \fBTcl_NewByteArrayObj\fR) that
is used to initialize the compression engine rather than leaving it to create
it on the fly from the data being compressed. Setting a compression dictionary
allows for more efficient compression in the case where the start of the data
is highly regular, but it does require both the compressor and the
decompressor to agree on the value to use. Compression dictionaries are only
fully supported for zlib-format data; on compression, they must be set before
any data is sent in with \fBTcl_ZlibStreamPut\fR, and on decompression they
should be set when \fBTcl_ZlibStreamGet\fR produces an \fBerror\fR with its
\fB\-errorcode\fR set to
.QW "\fBZLIB NEED_DICT\fI code\fR" ;
the \fIcode\fR will be the Adler-32 checksum (see \fBTcl_ZlibAdler32\fR) of
the compression dictionary sought. (Note that this is only true for
zlib-format streams; gzip streams ignore compression dictionaries as the
format specification doesn't permit them, and raw streams just produce a data
error if the compression dictionary is missing or incorrect.)
.PP
If you wish to clear a stream and reuse it for a new compression or
decompression action, \fBTcl_ZlibStreamReset\fR will do this and return a
normal Tcl result code to indicate whether it was successful; if the stream is
registered with an interpreter, an error message will be left in the
interpreter result when this function returns TCL_ERROR.
Finally, \fBTcl_ZlibStreamClose\fR will clean up the stream and delete the
associated command: using \fBTcl_DeleteCommand\fR on the stream's command is
equivalent (when such a command exists).
.SH "GZIP OPTIONS DICTIONARY"
.PP
The \fIdictObj\fR parameter to \fBTcl_ZlibDeflate\fR, \fBTcl_ZlibInflate\fR
and \fBTcl_ZlibStreamInit\fR is used to pass a dictionary of options about
that is used to describe the gzip header in the compressed data. When creating
compressed data, the dictionary is read and when unpacking compressed data the
dictionary is written (in which case the \fIdictObj\fR parameter must refer to
an unshared dictionary value).
.PP
The following fields in the dictionary value are understood. All other fields
are ignored. No field is required when creating a gzip-format stream.
.IP \fBcomment\fR
This holds the comment field of the header, if present. If absent, no comment
was supplied (on decompression) or will be created (on compression).
.IP \fBcrc\fR
A boolean value describing whether a CRC of the header is computed. Note that
the \fBgzip\fR program does \fInot\fR use or allow a CRC on the header.
.IP \fBfilename\fR
The name of the file that held the uncompressed data. This should not contain
any directory separators, and should be sanitized before use on decompression
with \fBfile tail\fR.
.IP \fBos\fR
The operating system type code field from the header (if not the
.QW unknown
value). See RFC 1952 for the meaning of these codes. On compression, if this
is absent then the field will be set to the
.QW unknown
value.
.IP \fBsize\fR
The size of the uncompressed data. This is ignored on compression; the size
of the data compressed depends on how much data is supplied to the
compression engine.
.IP \fBtime\fR
The time field from the header if non-zero, expected to be the time that the
file named by the \fBfilename\fR field was modified. Suitable for use with
\fBclock format\fR. On creation, the right value to use is that from
\fBclock seconds\fR or \fBfile mtime\fR.
.IP \fBtype\fR
The type of the uncompressed data (either \fBbinary\fR or \fBtext\fR) if
known.
.SH "REFERENCE COUNT MANAGEMENT"
.PP
\fBTcl_ZlibDeflate\fR and \fBTcl_ZlibInflate\fR take a value with arbitrary
reference count for their \fIdataObj\fR and \fIdictObj\fR arguments (the
latter often being NULL instead), and set the interpreter result with their
output value (or an error).  The existing interpreter result should not be
passed as any argument value unless an additional reference is held.
.PP
\fBTcl_ZlibStreamInit\fR takes a value with arbitrary reference count for its
\fIdictObj\fR argument; it only reads from it. The existing interpreter result
should not be passed unless an additional reference is held.
.PP
\fBTcl_ZlibStreamGetCommandName\fR returns a zero reference count value, much
like \fBTcl_NewObj\fR.
.PP
The \fIdataObj\fR argument to \fBTcl_ZlibStreamPut\fR is a value with
arbitrary reference count; it is only ever read from.
.PP
The \fIdataObj\fR argument to \fBTcl_ZlibStreamGet\fR is an unshared value
(see \fBTcl_IsShared\fR) that will be updated by the function.
.PP
The \fIcompDict\fR argument to \fBTcl_ZlibStreamSetCompressionDictionary\fR,
if non-NULL, may be duplicated or may have its reference count incremented.
Using a zero reference count value is not recommended.

.SH "PORTABILITY NOTES"
These functions will fail gracefully if Tcl is not linked with the zlib
library.
.SH "SEE ALSO"
Tcl_NewByteArrayObj(3), zlib(n)
'\"Tcl_StackChannel(3)
.SH "KEYWORDS"
compress, decompress, deflate, gzip, inflate
'\" Local Variables:
'\" mode: nroff
'\" fill-column: 78
'\" End: