diff options
Diffstat (limited to 'Doc/lib/libstruct.tex')
-rw-r--r-- | Doc/lib/libstruct.tex | 269 |
1 files changed, 0 insertions, 269 deletions
diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex deleted file mode 100644 index 68c5c9a..0000000 --- a/Doc/lib/libstruct.tex +++ /dev/null @@ -1,269 +0,0 @@ -\section{\module{struct} --- - Interpret strings as packed binary data} -\declaremodule{builtin}{struct} - -\modulesynopsis{Interpret strings as packed binary data.} - -\indexii{C}{structures} -\indexiii{packing}{binary}{data} - -This module performs conversions between Python values and C -structs represented as Python strings. It uses \dfn{format strings} -(explained below) as compact descriptions of the lay-out of the C -structs and the intended conversion to/from Python values. This can -be used in handling binary data stored in files or from network -connections, among other sources. - -The module defines the following exception and functions: - - -\begin{excdesc}{error} - Exception raised on various occasions; argument is a string - describing what is wrong. -\end{excdesc} - -\begin{funcdesc}{pack}{fmt, v1, v2, \textrm{\ldots}} - Return a string containing the values - \code{\var{v1}, \var{v2}, \textrm{\ldots}} packed according to the given - format. The arguments must match the values required by the format - exactly. -\end{funcdesc} - -\begin{funcdesc}{pack_into}{fmt, buffer, offset, v1, v2, \moreargs} - Pack the values \code{\var{v1}, \var{v2}, \textrm{\ldots}} according to the given - format, write the packed bytes into the writable \var{buffer} starting at - \var{offset}. - Note that the offset is not an optional argument. - - \versionadded{2.5} -\end{funcdesc} - -\begin{funcdesc}{unpack}{fmt, string} - Unpack the string (presumably packed by \code{pack(\var{fmt}, - \textrm{\ldots})}) according to the given format. The result is a - tuple even if it contains exactly one item. The string must contain - exactly the amount of data required by the format - (\code{len(\var{string})} must equal \code{calcsize(\var{fmt})}). 
-\end{funcdesc} - -\begin{funcdesc}{unpack_from}{fmt, buffer\optional{,offset \code{= 0}}} - Unpack the \var{buffer} according to the given format. - The result is a tuple even if it contains exactly one item. The - \var{buffer} must contain at least the amount of data required by the - format (\code{len(buffer[offset:])} must be at least - \code{calcsize(\var{fmt})}). - - \versionadded{2.5} -\end{funcdesc} - -\begin{funcdesc}{calcsize}{fmt} - Return the size of the struct (and hence of the string) - corresponding to the given format. -\end{funcdesc} - -Format characters have the following meaning; the conversion between -C and Python values should be obvious given their types: - -\begin{tableiv}{c|l|l|c}{samp}{Format}{C Type}{Python}{Notes} - \lineiv{x}{pad byte}{no value}{} - \lineiv{c}{\ctype{char}}{string of length 1}{} - \lineiv{b}{\ctype{signed char}}{integer}{} - \lineiv{B}{\ctype{unsigned char}}{integer}{} - \lineiv{t}{\ctype{_Bool}}{bool}{(1)} - \lineiv{h}{\ctype{short}}{integer}{} - \lineiv{H}{\ctype{unsigned short}}{integer}{} - \lineiv{i}{\ctype{int}}{integer}{} - \lineiv{I}{\ctype{unsigned int}}{long}{} - \lineiv{l}{\ctype{long}}{integer}{} - \lineiv{L}{\ctype{unsigned long}}{long}{} - \lineiv{q}{\ctype{long long}}{long}{(2)} - \lineiv{Q}{\ctype{unsigned long long}}{long}{(2)} - \lineiv{f}{\ctype{float}}{float}{} - \lineiv{d}{\ctype{double}}{float}{} - \lineiv{s}{\ctype{char[]}}{string}{} - \lineiv{p}{\ctype{char[]}}{string}{} - \lineiv{P}{\ctype{void *}}{integer}{} -\end{tableiv} - -\noindent -Notes: - -\begin{description} -\item[(1)] - The \character{t} conversion code corresponds to the \ctype{_Bool} type - defined by C99. If this type is not available, it is simulated using a - \ctype{char}. In standard mode, it is always represented by one byte. 
- \versionadded{2.6} -\item[(2)] - The \character{q} and \character{Q} conversion codes are available in - native mode only if the platform C compiler supports C \ctype{long long}, - or, on Windows, \ctype{__int64}. They are always available in standard - modes. - \versionadded{2.2} -\end{description} - - -A format character may be preceded by an integral repeat count. For -example, the format string \code{'4h'} means exactly the same as -\code{'hhhh'}. - -Whitespace characters between formats are ignored; a count and its -format must not contain whitespace though. - -For the \character{s} format character, the count is interpreted as the -size of the string, not a repeat count like for the other format -characters; for example, \code{'10s'} means a single 10-byte string, while -\code{'10c'} means 10 characters. For packing, the string is -truncated or padded with null bytes as appropriate to make it fit. -For unpacking, the resulting string always has exactly the specified -number of bytes. As a special case, \code{'0s'} means a single, empty -string (while \code{'0c'} means 0 characters). - -The \character{p} format character encodes a ``Pascal string'', meaning -a short variable-length string stored in a fixed number of bytes. -The count is the total number of bytes stored. The first byte stored is -the length of the string, or 255, whichever is smaller. The bytes -of the string follow. If the string passed in to \function{pack()} is too -long (longer than the count minus 1), only the leading count-1 bytes of the -string are stored. If the string is shorter than count-1, it is padded -with null bytes so that exactly count bytes in all are used. Note that -for \function{unpack()}, the \character{p} format character consumes count -bytes, but that the string returned can never contain more than 255 -characters. - -For the \character{I}, \character{L}, \character{q} and \character{Q} -format characters, the return value is a Python long integer. 
- -For the \character{P} format character, the return value is a Python -integer or long integer, depending on the size needed to hold a -pointer when it has been cast to an integer type. A \NULL{} pointer will -always be returned as the Python integer \code{0}. When packing pointer-sized -values, Python integer or long integer objects may be used. For -example, the Alpha and Merced processors use 64-bit pointer values, -meaning a Python long integer will be used to hold the pointer; other -platforms use 32-bit pointers and will use a Python integer. - -For the \character{t} format character, the return value is either -\constant{True} or \constant{False}. When packing, the truth value -of the argument object is used. Either 0 or 1 in the native or standard -bool representation will be packed, and any non-zero value will be True -when unpacking. - -By default, C numbers are represented in the machine's native format -and byte order, and properly aligned by skipping pad bytes if -necessary (according to the rules used by the C compiler). - -Alternatively, the first character of the format string can be used to -indicate the byte order, size and alignment of the packed data, -according to the following table: - -\begin{tableiii}{c|l|l}{samp}{Character}{Byte order}{Size and alignment} - \lineiii{@}{native}{native} - \lineiii{=}{native}{standard} - \lineiii{<}{little-endian}{standard} - \lineiii{>}{big-endian}{standard} - \lineiii{!}{network (= big-endian)}{standard} -\end{tableiii} - -If the first character is not one of these, \character{@} is assumed. - -Native byte order is big-endian or little-endian, depending on the -host system. For example, Motorola and Sun processors are big-endian; -Intel and DEC processors are little-endian. - -Native size and alignment are determined using the C compiler's -\keyword{sizeof} expression. This is always combined with native byte -order. 
- -Standard size and alignment are as follows: no alignment is required -for any type (so you have to use pad bytes); -\ctype{short} is 2 bytes; -\ctype{int} and \ctype{long} are 4 bytes; -\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes; -\ctype{float} and \ctype{double} are 32-bit and 64-bit -IEEE floating point numbers, respectively. -\ctype{_Bool} is 1 byte. - -Note the difference between \character{@} and \character{=}: both use -native byte order, but the size and alignment of the latter is -standardized. - -The form \character{!} is available for those poor souls who claim they -can't remember whether network byte order is big-endian or -little-endian. - -There is no way to indicate non-native byte order (force -byte-swapping); use the appropriate choice of \character{<} or -\character{>}. - -The \character{P} format character is only available for the native -byte ordering (selected as the default or with the \character{@} byte -order character). The byte order character \character{=} chooses to -use little- or big-endian ordering based on the host system. The -struct module does not interpret this as native ordering, so the -\character{P} format is not available. - -Examples (all using native byte order, size and alignment, on a -big-endian machine): - -\begin{verbatim} ->>> from struct import * ->>> pack('hhl', 1, 2, 3) -'\x00\x01\x00\x02\x00\x00\x00\x03' ->>> unpack('hhl', '\x00\x01\x00\x02\x00\x00\x00\x03') -(1, 2, 3) ->>> calcsize('hhl') -8 -\end{verbatim} - -Hint: to align the end of a structure to the alignment requirement of -a particular type, end the format with the code for that type with a -repeat count of zero. For example, the format \code{'llh0l'} -specifies two pad bytes at the end, assuming longs are aligned on -4-byte boundaries. This only works when native size and alignment are -in effect; standard size and alignment does not enforce any alignment. 
- -\begin{seealso} - \seemodule{array}{Packed binary storage of homogeneous data.} - \seemodule{xdrlib}{Packing and unpacking of XDR data.} -\end{seealso} - -\subsection{Struct Objects \label{struct-objects}} - -The \module{struct} module also defines the following type: - -\begin{classdesc}{Struct}{format} - Return a new Struct object which writes and reads binary data according to - the format string \var{format}. Creating a Struct object once and calling - its methods is more efficient than calling the \module{struct} functions - with the same format since the format string only needs to be compiled once. - - \versionadded{2.5} -\end{classdesc} - -Compiled Struct objects support the following methods and attributes: - -\begin{methoddesc}[Struct]{pack}{v1, v2, \moreargs} - Identical to the \function{pack()} function, using the compiled format. - (\code{len(result)} will equal \member{self.size}.) -\end{methoddesc} - -\begin{methoddesc}[Struct]{pack_into}{buffer, offset, v1, v2, \moreargs} - Identical to the \function{pack_into()} function, using the compiled format. -\end{methoddesc} - -\begin{methoddesc}[Struct]{unpack}{string} - Identical to the \function{unpack()} function, using the compiled format. - (\code{len(string)} must equal \member{self.size}). -\end{methoddesc} - -\begin{methoddesc}[Struct]{unpack_from}{buffer\optional{,offset - \code{= 0}}} - Identical to the \function{unpack_from()} function, using the compiled format. - (\code{len(buffer[offset:])} must be at least \member{self.size}). -\end{methoddesc} - -\begin{memberdesc}[Struct]{format} - The format string used to construct this Struct object. -\end{memberdesc} - |