summaryrefslogtreecommitdiffstats
path: root/Doc/lib/libascii.tex
blob: 003bd9544de9bd69a3c20323f52cdeeda2f80fef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
\section{\module{curses.ascii} ---
         Utilities for ASCII characters}

\declaremodule{standard}{curses.ascii}
\modulesynopsis{Constants and set-membership functions for
                \ASCII\ characters.}
\moduleauthor{Eric S. Raymond}{esr@thyrsus.com}
\sectionauthor{Eric S. Raymond}{esr@thyrsus.com}

\versionadded{1.6}

The \module{curses.ascii} module supplies named constants for
\ASCII{} characters and functions to test membership in various
\ASCII{} character classes.  The constants supplied are names for
control characters as follows:

\begin{tableii}{l|l}{constant}{Name}{Meaning}
  \lineii{NUL}{Null}
  \lineii{SOH}{Start of heading, console interrupt}
  \lineii{STX}{Start of text}
  \lineii{ETX}{End of text}
  \lineii{EOT}{End of transmission}
  \lineii{ENQ}{Enquiry, goes with \constant{ACK} flow control}
  \lineii{ACK}{Acknowledgement}
  \lineii{BEL}{Bell}
  \lineii{BS}{Backspace}
  \lineii{TAB}{Tab}
  \lineii{HT}{Alias for \constant{TAB}: ``Horizontal tab''}
  \lineii{LF}{Line feed}
  \lineii{NL}{Alias for \constant{LF}: ``New line''}
  \lineii{VT}{Vertical tab}
  \lineii{FF}{Form feed}
  \lineii{CR}{Carriage return}
  \lineii{SO}{Shift-out, begin alternate character set}
  \lineii{SI}{Shift-in, resume default character set}
  \lineii{DLE}{Data-link escape}
  \lineii{DC1}{XON, for flow control}
  \lineii{DC2}{Device control 2, block-mode flow control}
  \lineii{DC3}{XOFF, for flow control}
  \lineii{DC4}{Device control 4}
  \lineii{NAK}{Negative acknowledgement}
  \lineii{SYN}{Synchronous idle}
  \lineii{ETB}{End transmission block}
  \lineii{CAN}{Cancel}
  \lineii{EM}{End of medium}
  \lineii{SUB}{Substitute}
  \lineii{ESC}{Escape}
  \lineii{FS}{File separator}
  \lineii{GS}{Group separator}
  \lineii{RS}{Record separator, block-mode terminator}
  \lineii{US}{Unit separator}
  \lineii{SP}{Space}
  \lineii{DEL}{Delete}
\end{tableii}

Note that many of these have little practical significance in modern
usage.  The mnemonics derive from teleprinter conventions that predate
digital computers.

The module supplies the following functions, patterned on those in the
standard C library:


\begin{funcdesc}{isalnum}{c}
Checks for an \ASCII{} alphanumeric character; it is equivalent to
\samp{isalpha(\var{c}) or isdigit(\var{c})}.
\end{funcdesc}

\begin{funcdesc}{isalpha}{c}
Checks for an \ASCII{} alphabetic character; it is equivalent to
\samp{isupper(\var{c}) or islower(\var{c})}.
\end{funcdesc}

\begin{funcdesc}{isascii}{c}
Checks for a character value that fits in the 7-bit \ASCII{} set.
\end{funcdesc}

\begin{funcdesc}{isblank}{c}
Checks for an \ASCII{} whitespace character; space or horizontal tab.
\end{funcdesc}

\begin{funcdesc}{iscntrl}{c}
Checks for an \ASCII{} control character (in the range 0x00 to 0x1f).
\end{funcdesc}

\begin{funcdesc}{isdigit}{c}
Checks for an \ASCII{} decimal digit, \character{0} through
\character{9}.  This is equivalent to \samp{\var{c} in string.digits}.
\end{funcdesc}

\begin{funcdesc}{isgraph}{c}
Checks for any \ASCII{} printable character except space.
\end{funcdesc}

\begin{funcdesc}{islower}{c}
Checks for an \ASCII{} lowercase character.
\end{funcdesc}

\begin{funcdesc}{isprint}{c}
Checks for any \ASCII{} printable character including space.
\end{funcdesc}

\begin{funcdesc}{ispunct}{c}
Checks for any printable \ASCII{} character which is not a space or an
alphanumeric character.
\end{funcdesc}

\begin{funcdesc}{isspace}{c}
Checks for \ASCII{} white-space characters: space, line feed,
carriage return, form feed, horizontal tab, vertical tab.
\end{funcdesc}

\begin{funcdesc}{isupper}{c}
Checks for an \ASCII{} uppercase letter.
\end{funcdesc}

\begin{funcdesc}{isxdigit}{c}
Checks for an \ASCII{} hexadecimal digit.  This is equivalent to
\samp{\var{c} in string.hexdigits}.
\end{funcdesc}

\begin{funcdesc}{isctrl}{c}
Checks for an \ASCII{} control character (ordinal values 0 to 31).
\end{funcdesc}

\begin{funcdesc}{ismeta}{c}
Checks for a non-\ASCII{} character (ordinal values 0x80 and above).
\end{funcdesc}

These functions accept either integers or strings; when the argument
is a string, it is first converted using the built-in function
\function{ord()}.

Note that all these functions check ordinal bit values derived from the 
first character of the string you pass in; they do not actually know
anything about the host machine's character encoding.  For functions 
that know about the character encoding (and handle
internationalization properly) see the \refmodule{string} module.

The following three functions take either a single-character string or
an integer byte value; they return a value of the same type.

\begin{funcdesc}{ascii}{c}
Return the \ASCII{} value corresponding to the low 7 bits of \var{c}.
\end{funcdesc}

\begin{funcdesc}{ctrl}{c}
Return the control character corresponding to the given character
(the character bit value is bitwise-anded with 0x1f).
\end{funcdesc}

\begin{funcdesc}{alt}{c}
Return the 8-bit character corresponding to the given \ASCII{} character
(the character bit value is bitwise-ored with 0x80).
\end{funcdesc}

The following function takes either a single-character string or
integer value; it returns a string.

\begin{funcdesc}{unctrl}{c}
Return a string representation of the \ASCII{} character \var{c}.  If
\var{c} is printable, this string is the character itself.  If the
character is a control character (0x00--0x1f), the string consists of a
caret (\character{\^{}}) followed by the corresponding uppercase letter.
If the character is an \ASCII{} delete (0x7f), the string is
\code{'\^{}?'}.  If the character has its meta bit (0x80) set, the meta
bit is stripped, the preceding rules applied, and
\character{!} prepended to the result.
\end{funcdesc}

\begin{datadesc}{controlnames}
A 33-element string array that contains the \ASCII{} mnemonics for the
thirty-two \ASCII{} control characters from 0 (NUL) to 0x1f (US), in
order, plus the mnemonic \samp{SP} for the space character.
\end{datadesc}