summaryrefslogtreecommitdiffstats
path: root/Doc/lib/libcodecs.tex
diff options
context:
space:
mode:
Diffstat (limited to 'Doc/lib/libcodecs.tex')
-rw-r--r--Doc/lib/libcodecs.tex72
1 files changed, 71 insertions, 1 deletions
diff --git a/Doc/lib/libcodecs.tex b/Doc/lib/libcodecs.tex
index caaaaf4..38586ef 100644
--- a/Doc/lib/libcodecs.tex
+++ b/Doc/lib/libcodecs.tex
@@ -5,7 +5,7 @@
\modulesynopsis{Encode and decode data and streams.}
\moduleauthor{Marc-Andre Lemburg}{mal@lemburg.com}
\sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
-
+\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
\index{Unicode}
\index{Codecs}
@@ -809,6 +809,11 @@ listed as operand type in the table.
{byte string}
{Convert operand to hexadecimal representation, with two digits per byte}
+\lineiv{idna}
+ {}
+ {Unicode string}
+ {Implements \rfc{3490}. \versionadded{2.3}. See also \module{encodings.idna}}
+
\lineiv{mbcs}
{dbcs}
{Unicode string}
@@ -819,6 +824,11 @@ listed as operand type in the table.
{Unicode string}
{Encoding of PalmOS 3.5}
+\lineiv{punycode}
+ {}
+ {Unicode string}
+ {Implements \rfc{3492}. \versionadded{2.3}}
+
\lineiv{quopri_codec}
{quopri, quoted-printable, quotedprintable}
{byte string}
@@ -865,3 +875,63 @@ listed as operand type in the table.
{Compress the operand using gzip}
\end{tableiv}
+
+\subsection{\module{encodings.idna} ---
+ Internationalized Domain Names in Applications}
+
+\declaremodule{standard}{encodings.idna}
+\modulesynopsis{Internationalized Domain Names implementation}
+\moduleauthor{Martin v. L\"owis}{martin@v.loewis.de}
+
+This module implements \rfc{3490} (Internationalized Domain Names in
+Applications) and \rfc{3491} (Nameprep: A Stringprep Profile for
+Internationalized Domain Names (IDN)). It builds upon the
+\code{punycode} encoding and \module{stringprep}. \versionadded{2.3}
+
+These RFCs together define a protocol to support non-ASCII characters
+in domain names. A domain name containing non-ASCII characters (such
+as ``www.Alliancefran\c{c}aise.nu'') is converted into an
+ASCII-compatible encoding (ACE, such as
+``www.xn--alliancefranaise-npb.nu''). The ACE form of the domain name
+is then used in all places where arbitrary characters are not allowed
+by the protocol, such as DNS queries, HTTP \code{Host:} fields, and so
+on. This conversion is carried out in the application, if possible
+invisibly to the user: the application should transparently convert
+Unicode domain labels to IDNA on the wire, and convert ACE labels back
+to Unicode before presenting them to the user.
+
+Python supports this conversion in several ways: The \code{idna} codec
+performs conversion between Unicode and the ACE. Furthermore, the
+\module{socket} module transparently converts Unicode host names to
+ACE, so that applications need not be concerned about converting host
+names themselves when they pass them to the socket module. On top of
+that, modules that have host names as function parameters, such as
+\module{httplib} and \module{ftplib}, accept Unicode host names
+(\module{httplib} then also transparently sends an IDNA hostname in
+the \code{Host:} field if it sends that field at all).
+
+When receiving host names from the wire (such as in reverse name
+lookup), no automatic conversion to Unicode is performed: Applications
+wishing to present such host names to the user should decode them to
+Unicode.
+
+The module \module{encodings.idna} also implements the nameprep
+procedure, which performs certain normalizations on host names, to
+achieve case-insensitivity of international domain names, and to unify
+similar characters. The nameprep functions can be used directly if
+desired.
+
+\begin{funcdesc}{nameprep}{label}
+Return the nameprepped version of \var{label}. The implementation
+currently assumes query strings, so \code{AllowUnassigned} is
+true.
+\end{funcdesc}
+
+\begin{funcdesc}{ToASCII}{label}
+Convert a label to ASCII, as specified in \rfc{3490}.
+\code{UseSTD3ASCIIRules} is assumed to be false.
+\end{funcdesc}
+
+\begin{funcdesc}{ToUnicode}{label}
+Convert a label to Unicode, as specified in \rfc{3490}.
+\end{funcdesc}