1 files changed, 111 insertions, 41 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3
index c365aaf..1478c35 100644
--- a/doc/Encoding.3
+++ b/doc/Encoding.3
@@ -4,13 +4,11 @@
 '\" See the file "license.terms" for information on usage and redistribution
 '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 '\" 
-'\" RCS: @(#) $Id: Encoding.3,v 1.20 2004/10/07 15:15:37 dkf Exp $
-'\" 
-.so man.macros
 .TH Tcl_GetEncoding 3 "8.1" Tcl "Tcl Library Procedures"
+.so man.macros
 .BS
 .SH NAME
-Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings
+Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings
 .SH SYNOPSIS
 .nf
 \fB#include <tcl.h>\fR
@@ -21,6 +19,9 @@ Tcl_Encoding
 void
 \fBTcl_FreeEncoding\fR(\fIencoding\fR)
 .sp
+int
+\fBTcl_GetEncodingFromObj\fR(\fIinterp, objPtr, encodingPtr\fR)
+.sp
 char *
 \fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
 .sp
@@ -47,20 +48,28 @@ const char *
 int
 \fBTcl_SetSystemEncoding\fR(\fIinterp, name\fR)
 .sp
+const char *
+\fBTcl_GetEncodingNameFromEnvironment\fR(\fIbufPtr\fR)
+.sp
 void
 \fBTcl_GetEncodingNames\fR(\fIinterp\fR)
 .sp
 Tcl_Encoding
 \fBTcl_CreateEncoding\fR(\fItypePtr\fR)
 .sp
+Tcl_Obj *
+\fBTcl_GetEncodingSearchPath\fR()
+.sp
+int
+\fBTcl_SetEncodingSearchPath\fR(\fIsearchPath\fR)
+.sp
 const char *
 \fBTcl_GetDefaultEncodingDir\fR(\fIvoid\fR)
 .sp
 void
 \fBTcl_SetDefaultEncodingDir\fR(\fIpath\fR)
-
 .SH ARGUMENTS
-.AS Tcl_EncodingState *dstWrotePtr in/out
+.AS "const Tcl_EncodingType" *dstWrotePtr in/out
 .AP Tcl_Interp *interp in
 Interpreter to use for error reporting, or NULL if no error reporting is
 desired.
@@ -69,6 +78,10 @@ Name of encoding to load.
 .AP Tcl_Encoding encoding in
 The encoding to query, free, or use for converting text.  If \fIencoding\fR is 
 NULL, the current system encoding is used.
+.AP Tcl_Obj *objPtr in
+Name of encoding to get token for.
+.AP Tcl_Encoding *encodingPtr out
+Points to storage where encoding token is to be written.
 .AP "const char" *src in
 For the \fBTcl_ExternalToUtf\fR functions, an array of bytes in the
 specified encoding that are to be converted to UTF-8.  For the
@@ -93,7 +106,7 @@ block in a (potentially multi-block) input stream, telling the conversion
 routine to perform any finalization that needs to occur after the last
 byte is converted and then to reset to an initial state.
 \fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should
-return immediately upon reading a source character that doesn't exist in
+return immediately upon reading a source character that does not exist in
 the target encoding; otherwise a default fallback character will
 automatically be substituted.  
 .AP Tcl_EncodingState *statePtr in/out
@@ -121,8 +134,12 @@ buffer as a result of the conversion.  May be NULL.
 .AP int *dstCharsPtr out
 Filled with the number of characters that correspond to the number of bytes
 stored in the output buffer.  May be NULL.
-.AP Tcl_EncodingType *typePtr in
+.AP Tcl_DString *bufPtr out
+Storage for the prescribed system encoding name.
+.AP "const Tcl_EncodingType" *typePtr in
 Structure that defines a new type of encoding.  
+.AP Tcl_Obj *searchPath in
+List of filesystem directories in which to search for encoding data files.
 .AP "const char" *path in
 A path to the location of the encoding file.  
 .BE
@@ -171,6 +188,18 @@ anywhere (i.e., it has been freed as many times as it has been gotten)
 \fBTcl_FreeEncoding\fR will release all storage the encoding was using
 and delete it from the database. 
 .PP
+\fBTcl_GetEncodingFromObj\fR treats the string representation of
+\fIobjPtr\fR as an encoding name, and finds an encoding with that
+name, just as \fBTcl_GetEncoding\fR does. When an encoding is found,
+it is cached within the \fIobjPtr\fR value for future reference, the
+\fBTcl_Encoding\fR token is written to the storage pointed to by
+\fIencodingPtr\fR, and the value \fBTCL_OK\fR is returned. If no such
+encoding is found, the value \fBTCL_ERROR\fR is returned, and no
+writing to \fB*\fR\fIencodingPtr\fR takes place. Just as with
+\fBTcl_GetEncoding\fR, the caller should call \fBTcl_FreeEncoding\fR
+on the resulting encoding token when that token will no longer be
+used.
+.PP
 \fBTcl_ExternalToUtfDString\fR converts a source buffer \fIsrc\fR from the
 specified \fIencoding\fR into UTF-8.  The converted bytes are stored in 
 \fIdstPtr\fR, which is then null-terminated.  The caller should eventually
@@ -230,20 +259,28 @@ is filled with the corresponding number of bytes that were stored in
 Windows-only convenience
 functions for converting between UTF-8 and Windows strings.  On Windows 95
 (as with the Unix operating system),
-all strings exchanged between Tcl and the operating system are "char"
+all strings exchanged between Tcl and the operating system are
+.QW "char"
 based.  On Windows NT, some strings exchanged between Tcl and the
-operating system are "char" oriented while others are in Unicode.  By
+operating system are
+.QW "char"
+oriented while others are in Unicode.  By
 convention, in Windows a TCHAR is a character in the ANSI code page
 on Windows 95 and a Unicode character on Windows NT.
 .PP
-If you planned to use the same "char" based interfaces on both Windows
+If you planned to use the same
+.QW "char"
+based interfaces on both Windows
 95 and Windows NT, you could use \fBTcl_UtfToExternal\fR and
 \fBTcl_ExternalToUtf\fR (or their \fBTcl_DString\fR equivalents) with an
 encoding of NULL (the current system encoding).  On the other hand,
 if you planned to use the Unicode interface when running on Windows NT
-and the "char" interfaces when running on Windows 95, you would have
+and the
+.QW "char"
+interfaces when running on Windows 95, you would have
 to perform the following type of test over and over in your program
 (as represented in pseudo-code):
+.PP
 .CS
 if (running NT) {
     encoding <- Tcl_GetEncoding("unicode");
@@ -253,6 +290,7 @@ if (running NT) {
     nativeBuffer <- Tcl_UtfToExternal(NULL, utfBuffer);
 }
 .CE
+.PP
 \fBTcl_WinUtfToTChar\fR and \fBTcl_WinTCharToUtf\fR automatically
 handle this test and use the proper encoding based on the current
 operating system.  \fBTcl_WinUtfToTChar\fR returns a pointer to
@@ -277,6 +315,13 @@ procedure increments the reference count of the new system encoding,
 decrements the reference count of the old system encoding, and returns
 \fBTCL_OK\fR.
 .PP
+\fBTcl_GetEncodingNameFromEnvironment\fR provides a means for the Tcl
+library to report the encoding name it believes to be the correct one
+to use as the system encoding, based on system calls and examination of
+the environment suitable for the platform.  It accepts \fIbufPtr\fR,
+a pointer to an uninitialized or freed \fBTcl_DString\fR, and writes
+the encoding name to it.  The \fBTcl_DStringValue\fR of \fIbufPtr\fR is returned.
+.PP
 \fBTcl_GetEncodingNames\fR sets the \fIinterp\fR result to a list
 consisting of the names of all the encodings that are currently defined
 or can be dynamically loaded, searching the encoding path specified by
@@ -303,13 +348,13 @@ convert between this encoding and UTF-8.  It is defined as follows:
 .PP
 .CS
 typedef struct Tcl_EncodingType {
-        const char *\fIencodingName\fR;
-        Tcl_EncodingConvertProc *\fItoUtfProc\fR;
-        Tcl_EncodingConvertProc *\fIfromUtfProc\fR;
-        Tcl_EncodingFreeProc *\fIfreeProc\fR;
-        ClientData \fIclientData\fR;
-        int \fInullSize\fR;
-} Tcl_EncodingType;  
+    const char *\fIencodingName\fR;
+    Tcl_EncodingConvertProc *\fItoUtfProc\fR;
+    Tcl_EncodingConvertProc *\fIfromUtfProc\fR;
+    Tcl_EncodingFreeProc *\fIfreeProc\fR;
+    ClientData \fIclientData\fR;
+    int \fInullSize\fR;
+} \fBTcl_EncodingType\fR;  
 .CE
 .PP
 The \fIencodingName\fR provides a string name for the encoding, by
@@ -337,7 +382,7 @@ The callback procedures \fItoUtfProc\fR and \fIfromUtfProc\fR should match the
 type \fBTcl_EncodingConvertProc\fR:
 .PP
 .CS
-typedef int Tcl_EncodingConvertProc(
+typedef int \fBTcl_EncodingConvertProc\fR(
         ClientData \fIclientData\fR,
         const char *\fIsrc\fR, 
         int \fIsrcLen\fR, 
@@ -367,8 +412,9 @@ procedure will be a non-NULL location.
 .PP
 The callback procedure \fIfreeProc\fR, if non-NULL, should match the type 
 \fBTcl_EncodingFreeProc\fR:
+.PP
 .CS
-typedef void Tcl_EncodingFreeProc(
+typedef void \fBTcl_EncodingFreeProc\fR(
         ClientData \fIclientData\fR);
 .CE
 .PP
@@ -376,13 +422,33 @@ This \fIfreeProc\fR function is called when the encoding is deleted.  The
 \fIclientData\fR parameter is the same as the \fIclientData\fR field
 specified to \fBTcl_CreateEncoding\fR when the encoding was created.  
 .PP
-
+\fBTcl_GetEncodingSearchPath\fR and \fBTcl_SetEncodingSearchPath\fR
+are called to access and set the list of filesystem directories searched
+for encoding data files.  
+.PP
+The value returned by \fBTcl_GetEncodingSearchPath\fR
+is the value stored by the last successful call to
+\fBTcl_SetEncodingSearchPath\fR.  If no calls to
+\fBTcl_SetEncodingSearchPath\fR have occurred, Tcl will compute an initial
+value based on the environment.  There is one encoding search path for the
+entire process, shared by all threads in the process.
+.PP
+\fBTcl_SetEncodingSearchPath\fR stores \fIsearchPath\fR and returns
+\fBTCL_OK\fR, unless \fIsearchPath\fR is not a valid Tcl list, which
+causes \fBTCL_ERROR\fR to be returned.  The elements of \fIsearchPath\fR
+are not verified as existing readable filesystem directories.  When
+searching for encoding data files takes place, any non-existent or
+non-readable filesystem directories on the \fIsearchPath\fR are silently
+ignored.
+.PP
 \fBTcl_GetDefaultEncodingDir\fR and \fBTcl_SetDefaultEncodingDir\fR
-access and set the directory to use when locating the default encoding
-files.  If this value is not NULL, the \fBTclpInitLibraryPath\fR routine
-appends the path to the head of the search path, and uses this path as
-the first place to look into when trying to locate the encoding file.
-
+are obsolete interfaces best replaced with calls to
+\fBTcl_GetEncodingSearchPath\fR and \fBTcl_SetEncodingSearchPath\fR.
+They are called to access and set the first element of the \fIsearchPath\fR
+list.  Since Tcl searches \fIsearchPath\fR for encoding data files in
+list order, these routines establish the
+.QW default
+directory in which to find encoding data files.
 .SH "ENCODING FILES"
 Space would prohibit precompiling into Tcl every possible encoding
 algorithm, so many encodings are stored on disk as dynamically-loadable
@@ -394,7 +460,9 @@ external encoding may consist of single-byte, multi-byte, or double-byte
 characters.  
 .PP
 Each dynamically-loadable encoding is represented as a text file.  The
-initial line of the file, beginning with a ``#'' symbol, is a comment
+initial line of the file, beginning with a
+.QW #
+symbol, is a comment
 that provides a human-readable description of the file.  The next line
 identifies the type of encoding file.  It can be one of the following
 letters:
@@ -421,6 +489,7 @@ Cases [1], [2], and [3] are collectively referred to as table-based encoding
 files.  The lines in a table-based encoding file are in the same
 format as this example taken from the \fBshiftjis\fR encoding (this is not
 the complete file):
+.PP
 .CS
 # Encoding file: shiftjis, multi-byte
 M
@@ -480,25 +549,26 @@ and 0x8163 in \fBshiftjis\fR map to 203E and 2026 in Unicode, respectively.
 Following the first page will be all the other pages, each in the same
 format as the first: one number identifying the page followed by 256
 double-byte Unicode characters.  If a character in the encoding maps to the
-Unicode character 0000, it means that the character doesn't actually exist.
+Unicode character 0000, it means that the character does not actually exist.
 If all characters on a page would map to 0000, that page can be omitted.
 .PP
 Case [4] is the escape-sequence encoding file.  The lines in an this type of
 file are in the same format as this example taken from the \fBiso2022-jp\fR
 encoding:
+.PP
 .CS
 .ta 1.5i
 # Encoding file: iso2022-jp, escape-driven
 E
 init		{}
 final		{}
-iso8859-1	\\x1b(B
-jis0201		\\x1b(J
-jis0208		\\x1b$@
-jis0208		\\x1b$B
-jis0212		\\x1b$(D
-gb2312		\\x1b$A
-ksc5601		\\x1b$(C
+iso8859-1	\ex1b(B
+jis0201		\ex1b(J
+jis0208		\ex1b$@
+jis0208		\ex1b$B
+jis0212		\ex1b$(D
+gb2312		\ex1b$A
+ksc5601		\ex1b$(C
 .CE
 .PP
 In the file, the first column represents an option and the second column
@@ -507,8 +577,11 @@ the first character is converted, while \fBfinal\fR is a string to emit
 or expect after the last character.  All other options are names of
 table-based encodings; the associated value is the escape-sequence that
 marks that encoding.  Tcl syntax is used for the values; in the above
-example, for instance, ``\fB{}\fR'' represents the empty string and
-``\fB\\x1b\fR'' represents character 27.
+example, for instance,
+.QW \fB{}\fR
+represents the empty string and
+.QW \fB\ex1b\fR
+represents character 27.
 .PP
 When \fBTcl_GetEncoding\fR encounters an encoding \fIname\fR that has not
 been loaded, it attempts to load an encoding file called \fIname\fB.enc\fR
@@ -517,6 +590,3 @@ for its script library.  If the encoding file exists, but is
 malformed, an error message will be left in \fIinterp\fR.
 .SH KEYWORDS
 utf, encoding, convert
-
-
-