From 6ddcd1aac092384d9126dd6e101edcc2a681b5e5 Mon Sep 17 00:00:00 2001
From: Miguel Sofer <miguel.sofer@gmail.com>
Date: Tue, 6 Nov 2007 15:23:15 +0000
Subject: * generic/tclEncoding.c: Version of the embedded iso8859-1
 encoding handler that is faster (functions to do the encoding know
 exactly what they're doing instead of pulling it from a table, though
 the table itself has to be retained for use by shift encodings that depend
 on iso8859-1.) [Patch 1826906], committing for dkf.

---
 ChangeLog             |   9 ++
 generic/tclEncoding.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 219 insertions(+), 13 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 48f8d82..cc05d80 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2007-11-06  Miguel Sofer  <msofer@users.sf.net>
+
+	* generic/tclEncoding.c: Version of the embedded iso8859-1
+	encoding handler that is faster (functions to do the encoding know
+	exactly what they're doing instead of pulling it from a table,
+	though the table itself has to be retained for use by shift
+	encodings that depend on iso8859-1.) [Patch 1826906], committing
+	for dkf.
+
 2007-11-05  Andreas Kupries  <andreask@activestate.com>
 
 	* generic/tclConfig.c (Tcl_RegisterConfig): Modified to not extend
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index fbaa8e1..e2308a5 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclEncoding.c,v 1.56 2007/10/28 00:40:48 dkf Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.57 2007/11/06 15:23:15 msofer Exp $
  */
 
 #include "tclInt.h"
@@ -260,14 +260,24 @@ static int		UtfExtToUtfIntProc(ClientData clientData,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
 			    int *srcReadPtr, int *dstWrotePtr,
 			    int *dstCharsPtr);
+static int		Iso88591FromUtfProc(ClientData clientData,
+			    CONST char *src, int srcLen, int flags,
+			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
+			    int *srcReadPtr, int *dstWrotePtr,
+			    int *dstCharsPtr);
+static int		Iso88591ToUtfProc(ClientData clientData,
+			    CONST char *src, int srcLen, int flags,
+			    Tcl_EncodingState *statePtr, char *dst,
+			    int dstLen, int *srcReadPtr, int *dstWrotePtr,
+			    int *dstCharsPtr);
 
 /*
- * A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep. This should
- * help the lifetime of encodings be more useful. See concerns raised in [Bug
- * 1077262].
+ * A Tcl_ObjType for holding a cached Tcl_Encoding in the otherValuePtr field
+ * of the intrep. This should help the lifetime of encodings be more useful.
+ * See concerns raised in [Bug 1077262].
  */
 
-static Tcl_ObjType EncodingType = {
+static Tcl_ObjType encodingType = {
     "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
 };
 
@@ -297,7 +307,7 @@ Tcl_GetEncodingFromObj(
     Tcl_Encoding *encodingPtr)
 {
     CONST char *name = Tcl_GetString(objPtr);
-    if (objPtr->typePtr != &EncodingType) {
+    if (objPtr->typePtr != &encodingType) {
 	Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
 
 	if (encoding == NULL) {
@@ -305,7 +315,7 @@ Tcl_GetEncodingFromObj(
 	}
 	TclFreeIntRep(objPtr);
 	objPtr->internalRep.otherValuePtr = (VOID *) encoding;
-	objPtr->typePtr = &EncodingType;
+	objPtr->typePtr = &encodingType;
     }
     *encodingPtr = Tcl_GetEncoding(NULL, name);
     return TCL_OK;
@@ -611,8 +621,8 @@ TclInitEncodingSubsystem(void)
 	}
 
 	type.encodingName	= "iso8859-1";
-	type.toUtfProc		= TableToUtfProc;
-	type.fromUtfProc	= TableFromUtfProc;
+	type.toUtfProc		= Iso88591ToUtfProc;
+	type.fromUtfProc	= Iso88591FromUtfProc;
 	type.freeProc		= TableFreeProc;
 	type.nullSize		= 1;
 	type.clientData		= dataPtr;
@@ -1536,6 +1546,7 @@ OpenEncodingFileChannel(
 
     if ((NULL == chan) && (interp != NULL)) {
 	Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
+	Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, NULL);
     }
     Tcl_DecrRefCount(fileNameObj);
     Tcl_DecrRefCount(nameObj);
@@ -2132,9 +2143,9 @@ UtfIntToUtfExtProc(
  *
  * UtfExtToUtfIntProc --
  *
- *	Convert from UTF-8 to UTF-8 while converting null-bytes from
- *	the official representation (0x00) to Tcl's internal
- *	representation (0xc0, 0x80). See UtfToUtfProc for details.
+ *	Convert from UTF-8 to UTF-8 while converting null-bytes from the
+ *	official representation (0x00) to Tcl's internal representation (0xc0,
+ *	0x80). See UtfToUtfProc for details.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -2703,6 +2714,190 @@ TableFromUtfProc(
 }
 
 /*
+ *-------------------------------------------------------------------------
+ *
+ * Iso88591ToUtfProc --
+ *
+ *	Convert from the "iso8859-1" encoding into UTF-8.
+ *
+ * Results:
+ *	Returns TCL_OK on success, or TCL_CONVERT_NOSPACE if dst is too small.
+ *
+ * Side effects:
+ *	None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static int
+Iso88591ToUtfProc(
+    ClientData clientData,	/* Ignored. */
+    CONST char *src,		/* Source string in specified encoding. */
+    int srcLen,			/* Source string length in bytes. */
+    int flags,			/* Conversion control flags. */
+    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
+				 * information used during a piecewise
+				 * conversion. ISO 8859-1 is stateless, so
+				 * this function never reads or writes
+				 * statePtr. */
+    char *dst,			/* Output buffer in which converted string is
+				 * stored. */
+    int dstLen,			/* The maximum length of output buffer in
+				 * bytes. */
+    int *srcReadPtr,		/* Filled with the number of bytes from the
+				 * source string that were converted. This may
+				 * be less than the original source length if
+				 * there was a problem converting some source
+				 * characters. */
+    int *dstWrotePtr,		/* Filled with the number of bytes that were
+				 * stored in the output buffer as a result of
+				 * the conversion. */
+    int *dstCharsPtr)		/* Filled with the number of characters that
+				 * correspond to the bytes stored in the
+				 * output buffer. */
+{
+    CONST char *srcStart, *srcEnd;
+    char *dstEnd, *dstStart;
+    int result, numChars;
+
+    srcStart = src;
+    srcEnd = src + srcLen;
+
+    dstStart = dst;
+    dstEnd = dst + dstLen - TCL_UTF_MAX;
+
+    result = TCL_OK;
+    for (numChars = 0; src < srcEnd; numChars++) {
+	Tcl_UniChar ch;
+
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+	}
+	ch = (Tcl_UniChar) *((unsigned char *) src);
+	/*
+	 * Fast path: non-NUL ASCII bytes are copied through unchanged.
+	 */
+	if (ch && ch < 0x80) {
+	    *dst++ = (char) ch;
+	} else {
+	    dst += Tcl_UniCharToUtf(ch, dst);
+	}
+	src++;
+    }
+
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Iso88591FromUtfProc --
+ *
+ *	Convert from UTF-8 into the encoding "iso8859-1".
+ *
+ * Results:
+ *	Returns TCL_OK on success, or a TCL_CONVERT_* code if it stopped early.
+ *
+ * Side effects:
+ *	None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static int
+Iso88591FromUtfProc(
+    ClientData clientData,	/* Ignored. */
+    CONST char *src,		/* Source string in UTF-8. */
+    int srcLen,			/* Source string length in bytes. */
+    int flags,			/* Conversion control flags. */
+    Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
+				 * information used during a piecewise
+				 * conversion. ISO 8859-1 is stateless, so
+				 * this function never reads or writes
+				 * statePtr. */
+    char *dst,			/* Output buffer in which converted string is
+				 * stored. */
+    int dstLen,			/* The maximum length of output buffer in
+				 * bytes. */
+    int *srcReadPtr,		/* Filled with the number of bytes from the
+				 * source string that were converted. This may
+				 * be less than the original source length if
+				 * there was a problem converting some source
+				 * characters. */
+    int *dstWrotePtr,		/* Filled with the number of bytes that were
+				 * stored in the output buffer as a result of
+				 * the conversion. */
+    int *dstCharsPtr)		/* Filled with the number of characters that
+				 * correspond to the bytes stored in the
+				 * output buffer. */
+{
+    CONST char *srcStart, *srcEnd, *srcClose;
+    char *dstStart, *dstEnd;
+    int result, numChars;
+
+    result = TCL_OK;
+
+    srcStart = src;
+    srcEnd = src + srcLen;
+    srcClose = srcEnd;
+    if ((flags & TCL_ENCODING_END) == 0) {
+	srcClose -= TCL_UTF_MAX;
+    }
+
+    dstStart = dst;
+    dstEnd = dst + dstLen - 1;
+
+    for (numChars = 0; src < srcEnd; numChars++) {
+	Tcl_UniChar ch;
+	int len;
+
+	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
+	    /*
+	     * If there is more string to follow, this will ensure that the
+	     * last UTF-8 character in the source buffer hasn't been cut off.
+	     */
+
+	    result = TCL_CONVERT_MULTIBYTE;
+	    break;
+	}
+	len = TclUtfToUniChar(src, &ch);
+
+	/*
+	 * Check for illegal characters.
+	 */
+
+	if (ch > 0xff) {
+	    if (flags & TCL_ENCODING_STOPONERROR) {
+		result = TCL_CONVERT_UNKNOWN;
+		break;
+	    }
+
+	    /*
+	     * Plunge on, using '?' as a fallback character.
+	     */
+
+	    ch = (Tcl_UniChar) '?';
+	}
+
+	if (dst > dstEnd) {
+	    result = TCL_CONVERT_NOSPACE;
+	    break;
+	}
+	*(dst++) = (char) ch;
+	src += len;
+    }
+
+    *srcReadPtr = src - srcStart;
+    *dstWrotePtr = dst - dstStart;
+    *dstCharsPtr = numChars;
+    return result;
+}
+
+/*
  *---------------------------------------------------------------------------
  *
  * TableFreeProc --
@@ -3235,7 +3430,8 @@ GetTableEncoding(
     if (encodingPtr == NULL) {
 	encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name);
 	if ((encodingPtr == NULL)
-		|| (encodingPtr->toUtfProc != TableToUtfProc)) {
+		|| (encodingPtr->toUtfProc != TableToUtfProc
+		&& encodingPtr->toUtfProc != Iso88591ToUtfProc)) {
 	    Tcl_Panic("EscapeToUtfProc: invalid sub table");
 	}
 	subTablePtr->encodingPtr = encodingPtr;
@@ -3350,3 +3546,4 @@ InitializeEncodingSearchPath(
  * fill-column: 78
  * End:
  */
+
-- 
cgit v0.12