From 512babc0460682cb0d6921ef9e06e709638942bc Mon Sep 17 00:00:00 2001
From: vincentdarley <vincentdarley>
Date: Thu, 11 Jul 2002 17:42:20 +0000
Subject: file normalization on win 95/98

---
 ChangeLog        |   9 ++
 doc/file.n       |  34 +++---
 win/tclWinFile.c | 312 +++++++++++++++++++------------------------------------
 3 files changed, 130 insertions(+), 225 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e60b503..5499298 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2002-07-11  Vince Darley  <vincentdarley@users.sourceforge.net>
+
+	* doc/file.n:
+	* win/tclWinFile.c: on Win 95/98/ME the long form of the path
+	is used as a normalized form.  This is required because short
+	forms are not a robust representation.  The file normalization
+	function has been sped up, but more performance gains might be
+	possible, if speed is still an issue on these platforms.
+
 2002-07-11  Don Porter  <dgp@users.sourceforge.net>
 
 	* doc/Hash.3: Overlooked CONST documentation update.
diff --git a/doc/file.n b/doc/file.n
index cf558ec..2d21150 100644
--- a/doc/file.n
+++ b/doc/file.n
@@ -5,7 +5,7 @@
 '\" See the file "license.terms" for information on usage and redistribution
 '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 '\" 
-'\" RCS: @(#) $Id: file.n,v 1.20 2002/06/21 14:22:28 vincentdarley Exp $
+'\" RCS: @(#) $Id: file.n,v 1.21 2002/07/11 17:42:20 vincentdarley Exp $
 '\" 
 .so man.macros
 .TH file n 8.3 Tcl "Tcl Built-In Commands"
@@ -258,25 +258,19 @@ under Windows or AppleScript on the Macintosh.
 \fBfile normalize \fIname\fR
 .
 .RS
-Returns a unique normalised path representation for the file-system 
-object (file, directory, link, etc), whose string value can be used as 
-a unique identifier for it.  A normalized path is one which has all '../', './' 
-removed.  Also it is one which is in the ``standard'' format for the native 
-platform.  On MacOS, Unix, this means the segments leading up to the path 
-must be free of symbolic links/aliases (but the very last path component 
-may be a symbolic link), and on Windows it also means means we want the
-long form (when running Win NT/2000/XP) or the short form (when running Win
-95/98) with that form's case-dependence (which gives us a unique,
-case-dependent path).  The one exception concerning the last link in the
-path is necessary, because Tcl or the user may wish to operate on the
-actual symbolic link itself (for example 'file delete', 'file rename', 'file copy'
-are defined to operate on symbolic links, not on the things that they point to).
-.PP
-Note that this means normalized paths are different on old Windows
-operating systems (95/98) and new Windows operating systems
-(NT/2000/XP).  This is necessary because the APIs  
-to produce a long normalized path in older operating systems are 
-unfortunately very slow.
+Returns a unique normalised path representation for the file-system
+object (file, directory, link, etc), whose string value can be used as a
+unique identifier for it.  A normalized path is one which has all '../',
+'./' removed.  Also it is one which is in the ``standard'' format for the
+native platform.  On MacOS, Unix, this means the segments leading up to
+the path must be free of symbolic links/aliases (but the very last path
+component may be a symbolic link), and on Windows it also means means we
+want the long form with that form's case-dependence (which gives us a
+unique, case-dependent path).  The one exception concerning the last link
+in the path is necessary, because Tcl or the user may wish to operate on
+the actual symbolic link itself (for example 'file delete', 'file
+rename', 'file copy' are defined to operate on symbolic links, not on the
+things that they point to).
 .RE
 .TP
 \fBfile owned \fIname\fR 
diff --git a/win/tclWinFile.c b/win/tclWinFile.c
index 8c34b24..18b5555 100644
--- a/win/tclWinFile.c
+++ b/win/tclWinFile.c
@@ -11,7 +11,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tclWinFile.c,v 1.33 2002/06/21 14:22:29 vincentdarley Exp $
+ * RCS: @(#) $Id: tclWinFile.c,v 1.34 2002/07/11 17:42:20 vincentdarley Exp $
  */
 
 //#define _WIN32_WINNT  0x0500
@@ -1956,88 +1956,6 @@ TclpFilesystemPathType(pathObjPtr)
 }
 
 
-#if 0
-/* 
- * This function could be thoroughly tested and then substituted in
- * below to speed up file normalization on Windows NT/2000/XP
- */
-
-void WinGetLongPathName(CONST TCHAR* origPath, Tcl_DString *dsPtr);
-
-#define IsDirSep(a) (a == '/' || a == '\\')
-
-void WinGetLongPathName(CONST TCHAR* pszOriginal, Tcl_DString *dsPtr) {
-    TCHAR szResult[_MAX_PATH * 2 + 1];		
-    
-    TCHAR* pchResult = szResult;
-    const TCHAR* pchScan = pszOriginal;
-    WIN32_FIND_DATA wfd;
-    
-    /* Do Drive Letter check... */
-    if (pchScan[0] && pchScan[1] == ':') {
-	/* Copy drive letter and colon, ensuring drive is upper case. */
-	char drive = *pchScan++;
-	*pchResult++ = (drive < 97 ? drive : drive - 32);
-	*pchResult++ = *pchScan++;
-    } else if (IsDirSep(pchScan[0]) && IsDirSep(pchScan[1])) {
-	/* Copy \\ and machine name. */
-	*pchResult++ = *pchScan++;
-	*pchResult++ = *pchScan++;
-	while (*pchScan && !IsDirSep(*pchScan)) {
-	    *pchResult++ = *pchScan++;
-	}
-	/* 
-	 * Note that the code below will fail since FindFirstFile
-	 * on a UNC path seems not to work on directory name searches?
-	 */
-    }
-  
-    if (!IsDirSep(*pchScan)) {
-	while ((*pchResult++ = *pchScan++) != '\0');
-    } else {
-	/* Now loop through directories and files... */
-	while (IsDirSep(*pchScan)) {
-	    char* pchReplace;
-	    const TCHAR* pchEnd;
-	    HANDLE hFind;
-	    
-	    *pchResult++ = *pchScan++;
-	    pchReplace = pchResult;
-	    
-	    pchEnd = pchScan;
-	    while (*pchEnd && !IsDirSep(*pchEnd)) {
-		*pchResult++ = *pchEnd++;
-	    }
-	    
-	    *pchResult = '\0';
-	    
-	    /* Now run this through FindFirstFile... */
-	    hFind = FindFirstFileA(szResult, &wfd);
-	    if (hFind != INVALID_HANDLE_VALUE) {
-		FindClose(hFind);
-		strcpy(pchReplace, wfd.cFileName);
-		pchResult = pchReplace + strlen(pchReplace);
-	    } else {
-		/* Copy rest of input path & end. */
-		strcat(pchResult, pchEnd);
-		break;
-	    }
-	    pchScan = pchEnd;
-	}
-    }
-    /* Copy it over */
-    Tcl_ExternalToUtfDString(NULL, szResult, -1, dsPtr);
-}
-    
-#endif
-
-/* 
- * We have two different implementations of file normalization which
- * can be turned on or off here.  They should both agree for all files,
- * and timings show the 'TCLWIN_NEW_NORM' version is about 10% faster.
- */
-#define TCLWIN_NEW_NORM
-
 /*
  *---------------------------------------------------------------------------
  *
@@ -2068,103 +1986,136 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
     int nextCheckpoint;
 {
     char *lastValidPathEnd = NULL;
-    Tcl_DString ds;
-    int pathLen;
-    
-    char *path = Tcl_GetStringFromObj(pathPtr, &pathLen);
+    char *path = Tcl_GetString(pathPtr);
 
     if (TclWinGetPlatformId() == VER_PLATFORM_WIN32_WINDOWS) {
-	Tcl_DString eDs;
-	char *nativePath;
-	int nativeLen;
-
-	Tcl_UtfToExternalDString(NULL, path, -1, &ds);
-	nativePath = Tcl_DStringValue(&ds);
-	nativeLen = Tcl_DStringLength(&ds);
+	/* 
+	 * We're on Win95, 98 or ME.  There are two assumptions
+	 * in this block of code.  First that the native (NULL)
+	 * encoding is basically ascii, and second that symbolic
+	 * links are not possible.  Both of these assumptions
+	 * appear to be true of these operating systems.
+	 */
+	char *currentPathEndPosition;
+	Tcl_Obj *temp = NULL;
+	int isDrive = 1;
+	Tcl_DString ds;
+	/* This will hold the normalized string */
+	Tcl_DString dsNorm;
+	Tcl_DStringInit(&dsNorm);
 
-	/* We're on Windows 95/98 */
-	lastValidPathEnd = nativePath + Tcl_DStringLength(&ds);
-	
+	currentPathEndPosition = path + nextCheckpoint;
 	while (1) {
-	    DWORD res = GetShortPathNameA(nativePath, nativePath, 1+nativeLen);
-	    if (res != 0) {
-		/* We found an ok path */
-		break;
-	    }
-	    /* Undo the null-termination we put in before */
-	    if (lastValidPathEnd != (nativePath + nativeLen)) {
-		*lastValidPathEnd = '/';
-	    }
-	    /* 
-	     * The path doesn't exist.  Back up the path, one component
-	     * (directory/file) at a time, until one does exist. 
-	     */
-	    while (1) {
-		char cur;
-		lastValidPathEnd--;
-		if (lastValidPathEnd == nativePath) {
-		    /* We didn't accept any of the path */
-		    Tcl_DStringFree(&ds);
-		    return nextCheckpoint;
+	    char cur = *currentPathEndPosition;
+	    if ((cur == '/' || cur == 0) && (path != currentPathEndPosition)) {
+		/* Reached directory separator, or end of string */
+		CONST char *nativePath = Tcl_UtfToExternalDString(NULL, path, 
+			    currentPathEndPosition - path, &ds);
+
+		/*
+		 * Now we convert the tail of the current path to its
+		 * 'long form', and append it to 'dsNorm' which holds
+		 * the current normalized path, if the file exists.
+		 */
+		if (isDrive) {
+		    if (GetFileAttributesA(nativePath) 
+			== 0xffffffff) {
+			/* File doesn't exist */
+			Tcl_DStringFree(&ds);
+			break;
+		    }
+		    if (nativePath[0] >= 'a') {
+			((char*)nativePath)[0] -= ('a' - 'A');
+		    }
+		    Tcl_DStringAppend(&dsNorm,nativePath,Tcl_DStringLength(&ds));
+		} else {
+		    WIN32_FIND_DATA fData;
+		    HANDLE handle;
+		    
+		    handle = FindFirstFileA(nativePath, &fData);
+		    if (handle == INVALID_HANDLE_VALUE) {
+			if (GetFileAttributesA(nativePath) 
+			    == 0xffffffff) {
+			    /* File doesn't exist */
+			    Tcl_DStringFree(&ds);
+			    break;
+			}
+			/* This is usually the '/' in 'c:/' at end of string */
+			Tcl_DStringAppend(&dsNorm,"/", 1);
+		    } else {
+			char *nativeName;
+			if (fData.cFileName[0] != '\0') {
+			    nativeName = fData.cFileName;
+			} else {
+			    nativeName = fData.cAlternateFileName;
+			}
+			FindClose(handle);
+			Tcl_DStringAppend(&dsNorm,"/", 1);
+			Tcl_DStringAppend(&dsNorm,nativeName,-1);
+		    }
 		}
-		cur = *(lastValidPathEnd);
-		if (cur == '/' || cur == '\\') {
-		    /* Reached directory separator */
+		Tcl_DStringFree(&ds);
+		lastValidPathEnd = currentPathEndPosition;
+		if (cur == 0) {
 		    break;
 		}
+		/* 
+		 * If we get here, we've got past one directory
+		 * delimiter, so we know it is no longer a drive 
+		 */
+		isDrive = 0;
 	    }
-	    /* Temporarily terminate the string */
-	    *lastValidPathEnd = '\0';
+	    currentPathEndPosition++;
 	}
-	/* 
-	 * If we get here, we found a valid path, which we've converted
-	 * to short form, and the valid string ends at or before
-	 * 'lastValidPathEnd' and the invalid string starts at
-	 * 'lastValidPathEnd'.
-	 */
-
-	/* Copy over the valid part of the path and find its length */
-	Tcl_ExternalToUtfDString(NULL, nativePath, -1, &eDs);
-	path = Tcl_DStringValue(&eDs);
-	if (path[1] == ':') {
-	    if (path[0] >= 'a' && path[0] <= 'z') {
-		/* Make uppercase */
-		path[0] -= 32;
+	nextCheckpoint = currentPathEndPosition - path;
+	
+	if (lastValidPathEnd != NULL) {
+	    /* 
+	     * Concatenate the normalized string in dsNorm with the
+	     * tail of the path which we didn't recognise.  The
+	     * string in dsNorm is in the native encoding, so we
+	     * have to convert it to Utf.
+	     */
+	    Tcl_DString dsTemp;
+	    Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&dsNorm), 
+			      Tcl_DStringLength(&dsNorm), &dsTemp);
+	    nextCheckpoint = Tcl_DStringLength(&dsTemp);
+	    if (*lastValidPathEnd != 0) {
+		/* Not the end of the string */
+		int len;
+		char *path;
+		Tcl_Obj *tmpPathPtr;
+		tmpPathPtr = Tcl_NewStringObj(Tcl_DStringValue(&dsTemp), 
+					      nextCheckpoint);
+		Tcl_AppendToObj(tmpPathPtr, lastValidPathEnd, -1);
+		path = Tcl_GetStringFromObj(tmpPathPtr, &len);
+		Tcl_SetStringObj(pathPtr, path, len);
+		Tcl_DecrRefCount(tmpPathPtr);
+	    } else {
+		/* End of string was reached above */
+		Tcl_SetStringObj(pathPtr, Tcl_DStringValue(&dsTemp),
+				 nextCheckpoint);
 	    }
+	    Tcl_DStringFree(&dsTemp);
 	}
-	nextCheckpoint = Tcl_DStringLength(&eDs);
-	Tcl_SetStringObj(pathPtr, path, Tcl_DStringLength(&eDs));
-	Tcl_DStringFree(&eDs);
-	if (lastValidPathEnd != (nativePath + nativeLen)) {
-	    CONST char *tmp;
-	    *lastValidPathEnd = '/';
-	    /* Now copy over the invalid (i.e. non-existent) part of the path */
-	    tmp = Tcl_ExternalToUtfDString(NULL, lastValidPathEnd, -1, &eDs);
-	    Tcl_AppendToObj(pathPtr, tmp, Tcl_DStringLength(&eDs));
-	    Tcl_DStringFree(&eDs);
-	}
-	Tcl_DStringFree(&ds);
+	Tcl_DStringFree(&dsNorm);
     } else {
 	/* We're on WinNT or 2000 or XP */
-	CONST char *nativePath;
 	char *currentPathEndPosition;
 	Tcl_Obj *temp = NULL;
-	WIN32_FILE_ATTRIBUTE_DATA data;
 	int isDrive = 1;
-#ifdef TCLWIN_NEW_NORM
+	Tcl_DString ds;
 	/* This will hold the normalized string */
 	Tcl_DString dsNorm;
 	Tcl_DStringInit(&dsNorm);
-#endif
-	nativePath = Tcl_WinUtfToTChar(path, -1, &ds);
 
-	Tcl_DStringFree(&ds);
 	currentPathEndPosition = path + nextCheckpoint;
 	while (1) {
 	    char cur = *currentPathEndPosition;
 	    if ((cur == '/' || cur == 0) && (path != currentPathEndPosition)) {
 		/* Reached directory separator, or end of string */
-		nativePath = Tcl_WinUtfToTChar(path, 
+		WIN32_FILE_ATTRIBUTE_DATA data;
+		CONST char *nativePath = Tcl_WinUtfToTChar(path, 
 			    currentPathEndPosition - path, &ds);
 		if ((*tclWinProcs->getFileAttributesExProc)(nativePath,
 		    GetFileExInfoStandard, &data) != TRUE) {
@@ -2192,7 +2143,7 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
 		    Tcl_Obj *to = WinReadLinkDirectory(nativePath);
 		    if (to != NULL) {
 			/* Read the reparse point ok */
-			Tcl_GetStringFromObj(to, &pathLen);
+			/* Tcl_GetStringFromObj(to, &pathLen); */
 			nextCheckpoint = 0; /* pathLen */
 			Tcl_AppendToObj(to, currentPathEndPosition, -1);
 			/* Convert link to forward slashes */
@@ -2207,15 +2158,12 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
 			temp = to;
 			/* Reset variables so we can restart normalization */
 			isDrive = 1;
-#ifdef TCLWIN_NEW_NORM
 			Tcl_DStringFree(&dsNorm);
 			Tcl_DStringInit(&dsNorm);
-#endif
 			Tcl_DStringFree(&ds);
 			continue;
 		    }
 		}
-#ifdef TCLWIN_NEW_NORM
 		/*
 		 * Now we convert the tail of the current path to its
 		 * 'long form', and append it to 'dsNorm' which holds
@@ -2251,7 +2199,6 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
 					  wcslen(nativeName)*sizeof(WCHAR));
 		    }
 		}
-#endif		
 		Tcl_DStringFree(&ds);
 		lastValidPathEnd = currentPathEndPosition;
 		if (cur == 0) {
@@ -2268,7 +2215,6 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
 	nextCheckpoint = currentPathEndPosition - path;
 	
 	if (lastValidPathEnd != NULL) {
-#ifdef TCLWIN_NEW_NORM
 	    /* 
 	     * Concatenate the normalized string in dsNorm with the
 	     * tail of the path which we didn't recognise.  The
@@ -2296,52 +2242,8 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
 				 nextCheckpoint);
 	    }
 	    Tcl_DStringFree(&dsTemp);
-#else
-	    /* 
-	     * The leading end of the path description was acceptable to
-	     * us.  We therefore convert it to its long form (which is
-	     * used by Tcl as a unique normalized form), and return
-	     * that.
-	     */
-	    int endOfString;
-	    Tcl_Obj *tmpPathPtr;
-	    Tcl_Obj* objPtr = NULL;
-	    int useLength = lastValidPathEnd - path;
-	    if (*lastValidPathEnd == 0) {
-		tmpPathPtr = Tcl_NewStringObj(path, useLength);
-		endOfString = 1;
-	    } else {
-		tmpPathPtr = Tcl_NewStringObj(path, useLength + 1);
-		endOfString = 0;
-	    }
-	    /* 
-	     * If this returns an error, we have a strange situation; the
-	     * file exists, but we can't get its long name.  We will have
-	     * to assume the name we have is ok.
-	     */
-	    Tcl_IncrRefCount(tmpPathPtr);
-	    if (ConvertFileNameFormat(interp, 0, tmpPathPtr, 1, &objPtr) 
-	      == TCL_OK) {
-		int len;
-		CONST char* converted = Tcl_GetStringFromObj(objPtr,&len);
-		if (!endOfString) {
-		    if (converted[len-1] == '/') {
-		        lastValidPathEnd++;
-		    }
-		    /* Be nice and fix the string before we clear it */
-		    Tcl_AppendToObj(objPtr, lastValidPathEnd, -1);
-		}
-		nextCheckpoint += (len - useLength);
-		path = Tcl_GetStringFromObj(objPtr,&len);
-		Tcl_SetStringObj(pathPtr,path, len);
-		Tcl_DecrRefCount(objPtr);
-	    }
-	    Tcl_DecrRefCount(tmpPathPtr);
-#endif
 	}
-#ifdef TCLWIN_NEW_NORM
 	Tcl_DStringFree(&dsNorm);
-#endif
     }
     return nextCheckpoint;
 }
-- 
cgit v0.12