summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvincentdarley <vincentdarley>2002-07-11 17:42:20 (GMT)
committervincentdarley <vincentdarley>2002-07-11 17:42:20 (GMT)
commit512babc0460682cb0d6921ef9e06e709638942bc (patch)
tree53e69a17a37f4b18c7cca9ab30724f9350253151
parent3fee670effbf74c4f52c8b227dacca0be2a67404 (diff)
downloadtcl-512babc0460682cb0d6921ef9e06e709638942bc.zip
tcl-512babc0460682cb0d6921ef9e06e709638942bc.tar.gz
tcl-512babc0460682cb0d6921ef9e06e709638942bc.tar.bz2
file normalization on win 95/98
-rw-r--r--ChangeLog9
-rw-r--r--doc/file.n34
-rw-r--r--win/tclWinFile.c312
3 files changed, 130 insertions, 225 deletions
diff --git a/ChangeLog b/ChangeLog
index e60b503..5499298 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2002-07-11 Vince Darley <vincentdarley@users.sourceforge.net>
+
+ * doc/file.n:
+ * win/tclWinFile.c: on Win 95/98/ME the long form of the path
+ is used as a normalized form. This is required because short
+ forms are not a robust representation. The file normalization
+ function has been sped up, but more performance gains might be
+ possible, if speed is still an issue on these platforms.
+
2002-07-11 Don Porter <dgp@users.sourceforge.net>
* doc/Hash.3: Overlooked CONST documentation update.
diff --git a/doc/file.n b/doc/file.n
index cf558ec..2d21150 100644
--- a/doc/file.n
+++ b/doc/file.n
@@ -5,7 +5,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: file.n,v 1.20 2002/06/21 14:22:28 vincentdarley Exp $
+'\" RCS: @(#) $Id: file.n,v 1.21 2002/07/11 17:42:20 vincentdarley Exp $
'\"
.so man.macros
.TH file n 8.3 Tcl "Tcl Built-In Commands"
@@ -258,25 +258,19 @@ under Windows or AppleScript on the Macintosh.
\fBfile normalize \fIname\fR
.
.RS
-Returns a unique normalised path representation for the file-system
-object (file, directory, link, etc), whose string value can be used as
-a unique identifier for it. A normalized path is one which has all '../', './'
-removed. Also it is one which is in the ``standard'' format for the native
-platform. On MacOS, Unix, this means the segments leading up to the path
-must be free of symbolic links/aliases (but the very last path component
-may be a symbolic link), and on Windows it also means means we want the
-long form (when running Win NT/2000/XP) or the short form (when running Win
-95/98) with that form's case-dependence (which gives us a unique,
-case-dependent path). The one exception concerning the last link in the
-path is necessary, because Tcl or the user may wish to operate on the
-actual symbolic link itself (for example 'file delete', 'file rename', 'file copy'
-are defined to operate on symbolic links, not on the things that they point to).
-.PP
-Note that this means normalized paths are different on old Windows
-operating systems (95/98) and new Windows operating systems
-(NT/2000/XP). This is necessary because the APIs
-to produce a long normalized path in older operating systems are
-unfortunately very slow.
+Returns a unique normalized path representation for the file-system
+object (file, directory, link, etc), whose string value can be used as a
+unique identifier for it. A normalized path is one which has all '../',
+'./' removed. Also it is one which is in the ``standard'' format for the
+native platform. On MacOS, Unix, this means the segments leading up to
+the path must be free of symbolic links/aliases (but the very last path
+component may be a symbolic link), and on Windows it also means we
+want the long form with that form's case-dependence (which gives us a
+unique, case-dependent path). The one exception concerning the last link
+in the path is necessary, because Tcl or the user may wish to operate on
+the actual symbolic link itself (for example 'file delete', 'file
+rename', 'file copy' are defined to operate on symbolic links, not on the
+things that they point to).
.RE
.TP
\fBfile owned \fIname\fR
diff --git a/win/tclWinFile.c b/win/tclWinFile.c
index 8c34b24..18b5555 100644
--- a/win/tclWinFile.c
+++ b/win/tclWinFile.c
@@ -11,7 +11,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclWinFile.c,v 1.33 2002/06/21 14:22:29 vincentdarley Exp $
+ * RCS: @(#) $Id: tclWinFile.c,v 1.34 2002/07/11 17:42:20 vincentdarley Exp $
*/
//#define _WIN32_WINNT 0x0500
@@ -1956,88 +1956,6 @@ TclpFilesystemPathType(pathObjPtr)
}
-#if 0
-/*
- * This function could be thoroughly tested and then substituted in
- * below to speed up file normalization on Windows NT/2000/XP
- */
-
-void WinGetLongPathName(CONST TCHAR* origPath, Tcl_DString *dsPtr);
-
-#define IsDirSep(a) (a == '/' || a == '\\')
-
-void WinGetLongPathName(CONST TCHAR* pszOriginal, Tcl_DString *dsPtr) {
- TCHAR szResult[_MAX_PATH * 2 + 1];
-
- TCHAR* pchResult = szResult;
- const TCHAR* pchScan = pszOriginal;
- WIN32_FIND_DATA wfd;
-
- /* Do Drive Letter check... */
- if (pchScan[0] && pchScan[1] == ':') {
- /* Copy drive letter and colon, ensuring drive is upper case. */
- char drive = *pchScan++;
- *pchResult++ = (drive < 97 ? drive : drive - 32);
- *pchResult++ = *pchScan++;
- } else if (IsDirSep(pchScan[0]) && IsDirSep(pchScan[1])) {
- /* Copy \\ and machine name. */
- *pchResult++ = *pchScan++;
- *pchResult++ = *pchScan++;
- while (*pchScan && !IsDirSep(*pchScan)) {
- *pchResult++ = *pchScan++;
- }
- /*
- * Note that the code below will fail since FindFirstFile
- * on a UNC path seems not to work on directory name searches?
- */
- }
-
- if (!IsDirSep(*pchScan)) {
- while ((*pchResult++ = *pchScan++) != '\0');
- } else {
- /* Now loop through directories and files... */
- while (IsDirSep(*pchScan)) {
- char* pchReplace;
- const TCHAR* pchEnd;
- HANDLE hFind;
-
- *pchResult++ = *pchScan++;
- pchReplace = pchResult;
-
- pchEnd = pchScan;
- while (*pchEnd && !IsDirSep(*pchEnd)) {
- *pchResult++ = *pchEnd++;
- }
-
- *pchResult = '\0';
-
- /* Now run this through FindFirstFile... */
- hFind = FindFirstFileA(szResult, &wfd);
- if (hFind != INVALID_HANDLE_VALUE) {
- FindClose(hFind);
- strcpy(pchReplace, wfd.cFileName);
- pchResult = pchReplace + strlen(pchReplace);
- } else {
- /* Copy rest of input path & end. */
- strcat(pchResult, pchEnd);
- break;
- }
- pchScan = pchEnd;
- }
- }
- /* Copy it over */
- Tcl_ExternalToUtfDString(NULL, szResult, -1, dsPtr);
-}
-
-#endif
-
-/*
- * We have two different implementations of file normalization which
- * can be turned on or off here. They should both agree for all files,
- * and timings show the 'TCLWIN_NEW_NORM' version is about 10% faster.
- */
-#define TCLWIN_NEW_NORM
-
/*
*---------------------------------------------------------------------------
*
@@ -2068,103 +1986,136 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
int nextCheckpoint;
{
char *lastValidPathEnd = NULL;
- Tcl_DString ds;
- int pathLen;
-
- char *path = Tcl_GetStringFromObj(pathPtr, &pathLen);
+ char *path = Tcl_GetString(pathPtr);
if (TclWinGetPlatformId() == VER_PLATFORM_WIN32_WINDOWS) {
- Tcl_DString eDs;
- char *nativePath;
- int nativeLen;
-
- Tcl_UtfToExternalDString(NULL, path, -1, &ds);
- nativePath = Tcl_DStringValue(&ds);
- nativeLen = Tcl_DStringLength(&ds);
+ /*
+ * We're on Win95, 98 or ME. There are two assumptions
+ * in this block of code. First that the native (NULL)
+ * encoding is basically ascii, and second that symbolic
+ * links are not possible. Both of these assumptions
+ * appear to be true of these operating systems.
+ */
+ char *currentPathEndPosition;
+ Tcl_Obj *temp = NULL;
+ int isDrive = 1;
+ Tcl_DString ds;
+ /* This will hold the normalized string */
+ Tcl_DString dsNorm;
+ Tcl_DStringInit(&dsNorm);
- /* We're on Windows 95/98 */
- lastValidPathEnd = nativePath + Tcl_DStringLength(&ds);
-
+ currentPathEndPosition = path + nextCheckpoint;
while (1) {
- DWORD res = GetShortPathNameA(nativePath, nativePath, 1+nativeLen);
- if (res != 0) {
- /* We found an ok path */
- break;
- }
- /* Undo the null-termination we put in before */
- if (lastValidPathEnd != (nativePath + nativeLen)) {
- *lastValidPathEnd = '/';
- }
- /*
- * The path doesn't exist. Back up the path, one component
- * (directory/file) at a time, until one does exist.
- */
- while (1) {
- char cur;
- lastValidPathEnd--;
- if (lastValidPathEnd == nativePath) {
- /* We didn't accept any of the path */
- Tcl_DStringFree(&ds);
- return nextCheckpoint;
+ char cur = *currentPathEndPosition;
+ if ((cur == '/' || cur == 0) && (path != currentPathEndPosition)) {
+ /* Reached directory separator, or end of string */
+ CONST char *nativePath = Tcl_UtfToExternalDString(NULL, path,
+ currentPathEndPosition - path, &ds);
+
+ /*
+ * Now we convert the tail of the current path to its
+ * 'long form', and append it to 'dsNorm' which holds
+ * the current normalized path, if the file exists.
+ */
+ if (isDrive) {
+ if (GetFileAttributesA(nativePath)
+ == 0xffffffff) {
+ /* File doesn't exist */
+ Tcl_DStringFree(&ds);
+ break;
+ }
+ if (nativePath[0] >= 'a') {
+ ((char*)nativePath)[0] -= ('a' - 'A');
+ }
+ Tcl_DStringAppend(&dsNorm,nativePath,Tcl_DStringLength(&ds));
+ } else {
+ WIN32_FIND_DATA fData;
+ HANDLE handle;
+
+ handle = FindFirstFileA(nativePath, &fData);
+ if (handle == INVALID_HANDLE_VALUE) {
+ if (GetFileAttributesA(nativePath)
+ == 0xffffffff) {
+ /* File doesn't exist */
+ Tcl_DStringFree(&ds);
+ break;
+ }
+ /* This is usually the '/' in 'c:/' at end of string */
+ Tcl_DStringAppend(&dsNorm,"/", 1);
+ } else {
+ char *nativeName;
+ if (fData.cFileName[0] != '\0') {
+ nativeName = fData.cFileName;
+ } else {
+ nativeName = fData.cAlternateFileName;
+ }
+ FindClose(handle);
+ Tcl_DStringAppend(&dsNorm,"/", 1);
+ Tcl_DStringAppend(&dsNorm,nativeName,-1);
+ }
}
- cur = *(lastValidPathEnd);
- if (cur == '/' || cur == '\\') {
- /* Reached directory separator */
+ Tcl_DStringFree(&ds);
+ lastValidPathEnd = currentPathEndPosition;
+ if (cur == 0) {
break;
}
+ /*
+ * If we get here, we've got past one directory
+ * delimiter, so we know it is no longer a drive
+ */
+ isDrive = 0;
}
- /* Temporarily terminate the string */
- *lastValidPathEnd = '\0';
+ currentPathEndPosition++;
}
- /*
- * If we get here, we found a valid path, which we've converted
- * to short form, and the valid string ends at or before
- * 'lastValidPathEnd' and the invalid string starts at
- * 'lastValidPathEnd'.
- */
-
- /* Copy over the valid part of the path and find its length */
- Tcl_ExternalToUtfDString(NULL, nativePath, -1, &eDs);
- path = Tcl_DStringValue(&eDs);
- if (path[1] == ':') {
- if (path[0] >= 'a' && path[0] <= 'z') {
- /* Make uppercase */
- path[0] -= 32;
+ nextCheckpoint = currentPathEndPosition - path;
+
+ if (lastValidPathEnd != NULL) {
+ /*
+ * Concatenate the normalized string in dsNorm with the
+ * tail of the path which we didn't recognise. The
+ * string in dsNorm is in the native encoding, so we
+ * have to convert it to Utf.
+ */
+ Tcl_DString dsTemp;
+ Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&dsNorm),
+ Tcl_DStringLength(&dsNorm), &dsTemp);
+ nextCheckpoint = Tcl_DStringLength(&dsTemp);
+ if (*lastValidPathEnd != 0) {
+ /* Not the end of the string */
+ int len;
+ char *path;
+ Tcl_Obj *tmpPathPtr;
+ tmpPathPtr = Tcl_NewStringObj(Tcl_DStringValue(&dsTemp),
+ nextCheckpoint);
+ Tcl_AppendToObj(tmpPathPtr, lastValidPathEnd, -1);
+ path = Tcl_GetStringFromObj(tmpPathPtr, &len);
+ Tcl_SetStringObj(pathPtr, path, len);
+ Tcl_DecrRefCount(tmpPathPtr);
+ } else {
+ /* End of string was reached above */
+ Tcl_SetStringObj(pathPtr, Tcl_DStringValue(&dsTemp),
+ nextCheckpoint);
}
+ Tcl_DStringFree(&dsTemp);
}
- nextCheckpoint = Tcl_DStringLength(&eDs);
- Tcl_SetStringObj(pathPtr, path, Tcl_DStringLength(&eDs));
- Tcl_DStringFree(&eDs);
- if (lastValidPathEnd != (nativePath + nativeLen)) {
- CONST char *tmp;
- *lastValidPathEnd = '/';
- /* Now copy over the invalid (i.e. non-existent) part of the path */
- tmp = Tcl_ExternalToUtfDString(NULL, lastValidPathEnd, -1, &eDs);
- Tcl_AppendToObj(pathPtr, tmp, Tcl_DStringLength(&eDs));
- Tcl_DStringFree(&eDs);
- }
- Tcl_DStringFree(&ds);
+ Tcl_DStringFree(&dsNorm);
} else {
/* We're on WinNT or 2000 or XP */
- CONST char *nativePath;
char *currentPathEndPosition;
Tcl_Obj *temp = NULL;
- WIN32_FILE_ATTRIBUTE_DATA data;
int isDrive = 1;
-#ifdef TCLWIN_NEW_NORM
+ Tcl_DString ds;
/* This will hold the normalized string */
Tcl_DString dsNorm;
Tcl_DStringInit(&dsNorm);
-#endif
- nativePath = Tcl_WinUtfToTChar(path, -1, &ds);
- Tcl_DStringFree(&ds);
currentPathEndPosition = path + nextCheckpoint;
while (1) {
char cur = *currentPathEndPosition;
if ((cur == '/' || cur == 0) && (path != currentPathEndPosition)) {
/* Reached directory separator, or end of string */
- nativePath = Tcl_WinUtfToTChar(path,
+ WIN32_FILE_ATTRIBUTE_DATA data;
+ CONST char *nativePath = Tcl_WinUtfToTChar(path,
currentPathEndPosition - path, &ds);
if ((*tclWinProcs->getFileAttributesExProc)(nativePath,
GetFileExInfoStandard, &data) != TRUE) {
@@ -2192,7 +2143,7 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
Tcl_Obj *to = WinReadLinkDirectory(nativePath);
if (to != NULL) {
/* Read the reparse point ok */
- Tcl_GetStringFromObj(to, &pathLen);
+ /* Tcl_GetStringFromObj(to, &pathLen); */
nextCheckpoint = 0; /* pathLen */
Tcl_AppendToObj(to, currentPathEndPosition, -1);
/* Convert link to forward slashes */
@@ -2207,15 +2158,12 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
temp = to;
/* Reset variables so we can restart normalization */
isDrive = 1;
-#ifdef TCLWIN_NEW_NORM
Tcl_DStringFree(&dsNorm);
Tcl_DStringInit(&dsNorm);
-#endif
Tcl_DStringFree(&ds);
continue;
}
}
-#ifdef TCLWIN_NEW_NORM
/*
* Now we convert the tail of the current path to its
* 'long form', and append it to 'dsNorm' which holds
@@ -2251,7 +2199,6 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
wcslen(nativeName)*sizeof(WCHAR));
}
}
-#endif
Tcl_DStringFree(&ds);
lastValidPathEnd = currentPathEndPosition;
if (cur == 0) {
@@ -2268,7 +2215,6 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
nextCheckpoint = currentPathEndPosition - path;
if (lastValidPathEnd != NULL) {
-#ifdef TCLWIN_NEW_NORM
/*
* Concatenate the normalized string in dsNorm with the
* tail of the path which we didn't recognise. The
@@ -2296,52 +2242,8 @@ TclpObjNormalizePath(interp, pathPtr, nextCheckpoint)
nextCheckpoint);
}
Tcl_DStringFree(&dsTemp);
-#else
- /*
- * The leading end of the path description was acceptable to
- * us. We therefore convert it to its long form (which is
- * used by Tcl as a unique normalized form), and return
- * that.
- */
- int endOfString;
- Tcl_Obj *tmpPathPtr;
- Tcl_Obj* objPtr = NULL;
- int useLength = lastValidPathEnd - path;
- if (*lastValidPathEnd == 0) {
- tmpPathPtr = Tcl_NewStringObj(path, useLength);
- endOfString = 1;
- } else {
- tmpPathPtr = Tcl_NewStringObj(path, useLength + 1);
- endOfString = 0;
- }
- /*
- * If this returns an error, we have a strange situation; the
- * file exists, but we can't get its long name. We will have
- * to assume the name we have is ok.
- */
- Tcl_IncrRefCount(tmpPathPtr);
- if (ConvertFileNameFormat(interp, 0, tmpPathPtr, 1, &objPtr)
- == TCL_OK) {
- int len;
- CONST char* converted = Tcl_GetStringFromObj(objPtr,&len);
- if (!endOfString) {
- if (converted[len-1] == '/') {
- lastValidPathEnd++;
- }
- /* Be nice and fix the string before we clear it */
- Tcl_AppendToObj(objPtr, lastValidPathEnd, -1);
- }
- nextCheckpoint += (len - useLength);
- path = Tcl_GetStringFromObj(objPtr,&len);
- Tcl_SetStringObj(pathPtr,path, len);
- Tcl_DecrRefCount(objPtr);
- }
- Tcl_DecrRefCount(tmpPathPtr);
-#endif
}
-#ifdef TCLWIN_NEW_NORM
Tcl_DStringFree(&dsNorm);
-#endif
}
return nextCheckpoint;
}