summary | refs | log | tree | commit | diff | stats
path: root/Python/fileutils.c
diff options
context:
space:
mode:
author: Nice Zombies <nineteendo19d0@gmail.com>, 2024-04-25 09:07:38 (GMT)
committer: GitHub <noreply@github.com>, 2024-04-25 09:07:38 (GMT)
commit: 10bb90ed49a81a525b126ce8e4d8564c1616d0b3 (patch)
tree: d75d8c65579f7e04a11abf3d04bdc64fd8cb1b3e /Python/fileutils.c
parent: e38b43c213a8ab2ad9748bac2732af9b58c816ae (diff)
download: cpython-10bb90ed49a81a525b126ce8e4d8564c1616d0b3.zip
cpython-10bb90ed49a81a525b126ce8e4d8564c1616d0b3.tar.gz
cpython-10bb90ed49a81a525b126ce8e4d8564c1616d0b3.tar.bz2
gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089)
Diffstat (limited to 'Python/fileutils.c')
-rw-r--r-- Python/fileutils.c 147
1 files changed, 115 insertions, 32 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 882d329..54853ba 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -2295,6 +2295,99 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
+void
+_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
+ Py_ssize_t *rootsize)
+{ // Measures the drive and root prefixes at the start of path and stores their lengths in *drvsize and *rootsize; path is only read.
+ assert(drvsize); // both output pointers are required
+ assert(rootsize);
+#ifndef MS_WINDOWS
+#define IS_SEP(x) (*(x) == SEP)
+ *drvsize = 0; // this branch never reports a drive
+ if (!IS_SEP(&path[0])) { // NOTE(review): size is not consulted anywhere in this branch; presumably path is NUL-terminated -- confirm with callers
+ // Relative path, e.g.: 'foo'
+ *rootsize = 0;
+ }
+ else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) { // path[2] is only read when path[1] is a separator
+ // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
+ *rootsize = 1;
+ }
+ else {
+ // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
+ // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
+ *rootsize = 2;
+ }
+#undef IS_SEP
+#else
+ const wchar_t *pEnd = size >= 0 ? &path[size] : NULL; // NULL when size is negative, i.e. no explicit length was given
+#define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) // explicit length: pointer comparison with pEnd; otherwise: NUL test
+#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
+#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) // note: dereferences x before the end test runs
+ if (IS_SEP(&path[0])) {
+ if (IS_SEP(&path[1])) {
+ // Device drives, e.g. \\.\device or \\?\device
+ // UNC drives, e.g. \\server\share or \\?\UNC\server\share
+ Py_ssize_t idx; // index of the character currently being examined
+ if (path[2] == L'?' && IS_SEP(&path[3]) &&
+ (path[4] == L'U' || path[4] == L'u') &&
+ (path[5] == L'N' || path[5] == L'n') &&
+ (path[6] == L'C' || path[6] == L'c') &&
+ IS_SEP(&path[7])) // i.e. a \\?\UNC\ prefix, with "UNC" matched case-insensitively
+ {
+ idx = 8; // resume scanning right after that 8-character prefix
+ }
+ else {
+ idx = 2; // resume scanning right after the two leading separators
+ }
+ while (!SEP_OR_END(&path[idx])) { // advance to the end of the first component; NOTE(review): with an explicit size shorter than idx, IS_END's pointer-equality test cannot match -- confirm callers never pass that
+ idx++;
+ }
+ if (IS_END(&path[idx])) { // nothing follows the first component: everything scanned is drive, no root
+ *drvsize = idx;
+ *rootsize = 0;
+ }
+ else {
+ idx++; // step over the separator that ended the first component
+ while (!SEP_OR_END(&path[idx])) { // advance to the end of the second component
+ idx++;
+ }
+ *drvsize = idx; // the drive spans both components
+ if (IS_END(&path[idx])) {
+ *rootsize = 0;
+ }
+ else {
+ *rootsize = 1; // a separator follows the drive
+ }
+ }
+ }
+ else {
+ // Relative path with root, e.g. \Windows
+ *drvsize = 0;
+ *rootsize = 1;
+ }
+ }
+ else if (!IS_END(&path[0]) && path[1] == L':') { // any first character followed by ':' is accepted; it is not checked to be a letter
+ *drvsize = 2;
+ if (IS_SEP(&path[2])) {
+ // Absolute drive-letter path, e.g. X:\Windows
+ *rootsize = 1;
+ }
+ else {
+ // Relative path with drive, e.g. X:Windows
+ *rootsize = 0;
+ }
+ }
+ else {
+ // Relative path, e.g. Windows
+ *drvsize = 0;
+ *rootsize = 0;
+ }
+#undef SEP_OR_END
+#undef IS_SEP
+#undef IS_END
+#endif
+}
+
// The caller must ensure "buffer" is big enough.
static int
join_relfile(wchar_t *buffer, size_t bufsize,
@@ -2411,49 +2504,39 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
#endif
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
- // Skip leading '.\'
if (p1[0] == L'.' && IS_SEP(&p1[1])) {
+ // Skip leading '.\'
path = &path[2];
- while (IS_SEP(path) && !IS_END(path)) {
+ while (IS_SEP(path)) {
path++;
}
p1 = p2 = minP2 = path;
lastC = SEP;
}
+ else {
+ Py_ssize_t drvsize, rootsize;
+ _Py_skiproot(path, size, &drvsize, &rootsize);
+ if (drvsize || rootsize) {
+ // Skip past root and update minP2
+ p1 = &path[drvsize + rootsize];
+#ifndef ALTSEP
+ p2 = p1;
+#else
+ for (; p2 < p1; ++p2) {
+ if (*p2 == ALTSEP) {
+ *p2 = SEP;
+ }
+ }
+#endif
+ minP2 = p2 - 1;
+ lastC = *minP2;
#ifdef MS_WINDOWS
- // Skip past drive segment and update minP2
- else if (p1[0] && p1[1] == L':') {
- *p2++ = *p1++;
- *p2++ = *p1++;
- minP2 = p2;
- lastC = L':';
- }
- // Skip past all \\-prefixed paths, including \\?\, \\.\,
- // and network paths, including the first segment.
- else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
- int sepCount = 2;
- *p2++ = SEP;
- *p2++ = SEP;
- p1 += 2;
- for (; !IS_END(p1) && sepCount; ++p1) {
- if (IS_SEP(p1)) {
- --sepCount;
- *p2++ = lastC = SEP;
- } else {
- *p2++ = lastC = *p1;
+ if (lastC != SEP) {
+ minP2++;
}
+#endif
}
- minP2 = p2 - 1;
- }
-#else
- // Skip past two leading SEPs
- else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
- *p2++ = *p1++;
- *p2++ = *p1++;
- minP2 = p2 - 1; // Absolute path has SEP at minP2
- lastC = SEP;
}
-#endif /* MS_WINDOWS */
/* if pEnd is specified, check that. Else, check for null terminator */
for (; !IS_END(p1); ++p1) {