summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2011-10-23 15:29:08 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2011-10-23 15:29:08 (GMT)
commit2db72863fb38bfe7cb10c733dfaef15316619930 (patch)
treefefb6783d553c588be693ce44ba54913b39da62b /Python
parent9715d26305f7cf6c4db91f3670f1b7e99f04dd2d (diff)
downloadcpython-2db72863fb38bfe7cb10c733dfaef15316619930.zip
cpython-2db72863fb38bfe7cb10c733dfaef15316619930.tar.gz
cpython-2db72863fb38bfe7cb10c733dfaef15316619930.tar.bz2
Reformulate make_compiled_pathname in terms of unicode objects.
Diffstat (limited to 'Python')
-rw-r--r--Python/import.c177
1 files changed, 61 insertions, 116 deletions
diff --git a/Python/import.c b/Python/import.c
index 0db7e43..1101c92 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -904,6 +904,25 @@ rightmost_sep(Py_UCS4 *s)
return found;
}
+/* Like rightmost_sep, but operate on unicode objects. */
+static Py_ssize_t
+rightmost_sep_obj(PyObject* o)
+{
+ Py_ssize_t found, i;
+ Py_UCS4 c;
+ for (found = -1, i = 0; i < PyUnicode_GET_LENGTH(o); i++) {
+ c = PyUnicode_READ_CHAR(o, i);
+ if (c == SEP
+#ifdef ALTSEP
+ || c == ALTSEP
+#endif
+ )
+ {
+ found = i;
+ }
+ }
+ return found;
+}
/* Given a pathname for a Python source file, fill a buffer with the
pathname for the corresponding compiled file. Return the pathname
@@ -915,123 +934,49 @@ rightmost_sep(Py_UCS4 *s)
static PyObject*
make_compiled_pathname(PyObject *pathstr, int debug)
{
- Py_UCS4 *pathname;
- Py_UCS4 buf[MAXPATHLEN];
- size_t buflen = (size_t)MAXPATHLEN;
- size_t len;
- size_t i, save;
- Py_UCS4 *pos;
- int sep = SEP;
-
- pathname = PyUnicode_AsUCS4Copy(pathstr);
- if (!pathname)
- return NULL;
- len = Py_UCS4_strlen(pathname);
-
- /* Sanity check that the buffer has roughly enough space to hold what
- will eventually be the full path to the compiled file. The 5 extra
- bytes include the slash afer __pycache__, the two extra dots, the
- extra trailing character ('c' or 'o') and null. This isn't exact
- because the contents of the buffer can affect how many actual
- characters of the string get into the buffer. We'll do a final
- sanity check before writing the extension to ensure we do not
- overflow the buffer.
- */
- if (len + Py_UCS4_strlen(CACHEDIR_UNICODE) + Py_UCS4_strlen(PYC_TAG_UNICODE) + 5 > buflen) {
- PyMem_Free(pathname);
- return NULL;
- }
-
- /* Find the last path separator and copy everything from the start of
- the source string up to and including the separator.
- */
- pos = rightmost_sep(pathname);
- if (pos == NULL) {
- i = 0;
- }
- else {
- sep = *pos;
- i = pos - pathname + 1;
- Py_UCS4_strncpy(buf, pathname, i);
- }
-
- save = i;
- buf[i++] = '\0';
- /* Add __pycache__/ */
- Py_UCS4_strcat(buf, CACHEDIR_UNICODE);
- i += Py_UCS4_strlen(CACHEDIR_UNICODE) - 1;
- buf[i++] = sep;
- buf[i] = '\0';
- /* Add the base filename, but remove the .py or .pyw extension, since
- the tag name must go before the extension.
- */
- Py_UCS4_strcat(buf, pathname + save);
- pos = Py_UCS4_strrchr(buf + i, '.');
- if (pos != NULL)
- *++pos = '\0';
-
- /* pathname is not used from here on. */
- PyMem_Free(pathname);
-
- Py_UCS4_strcat(buf, PYC_TAG_UNICODE);
- /* The length test above assumes that we're only adding one character
- to the end of what would normally be the extension. What if there
- is no extension, or the string ends in '.' or '.p', and otherwise
- fills the buffer? By appending 4 more characters onto the string
- here, we could overrun the buffer.
-
- As a simple example, let's say buflen=32 and the input string is
- 'xxx.py'. strlen() would be 6 and the test above would yield:
-
- (6 + 11 + 10 + 5 == 32) > 32
-
- which is false and so the name mangling would continue. This would
- be fine because we'd end up with this string in buf:
-
- __pycache__/xxx.cpython-32.pyc\0
-
- strlen(of that) == 30 + the nul fits inside a 32 character buffer.
- We can even handle an input string of say 'xxxxx' above because
- that's (5 + 11 + 10 + 5 == 31) > 32 which is also false. Name
- mangling that yields:
-
- __pycache__/xxxxxcpython-32.pyc\0
-
- which is 32 characters including the nul, and thus fits in the
- buffer. However, an input string of 'xxxxxx' would yield a result
- string of:
-
- __pycache__/xxxxxxcpython-32.pyc\0
-
- which is 33 characters long (including the nul), thus overflowing
- the buffer, even though the first test would fail, i.e.: the input
- string is also 6 characters long, so 32 > 32 is false.
-
- The reason the first test fails but we still overflow the buffer is
- that the test above only expects to add one extra character to be
- added to the extension, and here we're adding three (pyc). We
- don't add the first dot, so that reclaims one of expected
- positions, leaving us overflowing by 1 byte (3 extra - 1 reclaimed
- dot - 1 expected extra == 1 overflowed).
-
- The best we can do is ensure that we still have enough room in the
- target buffer before we write the extension. Because it's always
- only the extension that can cause the overflow, and never the other
- path bytes we've written, it's sufficient to just do one more test
- here. Still, the assertion that follows can't hurt.
- */
-#if 0
- printf("strlen(buf): %d; buflen: %d\n", (int)strlen(buf), (int)buflen);
-#endif
- len = Py_UCS4_strlen(buf);
- if (len + 5 > buflen)
+ PyObject *result;
+ Py_ssize_t fname, ext, len, i, pos, taglen;
+ Py_ssize_t pycache_len = sizeof("__pycache__/") - 1;
+ int kind;
+ void *data;
+
+ /* Compute the output string size. */
+ len = PyUnicode_GET_LENGTH(pathstr);
+ /* If there is no separator, this returns -1, so
+ lastsep will be 0. */
+ fname = rightmost_sep_obj(pathstr) + 1;
+ ext = fname - 1;
+ for(i = fname; i < len; i++)
+ if (PyUnicode_READ_CHAR(pathstr, i) == '.')
+ ext = i + 1;
+ if (ext < fname)
+ /* No dot in filename; use entire filename */
+ ext = len;
+
+ /* result = pathstr[:fname] + "__pycache__" + SEP +
+ pathstr[fname:ext] + tag + ".py[co]" */
+ taglen = strlen(pyc_tag);
+ result = PyUnicode_New(ext + pycache_len + taglen + 4,
+ PyUnicode_MAX_CHAR_VALUE(pathstr));
+ if (!result)
return NULL;
- buf[len] = '.'; len++;
- buf[len] = 'p'; len++;
- buf[len] = 'y'; len++;
- buf[len] = debug ? 'c' : 'o'; len++;
- assert(len <= buflen);
- return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, len);
+ kind = PyUnicode_KIND(result);
+ data = PyUnicode_DATA(result);
+ PyUnicode_CopyCharacters(result, 0, pathstr, 0, fname);
+ pos = fname;
+ for (i = 0; i < pycache_len - 1; i++)
+ PyUnicode_WRITE(kind, data, pos++, "__pycache__"[i]);
+ PyUnicode_WRITE(kind, data, pos++, SEP);
+ PyUnicode_CopyCharacters(result, pos, pathstr,
+ fname, ext - fname);
+ pos += ext - fname;
+ for (i = 0; pyc_tag[i]; i++)
+ PyUnicode_WRITE(kind, data, pos++, pyc_tag[i]);
+ PyUnicode_WRITE(kind, data, pos++, '.');
+ PyUnicode_WRITE(kind, data, pos++, 'p');
+ PyUnicode_WRITE(kind, data, pos++, 'y');
+ PyUnicode_WRITE(kind, data, pos++, debug ? 'c' : 'o');
+ return result;
}