summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Include/stringobject.h | 4
-rw-r--r-- Lib/encodings/string_escape.py | 23
-rw-r--r-- Lib/pickle.py | 13
-rw-r--r-- Lib/test/pickletester.py | 6
-rw-r--r-- Modules/_codecsmodule.c | 46
-rw-r--r-- Modules/cPickle.c | 51
-rw-r--r-- Objects/stringobject.c | 160
-rw-r--r-- Python/compile.c | 103
8 files changed, 267 insertions, 139 deletions
diff --git a/Include/stringobject.h b/Include/stringobject.h
index abc8fad..fd0f49a 100644
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -53,6 +53,7 @@ PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...)
__attribute__((format(printf, 1, 2)));
PyAPI_FUNC(int) PyString_Size(PyObject *);
PyAPI_FUNC(char *) PyString_AsString(PyObject *);
+PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
PyAPI_FUNC(int) _PyString_Resize(PyObject **, int);
@@ -60,6 +61,9 @@ PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
int, char**, int*);
+extern DL_IMPORT(PyObject *) PyString_DecodeEscape(const char *, int,
+ const char *, int,
+ const char *);
PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
diff --git a/Lib/encodings/string_escape.py b/Lib/encodings/string_escape.py
new file mode 100644
index 0000000..0e9a17f
--- /dev/null
+++ b/Lib/encodings/string_escape.py
@@ -0,0 +1,23 @@
+# -*- coding: iso-8859-1 -*-
+""" Python 'escape' Codec
+
+
+Written by Martin v. Löwis (martin@v.loewis.de).
+
+"""
+import codecs
+
+class Codec(codecs.Codec):  # codec whose encode/decode are the codecs.escape_* functions
+
+ encode = codecs.escape_encode  # bound directly; no Python-level wrapper
+ decode = codecs.escape_decode  # bound directly; no Python-level wrapper
+
+class StreamWriter(Codec,codecs.StreamWriter):  # combines Codec with codecs.StreamWriter
+ pass  # nothing added beyond the two base classes
+
+class StreamReader(Codec,codecs.StreamReader):  # combines Codec with codecs.StreamReader
+ pass  # nothing added beyond the two base classes
+
+def getregentry():  # returns the 4-tuple (encode, decode, StreamReader, StreamWriter)
+
+ return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
diff --git a/Lib/pickle.py b/Lib/pickle.py
index a507595..4bc54ec 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -126,6 +126,8 @@ FALSE = 'I00\n'
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
+_quotes = ["'", '"']
+
class Pickler:
def __init__(self, file, bin = 0):
@@ -740,10 +742,15 @@ class Unpickler:
def load_string(self):
rep = self.readline()[:-1]
- if not self._is_string_secure(rep):
+ for q in _quotes:
+ if rep.startswith(q):
+ if not rep.endswith(q):
+ raise ValueError, "insecure string pickle"
+ rep = rep[len(q):-len(q)]
+ break
+ else:
raise ValueError, "insecure string pickle"
- self.append(eval(rep,
- {'__builtins__': {}})) # Let's be careful
+ self.append(rep.decode("string-escape"))
dispatch[STRING] = load_string
def _is_string_secure(self, s):
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index eb97a9c..3dc7901 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -195,13 +195,13 @@ class AbstractPickleTests(unittest.TestCase):
def test_insecure_strings(self):
insecure = ["abc", "2 + 2", # not quoted
- "'abc' + 'def'", # not a single quoted string
+ #"'abc' + 'def'", # not a single quoted string
"'abc", # quote is not closed
"'abc\"", # open quote and close quote don't match
"'abc' ?", # junk after close quote
# some tests of the quoting rules
- "'abc\"\''",
- "'\\\\a\'\'\'\\\'\\\\\''",
+ #"'abc\"\''",
+ #"'\\\\a\'\'\'\\\'\\\\\''",
]
for s in insecure:
buf = "S" + s + "\012p0\012."
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index d663293..1e3fc5d 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -71,7 +71,6 @@ PyObject *codeclookup(PyObject *self, PyObject *args)
return NULL;
}
-#ifdef Py_USING_UNICODE
/* --- Helpers ------------------------------------------------------------ */
static
@@ -97,6 +96,49 @@ PyObject *codec_tuple(PyObject *unicode,
return v;
}
+/* --- String codecs ------------------------------------------------------ */
+static PyObject *
+escape_decode(PyObject *self,
+              PyObject *args)
+{
+    /* Arguments: the escaped input buffer and an optional error-handling
+       name (may be None/omitted, in which case it stays NULL). */
+    const char *input;
+    int input_len;
+    const char *errors = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
+                          &input, &input_len, &errors)) {
+        return NULL;
+    }
+
+    /* Plain (non-unicode) string, no recoding.  The result is handed to
+       codec_tuple together with the number of input bytes. */
+    decoded = PyString_DecodeEscape(input, input_len, errors, 0, NULL);
+    return codec_tuple(decoded, input_len);
+}
+
+/* Encode a string object into its backslash-escaped form.
+
+   args: (str[, errors]) -- str must be a string object; errors is parsed
+   but not otherwise used here.
+   Returns codec_tuple(escaped, length of escaped), or NULL with an
+   exception set. */
+static PyObject *
+escape_encode(PyObject *self,
+              PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+    char *buf;
+    int len;
+
+    if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
+                          &PyString_Type, &str, &errors))
+        return NULL;
+
+    /* smartquotes=0: the result is always wrapped in single quotes. */
+    str = PyString_Repr(str, 0);
+    if (!str)
+        return NULL;
+
+    /* The string will be quoted. Unquote, similar to unicode-escape. */
+    buf = PyString_AS_STRING (str);
+    len = PyString_GET_SIZE (str);
+    memmove(buf, buf+1, len-2);
+    /* On failure _PyString_Resize releases the object and sets str to
+       NULL, so it must not be touched afterwards. */
+    if (_PyString_Resize(&str, len-2) < 0)
+        return NULL;
+
+    return codec_tuple(str, PyString_Size(str));
+}
+
+#ifdef Py_USING_UNICODE
/* --- Decoder ------------------------------------------------------------ */
static PyObject *
@@ -669,6 +711,8 @@ mbcs_encode(PyObject *self,
static PyMethodDef _codecs_functions[] = {
{"register", codecregister, METH_VARARGS},
{"lookup", codeclookup, METH_VARARGS},
+ {"escape_encode", escape_encode, METH_VARARGS},
+ {"escape_decode", escape_decode, METH_VARARGS},
#ifdef Py_USING_UNICODE
{"utf_8_encode", utf_8_encode, METH_VARARGS},
{"utf_8_decode", utf_8_decode, METH_VARARGS},
diff --git a/Modules/cPickle.c b/Modules/cPickle.c
index d1f7867..14936a6 100644
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@@ -2864,46 +2864,35 @@ static int
load_string(Unpicklerobject *self)
{
PyObject *str = 0;
- int len, res = -1, nslash;
- char *s, q, *p;
-
- static PyObject *eval_dict = 0;
+ int len, res = -1;
+ char *s, *p;
if ((len = (*self->readline_func)(self, &s)) < 0) return -1;
if (len < 2) return bad_readline();
if (!( s=pystrndup(s,len))) return -1;
- /* Check for unquoted quotes (evil strings) */
- q=*s;
- if (q != '"' && q != '\'') goto insecure;
- for (p=s+1, nslash=0; *p; p++) {
- if (*p==q && nslash%2==0) break;
- if (*p=='\\') nslash++;
- else nslash=0;
- }
- if (*p == q) {
- for (p++; *p; p++)
- if (*(unsigned char *)p > ' ')
- goto insecure;
- }
- else
+
+ /* Strip outermost quotes */
+ while (s[len-1] <= ' ')
+ len--;
+ if(s[0]=='"' && s[len-1]=='"'){
+ s[len-1] = '\0';
+ p = s + 1 ;
+ len -= 2;
+ } else if(s[0]=='\'' && s[len-1]=='\''){
+ s[len-1] = '\0';
+ p = s + 1 ;
+ len -= 2;
+ } else
goto insecure;
/********************************************/
- if (!( eval_dict ))
- if (!( eval_dict = Py_BuildValue("{s{}}", "__builtins__")))
- goto finally;
-
- if (!( str = PyRun_String(s, Py_eval_input, eval_dict, eval_dict)))
- goto finally;
-
- free(s);
- PDATA_PUSH(self->stack, str, -1);
- return 0;
-
- finally:
+ str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
+ if (str) {
+ PDATA_PUSH(self->stack, str, -1);
+ res = 0;
+ }
free(s);
-
return res;
insecure:
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 1bbd201..19c2834 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -489,6 +489,152 @@ string_dealloc(PyObject *op)
op->ob_type->tp_free(op);
}
+/* Unescape a backslash-escaped string.
+
+   s, len          - input bytes and the number of bytes to consume.
+   errors          - policy for a malformed \x escape: NULL or "strict"
+                     raises ValueError, "replace" emits '?', "ignore"
+                     emits nothing; any other name raises ValueError.
+                     Also passed to the UTF-8 decode / re-encode calls.
+   unicode         - if non-zero, the string is a u-literal.
+   recode_encoding - if non-zero, the string is UTF-8 encoded and should
+                     be re-encoded in the specified encoding.
+
+   Returns a new string object, or NULL with an exception set. */
+
+PyObject *PyString_DecodeEscape(const char *s,
+                                int len,
+                                const char *errors,
+                                int unicode,
+                                const char *recode_encoding)
+{
+    int c;
+    char *p, *buf;
+    const char *end;
+    PyObject *v;
+    /* No escape below produces more output bytes than it consumes, so
+       len bytes suffice without recoding.
+       NOTE(review): 4*len is assumed to be enough room for the recoded
+       text -- confirm for the supported target encodings. */
+    v = PyString_FromStringAndSize((char *)NULL,
+                                   recode_encoding ? 4*len:len);
+    if (v == NULL)
+        return NULL;
+    p = buf = PyString_AsString(v);
+    end = s + len;
+    while (s < end) {
+        if (*s != '\\') {
+#ifdef Py_USING_UNICODE
+            if (recode_encoding && (*s & 0x80)) {
+                PyObject *u, *w;
+                char *r;
+                const char* t;
+                int rn;
+                t = s;
+                /* Decode non-ASCII bytes as UTF-8. */
+                while (t < end && (*t & 0x80)) t++;
+                u = PyUnicode_DecodeUTF8(s, t - s, errors);
+                if(!u) goto failed;
+
+                /* Recode them in target encoding. */
+                w = PyUnicode_AsEncodedString(
+                    u, recode_encoding, errors);
+                Py_DECREF(u);
+                if (!w) goto failed;
+
+                /* Append bytes to output buffer. */
+                r = PyString_AsString(w);
+                rn = PyString_Size(w);
+                memcpy(p, r, rn);
+                p += rn;
+                Py_DECREF(w);
+                s = t;
+            } else {
+                *p++ = *s++;
+            }
+#else
+            *p++ = *s++;
+#endif
+            continue;
+        }
+        s++;
+        if (s == end) {
+            /* A backslash as the last byte has nothing to escape;
+               continuing would read past the input. */
+            PyErr_SetString(PyExc_ValueError,
+                            "Trailing \\ in string");
+            goto failed;
+        }
+        switch (*s++) {
+        /* XXX This assumes ASCII! */
+        case '\n': break;
+        case '\\': *p++ = '\\'; break;
+        case '\'': *p++ = '\''; break;
+        case '\"': *p++ = '\"'; break;
+        case 'b': *p++ = '\b'; break;
+        case 'f': *p++ = '\014'; break; /* FF */
+        case 't': *p++ = '\t'; break;
+        case 'n': *p++ = '\n'; break;
+        case 'r': *p++ = '\r'; break;
+        case 'v': *p++ = '\013'; break; /* VT */
+        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
+        case '0': case '1': case '2': case '3':
+        case '4': case '5': case '6': case '7':
+            /* Up to three octal digits, never looking beyond end. */
+            c = s[-1] - '0';
+            if (s < end && '0' <= *s && *s <= '7') {
+                c = (c<<3) + *s++ - '0';
+                if (s < end && '0' <= *s && *s <= '7')
+                    c = (c<<3) + *s++ - '0';
+            }
+            *p++ = c;
+            break;
+        case 'x':
+            /* Exactly two hex digits are required, both inside the
+               input. */
+            if (s + 1 < end
+                && isxdigit(Py_CHARMASK(s[0]))
+                && isxdigit(Py_CHARMASK(s[1]))) {
+                unsigned int x = 0;
+                c = Py_CHARMASK(*s);
+                s++;
+                if (isdigit(c))
+                    x = c - '0';
+                else if (islower(c))
+                    x = 10 + c - 'a';
+                else
+                    x = 10 + c - 'A';
+                x = x << 4;
+                c = Py_CHARMASK(*s);
+                s++;
+                if (isdigit(c))
+                    x += c - '0';
+                else if (islower(c))
+                    x += 10 + c - 'a';
+                else
+                    x += 10 + c - 'A';
+                *p++ = x;
+                break;
+            }
+            if (!errors || strcmp(errors, "strict") == 0) {
+                PyErr_SetString(PyExc_ValueError,
+                                "invalid \\x escape");
+                goto failed;
+            }
+            if (strcmp(errors, "replace") == 0) {
+                *p++ = '?';
+            } else if (strcmp(errors, "ignore") == 0)
+                /* do nothing */;
+            else {
+                PyErr_Format(PyExc_ValueError,
+                             "decoding error; "
+                             "unknown error handling code: %.400s",
+                             errors);
+                goto failed;
+            }
+            /* The bad escape has been dealt with.  Falling into the
+               cases below would additionally emit "\x" and could write
+               past the end of the output buffer. */
+            break;
+#ifndef Py_USING_UNICODE
+        case 'u':
+        case 'U':
+        case 'N':
+            if (unicode) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Unicode escapes not legal "
+                                "when Unicode disabled");
+                goto failed;
+            }
+            /* fall through: not an escape in a plain string */
+#endif
+        default:
+            /* Unknown escape: keep the backslash and the character. */
+            *p++ = '\\';
+            *p++ = s[-1];
+            break;
+        }
+    }
+    _PyString_Resize(&v, (int)(p - buf));
+    return v;
+  failed:
+    Py_DECREF(v);
+    return NULL;
+}
+
static int
string_getsize(register PyObject *op)
{
@@ -614,9 +760,10 @@ string_print(PyStringObject *op, FILE *fp, int flags)
return 0;
}
-static PyObject *
-string_repr(register PyStringObject *op)
+PyObject *
+PyString_Repr(PyObject *obj, int smartquotes)
{
+ register PyStringObject* op = (PyStringObject*) obj;
size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
PyObject *v;
if (newsize > INT_MAX) {
@@ -635,7 +782,8 @@ string_repr(register PyStringObject *op)
/* figure out which quote to use; single is preferred */
quote = '\'';
- if (memchr(op->ob_sval, '\'', op->ob_size) &&
+ if (smartquotes &&
+ memchr(op->ob_sval, '\'', op->ob_size) &&
!memchr(op->ob_sval, '"', op->ob_size))
quote = '"';
@@ -674,6 +822,12 @@ string_repr(register PyStringObject *op)
}
static PyObject * /* Wrapper that forwards to PyString_Repr with smartquotes enabled. */
+string_repr(PyObject *op)
+{
+ return PyString_Repr(op, 1); /* 1 = smartquotes: '"' may be chosen when the string contains "'" but no '"' */
+}
+
+static PyObject *
string_str(PyObject *s)
{
assert(PyString_Check(s));
diff --git a/Python/compile.c b/Python/compile.c
index b160f73..d1655e9 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -1226,9 +1226,7 @@ parsestr(struct compiling *com, char *s)
char *buf;
char *p;
char *end;
- int c;
- int first = *s;
- int quote = first;
+ int quote = *s;
int rawmode = 0;
char* encoding = ((com == NULL) ? NULL : com->c_encoding);
int need_encoding;
@@ -1347,102 +1345,11 @@ parsestr(struct compiling *com, char *s)
return PyString_FromStringAndSize(s, len);
}
}
- v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
- need_encoding ? len * 4 : len);
+
+ v = PyString_DecodeEscape(s, len, NULL, unicode,
+ need_encoding ? encoding : NULL);
if (v == NULL)
- return NULL;
- p = buf = PyString_AsString(v);
- end = s + len;
- while (s < end) {
- if (*s != '\\') {
- ORDINAL:
- if (need_encoding && (*s & 0x80)) {
- char *r;
- int rn;
- PyObject* w = decode_utf8(&s, end, encoding);
- if (w == NULL)
- return NULL;
- r = PyString_AsString(w);
- rn = PyString_Size(w);
- memcpy(p, r, rn);
- p += rn;
- Py_DECREF(w);
- } else {
- *p++ = *s++;
- }
- continue;
- }
- s++;
- switch (*s++) {
- /* XXX This assumes ASCII! */
- case '\n': break;
- case '\\': *p++ = '\\'; break;
- case '\'': *p++ = '\''; break;
- case '\"': *p++ = '\"'; break;
- case 'b': *p++ = '\b'; break;
- case 'f': *p++ = '\014'; break; /* FF */
- case 't': *p++ = '\t'; break;
- case 'n': *p++ = '\n'; break;
- case 'r': *p++ = '\r'; break;
- case 'v': *p++ = '\013'; break; /* VT */
- case 'a': *p++ = '\007'; break; /* BEL, not classic C */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- c = s[-1] - '0';
- if ('0' <= *s && *s <= '7') {
- c = (c<<3) + *s++ - '0';
- if ('0' <= *s && *s <= '7')
- c = (c<<3) + *s++ - '0';
- }
- *p++ = c;
- break;
- case 'x':
- if (isxdigit(Py_CHARMASK(s[0]))
- && isxdigit(Py_CHARMASK(s[1]))) {
- unsigned int x = 0;
- c = Py_CHARMASK(*s);
- s++;
- if (isdigit(c))
- x = c - '0';
- else if (islower(c))
- x = 10 + c - 'a';
- else
- x = 10 + c - 'A';
- x = x << 4;
- c = Py_CHARMASK(*s);
- s++;
- if (isdigit(c))
- x += c - '0';
- else if (islower(c))
- x += 10 + c - 'a';
- else
- x += 10 + c - 'A';
- *p++ = x;
- break;
- }
- Py_DECREF(v);
- com_error(com, PyExc_ValueError,
- "invalid \\x escape");
- return NULL;
-#ifndef Py_USING_UNICODE
- case 'u':
- case 'U':
- case 'N':
- if (unicode) {
- Py_DECREF(v);
- com_error(com, PyExc_ValueError,
- "Unicode escapes not legal "
- "when Unicode disabled");
- return NULL;
- }
-#endif
- default:
- *p++ = '\\';
- s--;
- goto ORDINAL;
- }
- }
- _PyString_Resize(&v, (int)(p - buf));
+ PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
return v;
}