summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Thomas Wouters <thomas@python.org>, 2007-02-23 19:56:57 (GMT)
committer: Thomas Wouters <thomas@python.org>, 2007-02-23 19:56:57 (GMT)
commit: 00e41defe8801ef37548fb60abacb3be13156d2a (patch)
tree: 863d072e568fee2b8f4959016b5954de457c7f4c
parent: cf297e46b85257396560774e5492e9d71a40f32e (diff)
download: cpython-00e41defe8801ef37548fb60abacb3be13156d2a.zip
cpython-00e41defe8801ef37548fb60abacb3be13156d2a.tar.gz
cpython-00e41defe8801ef37548fb60abacb3be13156d2a.tar.bz2
Bytes literal.
-rw-r--r-- Include/Python-ast.h | 12
-rw-r--r-- Include/opcode.h | 2
-rw-r--r-- Lib/compiler/ast.py | 14
-rw-r--r-- Lib/compiler/pyassem.py | 1
-rw-r--r-- Lib/compiler/pycodegen.py | 4
-rw-r--r-- Lib/compiler/transformer.py | 6
-rw-r--r-- Lib/opcode.py | 1
-rw-r--r-- Lib/test/test_bytes.py | 14
-rw-r--r-- Lib/test/test_compiler.py | 24
-rw-r--r-- Parser/Python.asdl | 1
-rw-r--r-- Parser/tokenizer.c | 8
-rw-r--r-- Python/Python-ast.c | 41
-rw-r--r-- Python/ast.c | 50
-rw-r--r-- Python/ceval.c | 13
-rw-r--r-- Python/compile.c | 7
15 files changed, 179 insertions, 19 deletions
diff --git a/Include/Python-ast.h b/Include/Python-ast.h
index c2fabfb..66d7b52 100644
--- a/Include/Python-ast.h
+++ b/Include/Python-ast.h
@@ -176,9 +176,9 @@ struct _stmt {
enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4,
IfExp_kind=5, Dict_kind=6, Set_kind=7, ListComp_kind=8,
GeneratorExp_kind=9, Yield_kind=10, Compare_kind=11,
- Call_kind=12, Num_kind=13, Str_kind=14, Ellipsis_kind=15,
- Attribute_kind=16, Subscript_kind=17, Name_kind=18,
- List_kind=19, Tuple_kind=20};
+ Call_kind=12, Num_kind=13, Str_kind=14, Bytes_kind=15,
+ Ellipsis_kind=16, Attribute_kind=17, Subscript_kind=18,
+ Name_kind=19, List_kind=20, Tuple_kind=21};
struct _expr {
enum _expr_kind kind;
union {
@@ -255,6 +255,10 @@ struct _expr {
} Str;
struct {
+ string s;
+ } Bytes;
+
+ struct {
expr_ty value;
identifier attr;
expr_context_ty ctx;
@@ -465,6 +469,8 @@ expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty
expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena);
#define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3)
expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena);
+#define Bytes(a0, a1, a2, a3) _Py_Bytes(a0, a1, a2, a3)
+expr_ty _Py_Bytes(string s, int lineno, int col_offset, PyArena *arena);
#define Ellipsis(a0, a1, a2) _Py_Ellipsis(a0, a1, a2)
expr_ty _Py_Ellipsis(int lineno, int col_offset, PyArena *arena);
#define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5)
diff --git a/Include/opcode.h b/Include/opcode.h
index 007816d..316ba4f 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -72,7 +72,7 @@ extern "C" {
#define LOAD_LOCALS 82
#define RETURN_VALUE 83
#define IMPORT_STAR 84
-
+#define MAKE_BYTES 85
#define YIELD_VALUE 86
#define POP_BLOCK 87
#define END_FINALLY 88
diff --git a/Lib/compiler/ast.py b/Lib/compiler/ast.py
index bc283c0..4794d66 100644
--- a/Lib/compiler/ast.py
+++ b/Lib/compiler/ast.py
@@ -267,6 +267,20 @@ class Break(Node):
def __repr__(self):
return "Break()"
+class Bytes(Node):  # compiler-package AST node holding the value of a bytes literal
+ def __init__(self, value, lineno=None):
+ self.value = value  # the literal's payload, stored as given
+ self.lineno = lineno  # source line, or None when not supplied
+
+ def getChildren(self):
+ return self.value,  # one-element tuple (note the trailing comma)
+
+ def getChildNodes(self):
+ return ()  # a literal is a leaf: it has no child nodes
+
+ def __repr__(self):
+ return "Bytes(%s)" % (repr(self.value),)
+
class CallFunc(Node):
def __init__(self, node, args, star_args = None, dstar_args = None, lineno=None):
self.node = node
diff --git a/Lib/compiler/pyassem.py b/Lib/compiler/pyassem.py
index cac899d..f665c54 100644
--- a/Lib/compiler/pyassem.py
+++ b/Lib/compiler/pyassem.py
@@ -792,6 +792,7 @@ class StackDepthTracker:
'DELETE_ATTR': -1,
'STORE_GLOBAL': -1,
'BUILD_MAP': 1,
+ 'MAKE_BYTES': 0,
'COMPARE_OP': -1,
'STORE_FAST': -1,
'IMPORT_STAR': -1,
diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py
index 8db4e0d..83fbc17 100644
--- a/Lib/compiler/pycodegen.py
+++ b/Lib/compiler/pycodegen.py
@@ -930,6 +930,10 @@ class CodeGenerator:
def visitConst(self, node):
self.emit('LOAD_CONST', node.value)
+
+ def visitBytes(self, node):  # generate code for a Bytes literal node
+ self.emit('LOAD_CONST', node.value)  # push the literal's value as a constant
+ self.emit('MAKE_BYTES')  # then turn the value on top of the stack into a bytes object
def visitKeyword(self, node):
self.emit('LOAD_CONST', node.name)
diff --git a/Lib/compiler/transformer.py b/Lib/compiler/transformer.py
index 5f2face..79b702c 100644
--- a/Lib/compiler/transformer.py
+++ b/Lib/compiler/transformer.py
@@ -745,9 +745,11 @@ class Transformer:
return eval(lit)
def atom_string(self, nodelist):
- k = ''
- for node in nodelist:
+ k = self.decode_literal(nodelist[0][1])  # seed with the first decoded piece instead of '' (presumably so a bytes result keeps its type)
+ for node in nodelist[1:]:  # append the remaining adjacent literals
k += self.decode_literal(node[1])
+ if isinstance(k, bytes):  # a bytes result gets its own node type rather than Const
+ return Bytes(str(k), lineno=nodelist[0][2])
return Const(k, lineno=nodelist[0][2])
def atom_ellipsis(self, nodelist):
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 1e15582..69982f2 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -111,6 +111,7 @@ def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
+def_op('MAKE_BYTES', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index 997122b..4dee01b 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -403,7 +403,19 @@ class BytesTest(unittest.TestCase):
self.assertEqual(bytes.join(tuple(lst)), bytes("abc"))
self.assertEqual(bytes.join(iter(lst)), bytes("abc"))
# XXX more...
-
+
+ def test_literal(self):  # each bytes literal must equal bytes(text, 'latin-1') of the paired text
+ tests = [  # (bytes literal, unicode literal with the same contents)
+ (b"Wonderful spam", u"Wonderful spam"),
+ (br"Wonderful spam too", u"Wonderful spam too"),
+ (b"\xaa\x00\000\200", u"\xaa\x00\000\200"),
+ (br"\xaa\x00\000\200", ur"\xaa\x00\000\200"),
+ ]
+ for b, s in tests:
+ self.assertEqual(b, bytes(s, 'latin-1'))
+ for c in range(128, 256):  # a raw non-ASCII character inside b"..." must raise SyntaxError
+ self.assertRaises(SyntaxError, eval,
+ 'b"%s"' % chr(c))
# Optimizations:
# __iter__? (optimization)
diff --git a/Lib/test/test_compiler.py b/Lib/test/test_compiler.py
index ab9a660..bbd7511 100644
--- a/Lib/test/test_compiler.py
+++ b/Lib/test/test_compiler.py
@@ -187,6 +187,30 @@ class CompilerTest(unittest.TestCase):
exec(c, dct)
self.assertEquals(dct.get('result'), 1)
+ def testBytesLiteral(self):  # bytes literals as compiled by the pure-Python compiler package
+ c = compiler.compile("b'foo'", '<string>', 'eval')
+ self.assertEquals(eval(c), b"foo")  # eval-mode result must equal the natively compiled literal
+
+ c = compiler.compile('def f(b=b"foo"):\n'  # bytes literal used as a default argument
+ ' b[0] += 1\n'
+ ' return b\n'
+ 'f(); f(); result = f()\n',
+ '<string>',
+ 'exec')
+ dct = {}
+ exec(c, dct)
+ self.assertEquals(dct.get('result'), b"ioo")  # one shared default object, bumped by all three calls: f -> i
+
+ c = compiler.compile('def f():\n'  # same mutation, but the literal lives in the body
+ ' b = b"foo"\n'
+ ' b[0] += 1\n'
+ ' return b\n'
+ 'f(); f(); result = f()\n',
+ '<string>',
+ 'exec')
+ dct = {}
+ exec(c, dct)
+ self.assertEquals(dct.get('result'), b"goo")  # the literal is rebuilt on every call, so only one bump shows: f -> g
NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index ea11349..fd47aa0 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -60,6 +60,7 @@ module Python version "$Revision$"
expr? starargs, expr? kwargs)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
+ | Bytes(string s)
| Ellipsis
-- other literals? bools?
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 84b7232..84bd60e 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1244,6 +1244,14 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
if (c == '"' || c == '\'')
goto letter_quote;
break;
+ case 'b':
+ case 'B':
+ c = tok_nextc(tok);
+ if (c == 'r' || c == 'R')
+ c = tok_nextc(tok);
+ if (c == '"' || c == '\'')
+ goto letter_quote;
+ break;
}
while (isalnum(c) || c == '_') {
c = tok_nextc(tok);
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index ae3a396..390ba15 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -2,7 +2,7 @@
/*
- __version__ 53731.
+ __version__ 53866.
This module must be committed separately after each AST grammar change;
The __version__ number is set to the revision number of the commit
@@ -216,6 +216,10 @@ static PyTypeObject *Str_type;
static char *Str_fields[]={
"s",
};
+static PyTypeObject *Bytes_type;
+static char *Bytes_fields[]={
+ "s",
+};
static PyTypeObject *Ellipsis_type;
static PyTypeObject *Attribute_type;
static char *Attribute_fields[]={
@@ -547,6 +551,8 @@ static int init_types(void)
if (!Num_type) return 0;
Str_type = make_type("Str", expr_type, Str_fields, 1);
if (!Str_type) return 0;
+ Bytes_type = make_type("Bytes", expr_type, Bytes_fields, 1);
+ if (!Bytes_type) return 0;
Ellipsis_type = make_type("Ellipsis", expr_type, NULL, 0);
if (!Ellipsis_type) return 0;
Attribute_type = make_type("Attribute", expr_type, Attribute_fields, 3);
@@ -1587,6 +1593,27 @@ Str(string s, int lineno, int col_offset, PyArena *arena)
}
expr_ty
+Bytes(string s, int lineno, int col_offset, PyArena *arena) /* build a Bytes expr node allocated from arena */
+{
+ expr_ty p;
+ if (!s) { /* s is mandatory: a NULL value is reported as ValueError */
+ PyErr_SetString(PyExc_ValueError,
+ "field s is required for Bytes");
+ return NULL;
+ }
+ p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
+ if (!p) { /* allocation failed: raise MemoryError and return NULL */
+ PyErr_NoMemory();
+ return NULL;
+ }
+ p->kind = Bytes_kind;
+ p->v.Bytes.s = s; /* the pointer is stored as-is; nothing is copied here */
+ p->lineno = lineno;
+ p->col_offset = col_offset;
+ return p;
+}
+
+expr_ty
Ellipsis(int lineno, int col_offset, PyArena *arena)
{
expr_ty p;
@@ -2550,6 +2577,15 @@ ast2obj_expr(void* _o)
goto failed;
Py_DECREF(value);
break;
+ case Bytes_kind:
+ result = PyType_GenericNew(Bytes_type, NULL, NULL);
+ if (!result) goto failed;
+ value = ast2obj_string(o->v.Bytes.s);
+ if (!value) goto failed;
+ if (PyObject_SetAttrString(result, "s", value) == -1)
+ goto failed;
+ Py_DECREF(value);
+ break;
case Ellipsis_kind:
result = PyType_GenericNew(Ellipsis_type, NULL, NULL);
if (!result) goto failed;
@@ -3089,7 +3125,7 @@ init_ast(void)
if (PyDict_SetItemString(d, "AST", (PyObject*)AST_type) < 0) return;
if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)
return;
- if (PyModule_AddStringConstant(m, "__version__", "53731") < 0)
+ if (PyModule_AddStringConstant(m, "__version__", "53866") < 0)
return;
if (PyDict_SetItemString(d, "mod", (PyObject*)mod_type) < 0) return;
if (PyDict_SetItemString(d, "Module", (PyObject*)Module_type) < 0)
@@ -3155,6 +3191,7 @@ init_ast(void)
if (PyDict_SetItemString(d, "Call", (PyObject*)Call_type) < 0) return;
if (PyDict_SetItemString(d, "Num", (PyObject*)Num_type) < 0) return;
if (PyDict_SetItemString(d, "Str", (PyObject*)Str_type) < 0) return;
+ if (PyDict_SetItemString(d, "Bytes", (PyObject*)Bytes_type) < 0) return;
if (PyDict_SetItemString(d, "Ellipsis", (PyObject*)Ellipsis_type) < 0)
return;
if (PyDict_SetItemString(d, "Attribute", (PyObject*)Attribute_type) <
diff --git a/Python/ast.c b/Python/ast.c
index a7d5713..9d5caf8 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -33,8 +33,9 @@ static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
static PyObject *parsenumber(const char *);
-static PyObject *parsestr(const char *s, const char *encoding);
-static PyObject *parsestrplus(struct compiling *, const node *n);
+static PyObject *parsestr(const node *n, const char *encoding, int *bytesmode);
+static PyObject *parsestrplus(struct compiling *, const node *n,
+ int *bytesmode);
#ifndef LINENO
#define LINENO(n) ((n)->n_lineno)
@@ -1383,6 +1384,7 @@ ast_for_atom(struct compiling *c, const node *n)
| '{' [dictsetmaker] '}' | NAME | NUMBER | STRING+
*/
node *ch = CHILD(n, 0);
+ int bytesmode = 0;
switch (TYPE(ch)) {
case NAME:
@@ -1390,12 +1392,15 @@ ast_for_atom(struct compiling *c, const node *n)
changed. */
return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
case STRING: {
- PyObject *str = parsestrplus(c, n);
+ PyObject *str = parsestrplus(c, n, &bytesmode);
if (!str)
return NULL;
PyArena_AddPyObject(c->c_arena, str);
- return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
+ if (bytesmode)
+ return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena);
+ else
+ return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
}
case NUMBER: {
PyObject *pynum = parsenumber(STR(ch));
@@ -3254,9 +3259,10 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
* parsestr parses it, and returns the decoded Python string object.
*/
static PyObject *
-parsestr(const char *s, const char *encoding)
+parsestr(const node *n, const char *encoding, int *bytesmode)
{
size_t len;
+ const char *s = STR(n);
int quote = Py_CHARMASK(*s);
int rawmode = 0;
int need_encoding;
@@ -3267,6 +3273,10 @@ parsestr(const char *s, const char *encoding)
quote = *++s;
unicode = 1;
}
+ if (quote == 'b' || quote == 'B') {
+ quote = *++s;
+ *bytesmode = 1;
+ }
if (quote == 'r' || quote == 'R') {
quote = *++s;
rawmode = 1;
@@ -3276,6 +3286,10 @@ parsestr(const char *s, const char *encoding)
PyErr_BadInternalCall();
return NULL;
}
+ if (unicode && *bytesmode) {
+ ast_error(n, "string cannot be both bytes and unicode");
+ return NULL;
+ }
s++;
len = strlen(s);
if (len > INT_MAX) {
@@ -3300,7 +3314,18 @@ parsestr(const char *s, const char *encoding)
return decode_unicode(s, len, rawmode, encoding);
}
#endif
- need_encoding = (encoding != NULL &&
+ if (*bytesmode) {
+ /* Disallow non-ascii characters (but not escapes) */
+ const char *c;
+ for (c = s; *c; c++) {
+ if (Py_CHARMASK(*c) >= 0x80) {
+ ast_error(n, "bytes can only contain ASCII "
+ "literal characters.");
+ return NULL;
+ }
+ }
+ }
+ need_encoding = (!*bytesmode && encoding != NULL &&
strcmp(encoding, "utf-8") != 0 &&
strcmp(encoding, "iso-8859-1") != 0);
if (rawmode || strchr(s, '\\') == NULL) {
@@ -3332,18 +3357,25 @@ parsestr(const char *s, const char *encoding)
* pasting the intermediate results together.
*/
static PyObject *
-parsestrplus(struct compiling *c, const node *n)
+parsestrplus(struct compiling *c, const node *n, int *bytesmode)
{
PyObject *v;
int i;
REQ(CHILD(n, 0), STRING);
- if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
+ v = parsestr(CHILD(n, 0), c->c_encoding, bytesmode);
+ if (v != NULL) {
/* String literal concatenation */
for (i = 1; i < NCH(n); i++) {
PyObject *s;
- s = parsestr(STR(CHILD(n, i)), c->c_encoding);
+ int subbm = 0;
+ s = parsestr(CHILD(n, i), c->c_encoding, &subbm);
if (s == NULL)
goto onError;
+ if (*bytesmode != subbm) { /* every piece must be the same kind (bytes or not) as the first literal */
+ ast_error(n, "cannot mix bytes and nonbytes literals"); /* one literal: the split form lost the space before "literals" */
+ Py_DECREF(s); /* s is local to this loop body, so it must be released before jumping out */
+ goto onError;
+ }
if (PyString_Check(v) && PyString_Check(s)) {
PyString_ConcatAndDel(&v, s);
if (v == NULL)
diff --git a/Python/ceval.c b/Python/ceval.c
index 0194687..5ceb743 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1885,6 +1885,19 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
PUSH(x);
if (x != NULL) continue;
break;
+
+ case MAKE_BYTES: /* replace the string on top of the stack with a new bytes object */
+ w = POP(); /* the compiler emits LOAD_CONST <string> immediately before this opcode */
+ if (PyString_Check(w))
+ x = PyBytes_FromStringAndSize(
+ PyString_AS_STRING(w),
+ PyString_GET_SIZE(w));
+ else
+ x = NULL; /* NOTE(review): no exception is set here before taking the error exit - confirm intended */
+ Py_DECREF(w); /* the popped reference is dropped on both paths */
+ PUSH(x);
+ if (x != NULL) continue;
+ break; /* x == NULL: leave the switch through the error path */
case LOAD_ATTR:
w = GETITEM(names, oparg);
diff --git a/Python/compile.c b/Python/compile.c
index 927569a..9655765 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -789,6 +789,8 @@ opcode_stack_effect(int opcode, int oparg)
return 1-oparg;
case BUILD_MAP:
return 1;
+ case MAKE_BYTES:
+ return 0;
case LOAD_ATTR:
return 0;
case COMPARE_OP:
@@ -3077,6 +3079,10 @@ compiler_visit_expr(struct compiler *c, expr_ty e)
case Str_kind:
ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts);
break;
+ case Bytes_kind:
+ ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
+ ADDOP(c, MAKE_BYTES);
+ break;
case Ellipsis_kind:
ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts);
break;
@@ -3426,7 +3432,6 @@ compiler_visit_slice(struct compiler *c, slice_ty s, expr_context_ty ctx)
return compiler_handle_subscr(c, kindname, ctx);
}
-
/* End of the compiler section, beginning of the assembler section */
/* do depth-first search of basic block graph, starting with block.