/*
* This file compiles an abstract syntax tree (AST) into Python bytecode.
*
* The primary entry point is PyAST_Compile(), which returns a
* PyCodeObject. The compiler makes several passes to build the code
* object:
* 1. Checks for future statements. See future.c
* 2. Builds a symbol table. See symtable.c.
* 3. Generate code for basic blocks. See compiler_mod() in this file.
* 4. Assemble the basic blocks into final code. See assemble() in
* this file.
* 5. Optimize the byte code (peephole optimizations). See peephole.c
*
* Note that compiler_mod() suggests module, but the module ast type
* (mod_ty) has cases for expressions and interactive statements.
*
* CAUTION: The VISIT_* macros abort the current function when they
* encounter a problem. So don't invoke them when there is memory
* which needs to be released. Code blocks are OK, as the compiler
* structure takes care of releasing those. Use the arena to manage
* objects.
*/
#include "Python.h"
#include "Python-ast.h"
#include "node.h"
#include "ast.h"
#include "code.h"
#include "symtable.h"
#include "opcode.h"
#include "wordcode_helpers.h"
#define DEFAULT_BLOCK_SIZE 16
#define DEFAULT_BLOCKS 8
#define DEFAULT_CODE_SIZE 128
#define DEFAULT_LNOTAB_SIZE 16
#define COMP_GENEXP 0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP 2
#define COMP_DICTCOMP 3
struct instr {
unsigned i_jabs : 1;
unsigned i_jrel : 1;
unsigned char i_opcode;
int i_oparg;
struct basicblock_ *i_target; /* target block (if jump instruction) */
int i_lineno;
};
typedef struct basicblock_ {
/* Each basicblock in a compilation unit is linked via b_list in the
reverse order that the block are allocated. b_list points to the next
block, not to be confused with b_next, which is next by control flow. */
struct basicblock_ *b_list;
/* number of instructions used */
int b_iused;
/* length of instruction array (b_instr) */
int b_ialloc;
/* pointer to an array of instructions, initially NULL */
struct instr *b_instr;
/* If b_next is non-NULL, it is a pointer to the next
block reached by normal control flow. */
struct basicblock_ *b_next;
/* b_seen is used to perform a DFS of basicblocks. */
unsigned b_seen : 1;
/* b_return is true if a RETURN_VALUE opcode is inserted. */
unsigned b_return : 1;
/* depth of stack upon entry of block, computed by stackdepth() */
int b_startdepth;
/* instruction offset for block, computed by assemble_jump_offsets() */
int b_offset;
} basicblock;
/* fblockinfo tracks the current frame block.
A frame block is used to handle loops, try/except, and try/finally.
It's called a frame block to distinguish it from a basic block in the
compiler IR.
*/
enum fblocktype { LOOP, EXCEPT, FINALLY_TRY, FINALLY_END };
struct fblockinfo {
enum fblocktype fb_type;
basicblock *fb_block;
};
enum {
COMPILER_SCOPE_MODULE,
COMPILER_SCOPE_CLASS,
COMPILER_SCOPE_FUNCTION,
COMPILER_SCOPE_ASYNC_FUNCTION,
COMPILER_SCOPE_LAMBDA,
COMPILER_SCOPE_COMPREHENSION,
};
/* The following items change on entry and exit of code blocks.
They must be saved and restored when returning to a block.
*/
struct compiler_unit {
PySTEntryObject *u_ste;
PyObject *u_name;
PyObject *u_qualname; /* dot-separated qualified name (lazy) */
int u_scope_type;
/* The following fields are dicts that map objects to
the index of them in co_XXX. The index is used as
the argument for opcodes that refer to those collections.
*/
PyObject *u_consts; /* all constants */
PyObject *u_names; /* all names */
PyObject *u_varnames; /* local variables */
PyObject *u_cellvars; /* cell variables */
PyObject *u_freevars; /* free variables */
PyObject *u_private; /* for private name mangling */
Py_ssize_t u_argcount; /* number of arguments for block */
Py_ssize_t u_kwonlyargcount; /* number of keyword only arguments for block */
/* Pointer to the most recently allocated block. By following b_list
members, you can reach all early allocated blocks. */
basicblock *u_blocks;
basicblock *u_curblock; /* pointer to current block */
int u_nfblocks;
struct fblockinfo u_fblock[CO_MAXBLOCKS];
r & (DEF_LOCAL | USE)) {
char buf[256];
if (cur & DEF_LOCAL)
PyOS_snprintf(buf, sizeof(buf),
GLOBAL_AFTER_ASSIGN,
c_name);
else
PyOS_snprintf(buf, sizeof(buf),
GLOBAL_AFTER_USE,
c_name);
if (!symtable_warn(st, buf, s->lineno))
return 0;
}
if (!symtable_add_def(st, name, DEF_GLOBAL))
return 0;
}
break;
}
case Nonlocal_kind: {
int i;
asdl_seq *seq = s->v.Nonlocal.names;
for (i = 0; i < asdl_seq_LEN(seq); i++) {
identifier name = (identifier)asdl_seq_GET(seq, i);
char *c_name = _PyUnicode_AsString(name);
long cur = symtable_lookup(st, name);
if (cur < 0)
return 0;
if (cur & (DEF_LOCAL | USE)) {
char buf[256];
if (cur & DEF_LOCAL)
PyOS_snprintf(buf, sizeof(buf),
NONLOCAL_AFTER_ASSIGN,
c_name);
else
PyOS_snprintf(buf, sizeof(buf),
NONLOCAL_AFTER_USE,
c_name);
if (!symtable_warn(st, buf, s->lineno))
return 0;
}
if (!symtable_add_def(st, name, DEF_NONLOCAL))
return 0;
}
break;
}
case Expr_kind:
VISIT(st, expr, s->v.Expr.value);
break;
case Pass_kind:
case Break_kind:
case Continue_kind:
/* nothing to do here */
break;
case With_kind:
VISIT(st, expr, s->v.With.context_expr);
if (s->v.With.optional_vars) {
VISIT(st, expr, s->v.With.optional_vars);
}
VISIT_SEQ(st, stmt, s->v.With.body);
break;
}
return 1;
}
static int
symtable_visit_expr(struct symtable *st, expr_ty e)
{
switch (e->kind) {
case BoolOp_kind:
VISIT_SEQ(st, expr, e->v.BoolOp.values);
break;
case BinOp_kind:
VISIT(st, expr, e->v.BinOp.left);
VISIT(st, expr, e->v.BinOp.right);
break;
case UnaryOp_kind:
VISIT(st, expr, e->v.UnaryOp.operand);
break;
case Lambda_kind: {
if (!GET_IDENTIFIER(lambda))
return 0;
if (e->v.Lambda.args->defaults)
VISIT_SEQ(st, expr, e->v.Lambda.args->defaults);
if (!symtable_enter_block(st, lambda,
FunctionBlock, (void *)e, e->lineno,
e->col_offset))
return 0;
VISIT_IN_BLOCK(st, arguments, e->v.Lambda.args, (void*)e);
VISIT_IN_BLOCK(st, expr, e->v.Lambda.body, (void*)e);
if (!symtable_exit_block(st, (void *)e))
return 0;
break;
}
case IfExp_kind:
VISIT(st, expr, e->v.IfExp.test);
VISIT(st, expr, e->v.IfExp.body);
VISIT(st, expr, e->v.IfExp.orelse);
break;
case Dict_kind:
VISIT_SEQ(st, expr, e->v.Dict.keys);
VISIT_SEQ(st, expr, e->v.Dict.values);
break;
case Set_kind:
VISIT_SEQ(st, expr, e->v.Set.elts);
break;
case GeneratorExp_kind:
if (!symtable_visit_genexp(st, e))
return 0;
break;
case ListComp_kind:
if (!symtable_visit_listcomp(st, e))
return 0;
break;
case SetComp_kind:
if (!symtable_visit_setcomp(st, e))
return 0;
break;
case DictComp_kind:
if (!symtable_visit_dictcomp(st, e))
return 0;
break;
case Yield_kind:
if (e->v.Yield.value)
VISIT(st, expr, e->v.Yield.value);
st->st_cur->ste_generator = 1;
if (st->st_cur->ste_returns_value) {
PyErr_SetString(PyExc_SyntaxError,
RETURN_VAL_IN_GENERATOR);
PyErr_SyntaxLocationEx(st->st_filename,
e->lineno, e->col_offset);
return 0;
}
break;
case Compare_kind:
VISIT(st, expr, e->v.Compare.left);
VISIT_SEQ(st, expr, e->v.Compare.comparators);
break;
case Call_kind:
VISIT(st, expr, e->v.Call.func);
VISIT_SEQ(st, expr, e->v.Call.args);
VISIT_SEQ(st, keyword, e->v.Call.keywords);
if (e->v.Call.starargs)
VISIT(st, expr, e->v.Call.starargs);
if (e->v.Call.kwargs)
VISIT(st, expr, e->v.Call.kwargs);
break;
case Num_kind:
case Str_kind:
case Bytes_kind:
case Ellipsis_kind:
/* Nothing to do here. */
break;
/* The following exprs can be assignment targets. */
case Attribute_kind:
VISIT(st, expr, e->v.Attribute.value);
break;
case Subscript_kind:
VISIT(st, expr, e->v.Subscript.value);
VISIT(st, slice, e->v.Subscript.slice);
break;
case Starred_kind:
VISIT(st, expr, e->v.Starred.value);
break;
case Name_kind:
if (!symtable_add_def(st, e->v.Name.id,
e->v.Name.ctx == Load ? USE : DEF_LOCAL))
return 0;
/* Special-case super: it counts as a use of __class__ */
if (e->v.Name.ctx == Load &&
st->st_cur->ste_type == FunctionBlock &&
!PyUnicode_CompareWithASCIIString(e->v.Name.id, "super")) {
if (!GET_IDENTIFIER(__class__) ||
!symtable_add_def(st, __class__, USE))
return 0;
}
break;
/* child nodes of List and Tuple will have expr_context set */
case List_kind:
VISIT_SEQ(st, expr, e->v.List.elts);
break;
case Tuple_kind:
VISIT_SEQ(st, expr, e->v.Tuple.elts);
break;
}
return 1;
}
static int
symtable_implicit_arg(struct symtable *st, int pos)
{
PyObject *id = PyUnicode_FromFormat(".%d", pos);
if (id == NULL)
return 0;
if (!symtable_add_def(st, id, DEF_PARAM)) {
Py_DECREF(id);
return 0;
}
Py_DECREF(id);
return 1;
}
static int
symtable_visit_params(struct symtable *st, asdl_seq *args)
{
int i;
if (!args)
return -1;
for (i = 0; i < asdl_seq_LEN(args); i++) {
arg_ty arg = (arg_ty)asdl_seq_GET(args, i);
if (!symtable_add_def(st, arg->arg, DEF_PARAM))
return 0;
}
return 1;
}
static int
symtable_visit_argannotations(struct symtable *st, asdl_seq *args)
{
int i;
if (!args)
return -1;
for (i = 0; i < asdl_seq_LEN(args); i++) {
arg_ty arg = (arg_ty)asdl_seq_GET(args, i);
if (arg->annotation)
VISIT(st, expr, arg->annotation);
}
return 1;
}
static int
symtable_visit_annotations(struct symtable *st, stmt_ty s)
{
arguments_ty a = s->v.FunctionDef.args;
if (a->args && !symtable_visit_argannotations(st, a->args))
return 0;
if (a->varargannotation)
VISIT(st, expr, a->varargannotation);
if (a->kwargannotation)
VISIT(st, expr, a->kwargannotation);
if (a->kwonlyargs && !symtable_visit_argannotations(st, a->kwonlyargs))
return 0;
if (s->v.FunctionDef.returns)
VISIT(st, expr, s->v.FunctionDef.returns);
return 1;
}
static int
symtable_visit_arguments(struct symtable *st, arguments_ty a)
{
/* skip default arguments inside function block
XXX should ast be different?
*/
if (a->args && !symtable_visit_params(st, a->args))
return 0;
if (a->kwonlyargs && !symtable_visit_params(st, a->kwonlyargs))
return 0;
if (a->vararg) {
if (!symtable_add_def(st, a->vararg, DEF_PARAM))
return 0;
st->st_cur->ste_varargs = 1;
}
if (a->kwarg) {
if (!symtable_add_def(st, a->kwarg, DEF_PARAM))
return 0;
st->st_cur->ste_varkeywords = 1;
}
return 1;
}
static int
symtable_visit_excepthandler(struct symtable *st, excepthandler_ty eh)
{
if (eh->v.ExceptHandler.type)
VISIT(st, expr, eh->v.ExceptHandler.type);
if (eh->v.ExceptHandler.name)
if (!symtable_add_def(st, eh->v.ExceptHandler.name, DEF_LOCAL))
return 0;
VISIT_SEQ(st, stmt, eh->v.ExceptHandler.body);
return 1;
}
static int
symtable_visit_alias(struct symtable *st, alias_ty a)
{
/* Compute store_name, the name actually bound by the import
operation. It is different than a->name when a->name is a
dotted package name (e.g. spam.eggs)
*/
PyObject *store_name;
PyObject *name = (a->asname == NULL) ? a->name : a->asname;
const Py_UNICODE *base = PyUnicode_AS_UNICODE(name);
Py_UNICODE *dot = Py_UNICODE_strchr(base, '.');
if (dot) {
store_name = PyUnicode_FromUnicode(base, dot - base);
if (!store_name)
return 0;
}
else {
store_name = name;
Py_INCREF(store_name);
}
if (PyUnicode_CompareWithASCIIString(name, "*")) {
int r = symtable_add_def(st, store_name, DEF_IMPORT);
Py_DECREF(store_name);
return r;
}
else {
if (st->st_cur->ste_type != ModuleBlock) {
int lineno = st->st_cur->ste_lineno;
int col_offset = st->st_cur->ste_col_offset;
PyErr_SetString(PyExc_SyntaxError, IMPORT_STAR_WARNING);
PyErr_SyntaxLocationEx(st->st_filename, lineno, col_offset);
Py_DECREF(store_name);
return 0;
}
st->st_cur->ste_unoptimized |= OPT_IMPORT_STAR;
Py_DECREF(store_name);
return 1;
}
}
static int
symtable_visit_comprehension(struct symtable *st, comprehension_ty lc)
{
VISIT(st, expr, lc->target);
VISIT(st, expr, lc->iter);
VISIT_SEQ(st, expr, lc->ifs);
return 1;
}
static int
symtable_visit_keyword(struct symtable *st, keyword_ty k)
{
VISIT(st, expr, k->value);
return 1;
}
static int
symtable_visit_slice(struct symtable *st, slice_ty s)
{
switch (s->kind) {
case Slice_kind:
if (s->v.Slice.lower)
VISIT(st, expr, s->v.Slice.lower)
if (s->v.Slice.upper)
VISIT(st, expr, s->v.Slice.upper)
if (s->v.Slice.step)
VISIT(st, expr, s->v.Slice.step)
break;
case ExtSlice_kind:
VISIT_SEQ(st, slice, s->v.ExtSlice.dims)
break;
case Index_kind:
VISIT(st, expr, s->v.Index.value)
break;
}
return 1;
}
static int
symtable_handle_comprehension(struct symtable *st, expr_ty e,
identifier scope_name, asdl_seq *generators,
expr_ty elt, expr_ty value)
{
int is_generator = (e->kind == GeneratorExp_kind);
int needs_tmp = !is_generator;
comprehension_ty outermost = ((comprehension_ty)
asdl_seq_GET(generators, 0));
/* Outermost iterator is evaluated in current scope */
VISIT(st, expr, outermost->iter);
/* Create comprehension scope for the rest */
if (!scope_name ||
!symtable_enter_block(st, scope_name, FunctionBlock, (void *)e,
e->lineno, e->col_offset)) {
return 0;
}
st->st_cur->ste_generator = is_generator;
/* Outermost iter is received as an argument */
if (!symtable_implicit_arg(st, 0)) {
symtable_exit_block(st, (void *)e);
return 0;
}
/* Allocate temporary name if needed */
if (needs_tmp && !symtable_new_tmpname(st)) {
symtable_exit_block(st, (void *)e);
return 0;
}
VISIT_IN_BLOCK(st, expr, outermost->target, (void*)e);
VISIT_SEQ_IN_BLOCK(st, expr, outermost->ifs, (void*)e);
VISIT_SEQ_TAIL_IN_BLOCK(st, comprehension,
generators, 1, (void*)e);
if (value)
VISIT_IN_BLOCK(st, expr, value, (void*)e);
VISIT_IN_BLOCK(st, expr, elt, (void*)e);
return symtable_exit_block(st, (void *)e);
}
static int
symtable_visit_genexp(struct symtable *st, expr_ty e)
{
return symtable_handle_comprehension(st, e, GET_IDENTIFIER(genexpr),
e->v.GeneratorExp.generators,
e->v.GeneratorExp.elt, NULL);
}
static int
symtable_visit_listcomp(struct symtable *st, expr_ty e)
{
return symtable_handle_comprehension(st, e, GET_IDENTIFIER(listcomp),
e->v.ListComp.generators,
e->v.ListComp.elt, NULL);
}
static int
symtable_visit_setcomp(struct symtable *st, expr_ty e)
{
return symtable_handle_comprehension(st, e, GET_IDENTIFIER(setcomp),
e->v.SetComp.generators,
e->v.SetComp.elt, NULL);
}
static int
symtable_visit_dictcomp(struct symtable *st, expr_ty e)
{
return symtable_handle_comprehension(st, e, GET_IDENTIFIER(dictcomp),
e->v.DictComp.generators,
e->v.DictComp.key,
e->v.DictComp.value);
}
|