/* parsermodule.c * * Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic * Institute and State University, Blacksburg, Virginia, USA. * Portions copyright 1991-1995 by Stichting Mathematisch Centrum, * Amsterdam, The Netherlands. Copying is permitted under the terms * associated with the main Python distribution, with the additional * restriction that this additional notice be included and maintained * on all distributed copies. * * This module serves to replace the original parser module written * by Guido. The functionality is not matched precisely, but the * original may be implemented on top of this. This is desirable * since the source of the text to be parsed is now divorced from * this interface. * * Unlike the prior interface, the ability to give a parse tree * produced by Python code as a tuple to the compiler is enabled by * this module. See the documentation for more details. * * I've added some annotations that help with the lint code-checking * program, but they're not complete by a long shot. The real errors * that lint detects are gone, but there are still warnings with * Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations * look like "NOTE(...)". * */ #include "Python.h" /* general Python API */ #include "Python-ast.h" /* mod_ty */ #undef Yield /* undefine macro conflicting with */ #include "ast.h" #include "graminit.h" /* symbols defined in the grammar */ #include "node.h" /* internal parser structure */ #include "errcode.h" /* error codes for PyNode_*() */ #include "token.h" /* token definitions */ /* ISTERMINAL() / ISNONTERMINAL() */ #include "grammar.h" #include "parsetok.h" extern grammar _PyParser_Grammar; /* From graminit.c */ #ifdef lint #include #else #define NOTE(x) #endif /* String constants used to initialize module attributes. * */ static const char parser_copyright_string[] = "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\ University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\ Virginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\ Centrum, Amsterdam, The Netherlands."; PyDoc_STRVAR(parser_doc_string, "This is an interface to Python's internal parser."); static const char parser_version_string[] = "0.5"; typedef PyObject* (*SeqMaker) (Py_ssize_t length); typedef int (*SeqInserter) (PyObject* sequence, Py_ssize_t index, PyObject* element); /* The function below is copyrighted by Stichting Mathematisch Centrum. The * original copyright statement is included below, and continues to apply * in full to the function immediately following. All other material is * original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic * Institute and State University. Changes were made to comply with the * new naming conventions. Added arguments to provide support for creating * lists as well as tuples, and optionally including the line numbers. */ static PyObject* node2tuple(node *n, /* node to convert */ SeqMaker mkseq, /* create sequence */ SeqInserter addelem, /* func. to add elem. in seq. */ int lineno, /* include line numbers? */ int col_offset) /* include column offsets? */ { PyObject *result = NULL, *w; if (n == NULL) { Py_RETURN_NONE; } if (ISNONTERMINAL(TYPE(n))) { int i; result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl)); if (result == NULL) goto error; w = PyLong_FromLong(TYPE(n)); if (w == NULL) goto error; (void) addelem(result, 0, w); for (i = 0; i < NCH(n); i++) { w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset); if (w == NULL) goto error; (void) addelem(result, i+1, w); } if (TYPE(n) == encoding_decl) { w = PyUnicode_FromString(STR(n)); if (w == NULL) goto error; (void) addelem(result, i+1, w); } } else if (ISTERMINAL(TYPE(n))) { result = mkseq(2 + lineno + col_offset); if (result == NULL) goto error; w = PyLong_FromLong(TYPE(n)); if (w == NULL) goto error; (void) addelem(result, 0, w); w = PyUnicode_FromString(STR(n)); if (w == NULL) goto error; (void) addelem(result, 1, w); if (lineno) { w = PyLong_FromLong(n->n_lineno); if (w == NULL) goto error; (void) addelem(result, 2, w); } if (col_offset) { w = PyLong_FromLong(n->n_col_offset); if (w == NULL) goto error; (void) addelem(result, 2 + lineno, w); } } else { PyErr_SetString(PyExc_SystemError, "unrecognized parse tree node type"); return ((PyObject*) NULL); } return result; error: Py_XDECREF(result); return NULL; } /* * End of material copyrighted by Stichting Mathematisch Centrum. */ /* There are two types of intermediate objects we're interested in: * 'eval' and 'exec' types. These constants can be used in the st_type * field of the object type to identify which any given object represents. * These should probably go in an external header to allow other extensions * to use them, but then, we really should be using C++ too. ;-) */ #define PyST_EXPR 1 #define PyST_SUITE 2 /* These are the internal objects and definitions required to implement the * ST type. Most of the internal names are more reminiscent of the 'old' * naming style, but the code uses the new naming convention. */ static PyObject* parser_error = 0; typedef struct { PyObject_HEAD /* standard object header */ node* st_node; /* the node* returned by the parser */ int st_type; /* EXPR or SUITE ? */ PyCompilerFlags st_flags; /* Parser and compiler flags */ } PyST_Object; static void parser_free(PyST_Object *st); static PyObject* parser_sizeof(PyST_Object *, void *); static PyObject* parser_richcompare(PyObject *left, PyObject *right, int op); static PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *); static PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *); static PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *); static PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *); static PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *); #define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS) static PyMethodDef parser_methods[] = { {"compile", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE, PyDoc_STR("Compile this ST object into a code object.")}, {"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE, PyDoc_STR("Determines if this ST object was created from an expression.")}, {"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE, PyDoc_STR("Determines if this ST object was created from a suite.")}, {"tolist", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates a list-tree representation of this ST.")}, {"totuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates a tuple-tree representation of this ST.")}, {"__sizeof__", (PyCFunction)parser_sizeof, METH_NOARGS, PyDoc_STR("Returns size in memory, in bytes.")}, {NULL, NULL, 0, NULL} }; static PyTypeObject PyST_Type = { PyVarObject_HEAD_INIT(NULL, 0) "parser.st", /* tp_name */ (int) sizeof(PyST_Object), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)parser_free, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_reserved */ 0, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ 0, /* tp_getattro */ 0, /* tp_setattro */ /* Functions to access object as input/output buffer */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT, /* tp_flags */ /* __doc__ */ "Intermediate representation of a Python parse tree.", 0, /* tp_traverse */ 0, /* tp_clear */ parser_richcompare, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ parser_methods, /* tp_methods */ }; /* PyST_Type */ /* PyST_Type isn't subclassable, so just check ob_type */ #define PyST_Object_Check(v) ((v)->ob_type == &PyST_Type) static int parser_compare_nodes(node *left, node *right) { int j; if (TYPE(left) < TYPE(right)) return (-1); if (TYPE(right) < TYPE(left)) return (1); if (ISTERMINAL(TYPE(left))) return (strcmp(STR(left), STR(right))); if (NCH(left) < NCH(right)) return (-1); if (NCH(right) < NCH(left)) return (1); for (j = 0; j < NCH(left); ++j) { int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j)); if (v != 0) return (v); } return (0); } /* parser_richcompare(PyObject* left, PyObject* right, int op) * * Comparison function used by the Python operators ==, !=, <, >, <=, >= * This really just wraps a call to parser_compare_nodes() with some easy * checks and protection code. * */ static PyObject * parser_richcompare(PyObject *left, PyObject *right, int op) { int result; /* neither argument should be NULL, unless something's gone wrong */ if (left == NULL || right == NULL) { PyErr_BadInternalCall(); return NULL; } /* both arguments should be instances of PyST_Object */ if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) { Py_RETURN_NOTIMPLEMENTED; } if (left == right) /* if arguments are identical, they're equal */ result = 0; else result = parser_compare_nodes(((PyST_Object *)left)->st_node, ((PyST_Object *)right)->st_node); Py_RETURN_RICHCOMPARE(result, 0, op); } /* parser_newstobject(node* st) * * Allocates a new Python object representing an ST. This is simply the * 'wrapper' object that holds a node* and allows it to be passed around in * Python code. * */ static PyObject* parser_newstobject(node *st, int type) { PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type); if (o != 0) { o->st_node = st; o->st_type = type; o->st_flags.cf_flags = 0; o->st_flags.cf_feature_version = PY_MINOR_VERSION; } else { PyNode_Free(st); } return ((PyObject*)o); } /* void parser_free(PyST_Object* st) * * This is called by a del statement that reduces the reference count to 0. * */ static void parser_free(PyST_Object *st) { PyNode_Free(st->st_node); PyObject_Del(st); } static PyObject * parser_sizeof(PyST_Object *st, void *unused) { Py_ssize_t res; res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node); return PyLong_FromSsize_t(res); } /* parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw) * * This provides conversion from a node* to a tuple object that can be * returned to the Python-level caller. The ST object is not modified. * */ static PyObject* parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw) { int line_info = 0; int col_info = 0; PyObject *res = 0; int ok; static char *keywords[] = {"st", "line_info", "col_info", NULL}; if (self == NULL || PyModule_Check(self)) { ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords, &PyST_Type, &self, &line_info, &col_info); } else ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1], &line_info, &col_info); if (ok != 0) { /* * Convert ST into a tuple representation. Use Guido's function, * since it's known to work already. */ res = node2tuple(((PyST_Object*)self)->st_node, PyTuple_New, PyTuple_SetItem, line_info, col_info); } return (res); } /* parser_st2list(PyObject* self, PyObject* args, PyObject* kw) * * This provides conversion from a node* to a list object that can be * returned to the Python-level caller. The ST object is not modified. * */ static PyObject* parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw) { int line_info = 0; int col_info = 0; PyObject *res = 0; int ok; static char *keywords[] = {"st", "line_info", "col_info", NULL}; if (self == NULL || PyModule_Check(self)) ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords, &PyST_Type, &self, &line_info, &col_info); else ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1], &line_info, &col_info); if (ok) { /* * Convert ST into a tuple representation. Use Guido's function, * since it's known to work already. */ res = node2tuple(self->st_node, PyList_New, PyList_SetItem, line_info, col_info); } return (res); } /* parser_compilest(PyObject* self, PyObject* args) * * This function creates code objects from the parse tree represented by * the passed-in data object. An optional file name is passed in as well. * */ static PyObject* parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw) { PyObject* res = NULL; PyArena* arena = NULL; mod_ty mod; PyObject* filename = NULL; int ok; static char *keywords[] = {"st", "filename", NULL}; if (self == NULL || PyModule_Check(self)) ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords, &PyST_Type, &self, PyUnicode_FSDecoder, &filename); else ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1], PyUnicode_FSDecoder, &filename); if (!ok) goto error; if (filename == NULL) { filename = PyUnicode_FromString(""); if (filename == NULL) goto error; } arena = PyArena_New(); if (!arena) goto error; mod = PyAST_FromNodeObject(self->st_node, &self->st_flags, filename, arena); if (!mod) goto error; res = (PyObject *)PyAST_CompileObject(mod, filename, &self->st_flags, -1, arena); error: Py_XDECREF(filename); if (arena != NULL) PyArena_Free(arena); return res; } /* PyObject* parser_isexpr(PyObject* self, PyObject* args) * PyObject* parser_issuite(PyObject* self, PyObject* args) * * Checks the passed-in ST object to determine if it is an expression or * a statement suite, respectively. The return is a Python truth value. * */ static PyObject* parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw) { PyObject* res = 0; int ok; static char *keywords[] = {"st", NULL}; if (self == NULL || PyModule_Check(self)) ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords, &PyST_Type, &self); else ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]); if (ok) { /* Check to see if the ST represents an expression or not. */ res = (self->st_type == PyST_EXPR) ? Py_True : Py_False; Py_INCREF(res); } return (res); } static PyObject* parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw) { PyObject* res = 0; int ok; static char *keywords[] = {"st", NULL}; if (self == NULL || PyModule_Check(self)) ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords, &PyST_Type, &self); else ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]); if (ok) { /* Check to see if the ST represents an expression or not. */ res = (self->st_type == PyST_EXPR) ? Py_False : Py_True; Py_INCREF(res); } return (res); } /* err_string(const char* message) * * Sets the error string for an exception of type ParserError. * */ static void err_string(const char *message) { PyErr_SetString(parser_error, message); } /* PyObject* parser_do_parse(PyObject* args, int type) * * Internal function to actually execute the parse and return the result if * successful or set an exception if not. * */ static PyObject* parser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type) { char* string = 0; PyObject* res = 0; int flags = 0; perrdetail err; static char *keywords[] = {"source", NULL}; if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) { node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL, &_PyParser_Grammar, (type == PyST_EXPR) ? eval_input : file_input, &err, &flags); if (n) { res = parser_newstobject(n, type); if (res) { ((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK; ((PyST_Object *)res)->st_flags.cf_feature_version = PY_MINOR_VERSION; } } else { PyParser_SetError(&err); } PyParser_ClearError(&err); } return (res); } /* PyObject* parser_expr(PyObject* self, PyObject* args) * PyObject* parser_suite(PyObject* self, PyObject* args) * * External interfaces to the parser itself. Which is called determines if * the parser attempts to recognize an expression ('eval' form) or statement * suite ('exec' form). The real work is done by parser_do_parse() above. * */ static PyObject* parser_expr(PyST_Object *self, PyObject *args, PyObject *kw) { NOTE(ARGUNUSED(self)) return (parser_do_parse(args, kw, "s:expr", PyST_EXPR)); } static PyObject* parser_suite(PyST_Object *self, PyObject *args, PyObject *kw) { NOTE(ARGUNUSED(self)) return (parser_do_parse(args, kw, "s:suite", PyST_SUITE)); } /* This is the messy part of the code. Conversion from a tuple to an ST * object requires that the input tuple be valid without having to rely on * catching an exception from the compiler. This is done to allow the * compiler itself to remain fast, since most of its input will come from * the parser directly, and therefore be known to be syntactically correct. * This validation is done to ensure that we don't core dump the compile * phase, returning an exception instead. * * Two aspects can be broken out in this code: creating a node tree from * the tuple passed in, and verifying that it is indeed valid. It may be * advantageous to expand the number of ST types to include funcdefs and * lambdadefs to take advantage of the optimizer, recognizing those STs * here. They are not necessary, and not quite as useful in a raw form. * For now, let's get expressions and suites working reliably. */ static node* build_node_tree(PyObject *tuple); static int validate_node(node *tree) { int type = TYPE(tree); int nch = NCH(tree); dfa *nt_dfa; state *dfa_state; int pos, arc; assert(ISNONTERMINAL(type)); type -= NT_OFFSET; if (type >= _PyParser_Grammar.g_ndfas) { PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree)); return 0; } nt_dfa = &_PyParser_Grammar.g_dfa[type]; REQ(tree, nt_dfa->d_type); /* Run the DFA for this nonterminal. */ dfa_state = nt_dfa->d_state; for (pos = 0; pos < nch; ++pos) { node *ch = CHILD(tree, pos); int ch_type = TYPE(ch); if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas) || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS)) || (ch_type < 0) ) { PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type); return 0; } if (ch_type == suite && TYPE(tree) == funcdef) { /* This is the opposite hack of what we do in parser.c (search for func_body_suite), except we don't ever support type comments here. */ ch_type = func_body_suite; } for (arc = 0; arc < dfa_state->s_narcs; ++arc) { short a_label = dfa_state->s_arc[arc].a_lbl; assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); const char *label_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if ((_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) && ((ch->n_str == NULL) || (label_str == NULL) || (strcmp(ch->n_str, label_str) == 0)) ) { /* The child is acceptable; if non-terminal, validate it recursively. */ if (ISNONTERMINAL(ch_type) && !validate_node(ch)) return 0; /* Update the state, and move on to the next child. */ dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow]; goto arc_found; } } /* What would this state have accepted? */ { short a_label = dfa_state->s_arc->a_lbl; if (!a_label) /* Wouldn't accept any more children */ goto illegal_num_children; int next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type; const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if (ISNONTERMINAL(next_type)) { PyErr_Format(parser_error, "Expected %s, got %s.", _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name, ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] : _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name); } else if (expected_str != NULL) { PyErr_Format(parser_error, "Illegal terminal: expected '%s'.", expected_str); } else { PyErr_Format(parser_error, "Illegal terminal: expected %s.", _PyParser_TokenNames[next_type]); } return 0; } arc_found: continue; } /* Are we in a final state? If so, return 1 for successful validation. */ for (arc = 0; arc < dfa_state->s_narcs; ++arc) { if (!dfa_state->s_arc[arc].a_lbl) { return 1; } } illegal_num_children: PyErr_Format(parser_error, "Illegal number of children for %s node.", nt_dfa->d_name); return 0; } /* PyObject* parser_tuple2st(PyObject* self, PyObject* args) * * This is the public function, called from the Python code. It receives a * single tuple object from the caller, and creates an ST object if the * tuple can be validated. It does this by checking the first code of the * tuple, and, if acceptable, builds the internal representation. If this * step succeeds, the internal representation is validated as fully as * possible with the recursive validate_node() routine defined above. * * This function must be changed if support is to be added for PyST_FRAGMENT * ST objects. * */ static PyObject* parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw) { NOTE(ARGUNUSED(self)) PyObject *st = 0; PyObject *tuple; node *tree; static char *keywords[] = {"sequence", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords, &tuple)) return (0); if (!PySequence_Check(tuple)) { PyErr_SetString(PyExc_ValueError, "sequence2st() requires a single sequence argument"); return (0); } /* * Convert the tree to the internal form before checking it. */ tree = build_node_tree(tuple); if (tree != 0) { node *validation_root = NULL; int tree_type = 0; switch (TYPE(tree)) { case eval_input: /* Might be an eval form. */ tree_type = PyST_EXPR; validation_root = tree; break; case encoding_decl: /* This looks like an encoding_decl so far. */ if (NCH(tree) == 1) { tree_type = PyST_SUITE; validation_root = CHILD(tree, 0); } else { err_string("Error Parsing encoding_decl"); } break; case file_input: /* This looks like an exec form so far. */ tree_type = PyST_SUITE; validation_root = tree; break; default: /* This is a fragment, at best. */ err_string("parse tree does not use a valid start symbol"); } if (validation_root != NULL && validate_node(validation_root)) st = parser_newstobject(tree, tree_type); else PyNode_Free(tree); } /* Make sure we raise an exception on all errors. We should never * get this, but we'd do well to be sure something is done. */ if (st == NULL && !PyErr_Occurred()) err_string("unspecified ST error occurred"); return st; } /* node* build_node_children() * * Iterate across the children of the current non-terminal node and build * their structures. If successful, return the root of this portion of * the tree, otherwise, 0. Any required exception will be specified already, * and no memory will have been deallocated. * */ static node* build_node_children(PyObject *tuple, node *root, int *line_num) { Py_ssize_t len = PyObject_Size(tuple); Py_ssize_t i; int err; if (len < 0) { return NULL; } for (i = 1; i < len; ++i) { /* elem must always be a sequence, however simple */ PyObject* elem = PySequence_GetItem(tuple, i); int ok = elem != NULL; int type = 0; char *strn = 0; if (ok) ok = PySequence_Check(elem); if (ok) { PyObject *temp = PySequence_GetItem(elem, 0); if (temp == NULL) ok = 0; else { ok = PyLong_Check(temp); if (ok) { type = _PyLong_AsInt(temp); if (type == -1 && PyErr_Occurred()) { Py_DECREF(temp); Py_DECREF(elem); return NULL; } } Py_DECREF(temp); } } if (!ok) { PyObject *err = Py_BuildValue("Os", elem, "Illegal node construct."); PyErr_SetObject(parser_error, err); Py_XDECREF(err); Py_XDECREF(elem); return NULL; } if (ISTERMINAL(type)) { Py_ssize_t len = PyObject_Size(elem); PyObject *temp; const char *temp_str; if ((len != 2) && (len != 3)) { err_string("terminal nodes must have 2 or 3 entries"); Py_DECREF(elem); return NULL; } temp = PySequence_GetItem(elem, 1); if (temp == NULL) { Py_DECREF(elem); return NULL; } if (!PyUnicode_Check(temp)) { PyErr_Format(parser_error, "second item in terminal node must be a string," " found %s", Py_TYPE(temp)->tp_name); Py_DECREF(temp); Py_DECREF(elem); return NULL; } if (len == 3) { PyObject *o = PySequence_GetItem(elem, 2); if (o == NULL) { Py_DECREF(temp); Py_DECREF(elem); return NULL; } if (PyLong_Check(o)) { int num = _PyLong_AsInt(o); if (num == -1 && PyErr_Occurred()) { Py_DECREF(o); Py_DECREF(temp); Py_DECREF(elem); return NULL; } *line_num = num; } else { PyErr_Format(parser_error, "third item in terminal node must be an" " integer, found %s", Py_TYPE(temp)->tp_name); Py_DECREF(o); Py_DECREF(temp); Py_DECREF(elem); return NULL; } Py_DECREF(o); } temp_str = PyUnicode_AsUTF8AndSize(temp, &len); if (temp_str == NULL) { Py_DECREF(temp); Py_DECREF(elem); return NULL; } strn = (char *)PyObject_MALLOC(len + 1); if (strn == NULL) { Py_DECREF(temp); Py_DECREF(elem); PyErr_NoMemory(); return NULL; } (void) memcpy(strn, temp_str, len + 1); Py_DECREF(temp); } else if (!ISNONTERMINAL(type)) { /* * It has to be one or the other; this is an error. * Raise an exception. */ PyObject *err = Py_BuildValue("Os", elem, "unknown node type."); PyErr_SetObject(parser_error, err); Py_XDECREF(err); Py_DECREF(elem); return NULL; } err = PyNode_AddChild(root, type, strn, *line_num, 0, *line_num, 0); if (err == E_NOMEM) { Py_DECREF(elem); PyObject_FREE(strn); PyErr_NoMemory(); return NULL; } if (err == E_OVERFLOW) { Py_DECREF(elem); PyObject_FREE(strn); PyErr_SetString(PyExc_ValueError, "unsupported number of child nodes"); return NULL; } if (ISNONTERMINAL(type)) { node* new_child = CHILD(root, i - 1); if (new_child != build_node_children(elem, new_child, line_num)) { Py_DECREF(elem); return NULL; } } else if (type == NEWLINE) { /* It's true: we increment the */ ++(*line_num); /* line number *after* the newline! */ } Py_DECREF(elem); } return root; } static node* build_node_tree(PyObject *tuple) { node* res = 0; PyObject *temp = PySequence_GetItem(tuple, 0); long num = -1; if (temp != NULL) num = PyLong_AsLong(temp); Py_XDECREF(temp); if (ISTERMINAL(num)) { /* * The tuple is simple, but it doesn't start with a start symbol. * Raise an exception now and be done with it. */ tuple = Py_BuildValue("Os", tuple, "Illegal syntax-tree; cannot start with terminal symbol."); PyErr_SetObject(parser_error, tuple); Py_XDECREF(tuple); } else if (ISNONTERMINAL(num)) { /* * Not efficient, but that can be handled later. */ int line_num = 0; PyObject *encoding = NULL; if (num == encoding_decl) { encoding = PySequence_GetItem(tuple, 2); if (encoding == NULL) { PyErr_SetString(parser_error, "missed encoding"); return NULL; } if (!PyUnicode_Check(encoding)) { PyErr_Format(parser_error, "encoding must be a string, found %.200s", Py_TYPE(encoding)->tp_name); Py_DECREF(encoding); return NULL; } /* tuple isn't borrowed anymore here, need to DECREF */ tuple = PySequence_GetSlice(tuple, 0, 2); if (tuple == NULL) { Py_DECREF(encoding); return NULL; } } res = PyNode_New(num); if (res != NULL) { if (res != build_node_children(tuple, res, &line_num)) { PyNode_Free(res); res = NULL; } if (res && encoding) { Py_ssize_t len; const char *temp; temp = PyUnicode_AsUTF8AndSize(encoding, &len); if (temp == NULL) { PyNode_Free(res); Py_DECREF(encoding); Py_DECREF(tuple); return NULL; } res->n_str = (char *)PyObject_MALLOC(len + 1); if (res->n_str == NULL) { PyNode_Free(res); Py_DECREF(encoding); Py_DECREF(tuple); PyErr_NoMemory(); return NULL; } (void) memcpy(res->n_str, temp, len + 1); } } if (encoding != NULL) { Py_DECREF(encoding); Py_DECREF(tuple); } } else { /* The tuple is illegal -- if the number is neither TERMINAL nor * NONTERMINAL, we can't use it. Not sure the implementation * allows this condition, but the API doesn't preclude it. */ PyObject *err = Py_BuildValue("Os", tuple, "Illegal component tuple."); PyErr_SetObject(parser_error, err); Py_XDECREF(err); } return (res); } static PyObject* pickle_constructor = NULL; static PyObject* parser__pickler(PyObject *self, PyObject *args) { NOTE(ARGUNUSED(self)) PyObject *result = NULL; PyObject *st = NULL; PyObject *empty_dict = NULL; if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) { PyObject *newargs; PyObject *tuple; if ((empty_dict = PyDict_New()) == NULL) goto finally; if ((newargs = Py_BuildValue("Oi", st, 1)) == NULL) goto finally; tuple = parser_st2tuple((PyST_Object*)NULL, newargs, empty_dict); if (tuple != NULL) { result = Py_BuildValue("O(O)", pickle_constructor, tuple); Py_DECREF(tuple); } Py_DECREF(newargs); } finally: Py_XDECREF(empty_dict); return (result); } /* Functions exported by this module. Most of this should probably * be converted into an ST object with methods, but that is better * done directly in Python, allowing subclasses to be created directly. * We'd really have to write a wrapper around it all anyway to allow * inheritance. */ static PyMethodDef parser_functions[] = { {"compilest", (PyCFunction)(void(*)(void))parser_compilest, PUBLIC_METHOD_TYPE, PyDoc_STR("Compiles an ST object into a code object.")}, {"expr", (PyCFunction)(void(*)(void))parser_expr, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates an ST object from an expression.")}, {"isexpr", (PyCFunction)(void(*)(void))parser_isexpr, PUBLIC_METHOD_TYPE, PyDoc_STR("Determines if an ST object was created from an expression.")}, {"issuite", (PyCFunction)(void(*)(void))parser_issuite, PUBLIC_METHOD_TYPE, PyDoc_STR("Determines if an ST object was created from a suite.")}, {"suite", (PyCFunction)(void(*)(void))parser_suite, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates an ST object from a suite.")}, {"sequence2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates an ST object from a tree representation.")}, {"st2tuple", (PyCFunction)(void(*)(void))parser_st2tuple, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates a tuple-tree representation of an ST.")}, {"st2list", (PyCFunction)(void(*)(void))parser_st2list, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates a list-tree representation of an ST.")}, {"tuple2st", (PyCFunction)(void(*)(void))parser_tuple2st, PUBLIC_METHOD_TYPE, PyDoc_STR("Creates an ST object from a tree representation.")}, /* private stuff: support pickle module */ {"_pickler", (PyCFunction)parser__pickler, METH_VARARGS, PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")}, {NULL, NULL, 0, NULL} }; static struct PyModuleDef parsermodule = { PyModuleDef_HEAD_INIT, "parser", NULL, -1, parser_functions, NULL, NULL, NULL, NULL }; PyMODINIT_FUNC PyInit_parser(void); /* supply a prototype */ PyMODINIT_FUNC PyInit_parser(void) { PyObject *module, *copyreg; if (PyType_Ready(&PyST_Type) < 0) return NULL; module = PyModule_Create(&parsermodule); if (module == NULL) return NULL; if (parser_error == 0) parser_error = PyErr_NewException("parser.ParserError", NULL, NULL); if (parser_error == 0) return NULL; /* CAUTION: The code next used to skip bumping the refcount on * parser_error. That's a disaster if PyInit_parser() gets called more * than once. By incref'ing, we ensure that each module dict that * gets created owns its reference to the shared parser_error object, * and the file static parser_error vrbl owns a reference too. */ Py_INCREF(parser_error); if (PyModule_AddObject(module, "ParserError", parser_error) != 0) return NULL; Py_INCREF(&PyST_Type); PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type); PyModule_AddStringConstant(module, "__copyright__", parser_copyright_string); PyModule_AddStringConstant(module, "__doc__", parser_doc_string); PyModule_AddStringConstant(module, "__version__", parser_version_string); /* Register to support pickling. * If this fails, the import of this module will fail because an * exception will be raised here; should we clear the exception? */ copyreg = PyImport_ImportModuleNoBlock("copyreg"); if (copyreg != NULL) { PyObject *func, *pickler; _Py_IDENTIFIER(pickle); _Py_IDENTIFIER(sequence2st); _Py_IDENTIFIER(_pickler); func = _PyObject_GetAttrId(copyreg, &PyId_pickle); pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st); pickler = _PyObject_GetAttrId(module, &PyId__pickler); Py_XINCREF(pickle_constructor); if ((func != NULL) && (pickle_constructor != NULL) && (pickler != NULL)) { PyObject *res; res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler, pickle_constructor, NULL); Py_XDECREF(res); } Py_XDECREF(func); Py_XDECREF(pickle_constructor); Py_XDECREF(pickler); Py_DECREF(copyreg); } return module; }