diff options
author | Mark Shannon <mark@hotpy.org> | 2022-04-21 15:10:37 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-21 15:10:37 (GMT) |
commit | 944fffee8916cb94321fa33cd3a43f4108717746 (patch) | |
tree | f88202dd13021ad5cf4b260ecf05ebab6015a5f6 /Objects | |
parent | 2a5f171759a31597032cfe52646929e6f8727243 (diff) | |
download | cpython-944fffee8916cb94321fa33cd3a43f4108717746.zip cpython-944fffee8916cb94321fa33cd3a43f4108717746.tar.gz cpython-944fffee8916cb94321fa33cd3a43f4108717746.tar.bz2 |
GH-88116: Use a compact format to represent end line and column offsets. (GH-91666)
* Stores all location info in linetable to conform to PEP 626.
* Remove column table from code objects.
* Remove end-line table from code objects.
* Document new location table format
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/clinic/codeobject.c.h | 70 | ||||
-rw-r--r-- | Objects/codeobject.c | 487 | ||||
-rw-r--r-- | Objects/frameobject.c | 6 | ||||
-rw-r--r-- | Objects/locations.md | 69 |
4 files changed, 417 insertions, 215 deletions
diff --git a/Objects/clinic/codeobject.c.h b/Objects/clinic/codeobject.c.h index 272bcd6..41c5c2e 100644 --- a/Objects/clinic/codeobject.c.h +++ b/Objects/clinic/codeobject.c.h @@ -5,8 +5,8 @@ preserve PyDoc_STRVAR(code_new__doc__, "code(argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize,\n" " flags, codestring, constants, names, varnames, filename, name,\n" -" qualname, firstlineno, linetable, endlinetable, columntable,\n" -" exceptiontable, freevars=(), cellvars=(), /)\n" +" qualname, firstlineno, linetable, exceptiontable, freevars=(),\n" +" cellvars=(), /)\n" "--\n" "\n" "Create a code object. Not for the faint of heart."); @@ -17,7 +17,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *filename, PyObject *name, PyObject *qualname, int firstlineno, PyObject *linetable, - PyObject *endlinetable, PyObject *columntable, PyObject *exceptiontable, PyObject *freevars, PyObject *cellvars); @@ -40,8 +39,6 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *qualname; int firstlineno; PyObject *linetable; - PyObject *endlinetable; - PyObject *columntable; PyObject *exceptiontable; PyObject *freevars = NULL; PyObject *cellvars = NULL; @@ -51,7 +48,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) !_PyArg_NoKeywords("code", kwargs)) { goto exit; } - if (!_PyArg_CheckPositional("code", PyTuple_GET_SIZE(args), 18, 20)) { + if (!_PyArg_CheckPositional("code", PyTuple_GET_SIZE(args), 16, 18)) { goto exit; } argcount = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0)); @@ -131,31 +128,29 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto exit; } linetable = PyTuple_GET_ITEM(args, 14); - endlinetable = PyTuple_GET_ITEM(args, 15); - columntable = PyTuple_GET_ITEM(args, 16); - if (!PyBytes_Check(PyTuple_GET_ITEM(args, 17))) { - _PyArg_BadArgument("code", "argument 18", "bytes", PyTuple_GET_ITEM(args, 17)); + if 
(!PyBytes_Check(PyTuple_GET_ITEM(args, 15))) { + _PyArg_BadArgument("code", "argument 16", "bytes", PyTuple_GET_ITEM(args, 15)); goto exit; } - exceptiontable = PyTuple_GET_ITEM(args, 17); - if (PyTuple_GET_SIZE(args) < 19) { + exceptiontable = PyTuple_GET_ITEM(args, 15); + if (PyTuple_GET_SIZE(args) < 17) { goto skip_optional; } - if (!PyTuple_Check(PyTuple_GET_ITEM(args, 18))) { - _PyArg_BadArgument("code", "argument 19", "tuple", PyTuple_GET_ITEM(args, 18)); + if (!PyTuple_Check(PyTuple_GET_ITEM(args, 16))) { + _PyArg_BadArgument("code", "argument 17", "tuple", PyTuple_GET_ITEM(args, 16)); goto exit; } - freevars = PyTuple_GET_ITEM(args, 18); - if (PyTuple_GET_SIZE(args) < 20) { + freevars = PyTuple_GET_ITEM(args, 16); + if (PyTuple_GET_SIZE(args) < 18) { goto skip_optional; } - if (!PyTuple_Check(PyTuple_GET_ITEM(args, 19))) { - _PyArg_BadArgument("code", "argument 20", "tuple", PyTuple_GET_ITEM(args, 19)); + if (!PyTuple_Check(PyTuple_GET_ITEM(args, 17))) { + _PyArg_BadArgument("code", "argument 18", "tuple", PyTuple_GET_ITEM(args, 17)); goto exit; } - cellvars = PyTuple_GET_ITEM(args, 19); + cellvars = PyTuple_GET_ITEM(args, 17); skip_optional: - return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, qualname, firstlineno, linetable, endlinetable, columntable, exceptiontable, freevars, cellvars); + return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, qualname, firstlineno, linetable, exceptiontable, freevars, cellvars); exit: return return_value; @@ -167,8 +162,7 @@ PyDoc_STRVAR(code_replace__doc__, " co_flags=-1, co_firstlineno=-1, co_code=None, co_consts=None,\n" " co_names=None, co_varnames=None, co_freevars=None,\n" " co_cellvars=None, co_filename=None, co_name=None,\n" -" co_qualname=None, co_linetable=None, co_endlinetable=None,\n" -" co_columntable=None, 
co_exceptiontable=None)\n" +" co_qualname=None, co_linetable=None, co_exceptiontable=None)\n" "--\n" "\n" "Return a copy of the code object with new values for the specified fields."); @@ -185,16 +179,16 @@ code_replace_impl(PyCodeObject *self, int co_argcount, PyObject *co_varnames, PyObject *co_freevars, PyObject *co_cellvars, PyObject *co_filename, PyObject *co_name, PyObject *co_qualname, - PyBytesObject *co_linetable, PyObject *co_endlinetable, - PyObject *co_columntable, PyBytesObject *co_exceptiontable); + PyBytesObject *co_linetable, + PyBytesObject *co_exceptiontable); static PyObject * code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_qualname", "co_linetable", "co_endlinetable", "co_columntable", "co_exceptiontable", NULL}; + static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_qualname", "co_linetable", "co_exceptiontable", NULL}; static _PyArg_Parser _parser = {NULL, _keywords, "replace", 0}; - PyObject *argsbuf[20]; + PyObject *argsbuf[18]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; int co_argcount = self->co_argcount; int co_posonlyargcount = self->co_posonlyargcount; @@ -213,8 +207,6 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje PyObject *co_name = self->co_name; PyObject *co_qualname = self->co_qualname; PyBytesObject *co_linetable = (PyBytesObject *)self->co_linetable; - PyObject *co_endlinetable = self->co_endlinetable; - PyObject *co_columntable = self->co_columntable; PyBytesObject *co_exceptiontable = (PyBytesObject *)self->co_exceptiontable; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf); @@ -396,25 +388,13 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje goto skip_optional_kwonly; } } - if (args[17]) { - co_endlinetable = args[17]; - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (args[18]) { - co_columntable = args[18]; - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (!PyBytes_Check(args[19])) { - _PyArg_BadArgument("replace", "argument 'co_exceptiontable'", "bytes", args[19]); + if (!PyBytes_Check(args[17])) { + _PyArg_BadArgument("replace", "argument 'co_exceptiontable'", "bytes", args[17]); goto exit; } - co_exceptiontable = (PyBytesObject *)args[19]; + co_exceptiontable = (PyBytesObject *)args[17]; skip_optional_kwonly: - return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_qualname, co_linetable, co_endlinetable, co_columntable, co_exceptiontable); + return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_qualname, co_linetable, co_exceptiontable); exit: return return_value; @@ -456,4 +436,4 @@ 
code__varname_from_oparg(PyCodeObject *self, PyObject *const *args, Py_ssize_t n exit: return return_value; } -/*[clinic end generated code: output=b1b83a70ffc5b7cd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ebfeec29d2cff674 input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index e872b39..9a57815 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -244,10 +244,6 @@ _PyCode_Validate(struct _PyCodeConstructor *con) con->qualname == NULL || !PyUnicode_Check(con->qualname) || con->filename == NULL || !PyUnicode_Check(con->filename) || con->linetable == NULL || !PyBytes_Check(con->linetable) || - con->endlinetable == NULL || - (con->endlinetable != Py_None && !PyBytes_Check(con->endlinetable)) || - con->columntable == NULL || - (con->columntable != Py_None && !PyBytes_Check(con->columntable)) || con->exceptiontable == NULL || !PyBytes_Check(con->exceptiontable) ) { PyErr_BadInternalCall(); @@ -307,10 +303,6 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->co_firstlineno = con->firstlineno; Py_INCREF(con->linetable); co->co_linetable = con->linetable; - Py_INCREF(con->endlinetable); - co->co_endlinetable = con->endlinetable; - Py_INCREF(con->columntable); - co->co_columntable = con->columntable; Py_INCREF(con->consts); co->co_consts = con->consts; @@ -347,6 +339,97 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) PyBytes_GET_SIZE(con->code)); } +static int +scan_varint(const uint8_t *ptr) +{ + int read = *ptr++; + int val = read & 63; + int shift = 0; + while (read & 64) { + read = *ptr++; + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +scan_signed_varint(const uint8_t *ptr) +{ + int uval = scan_varint(ptr); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + +static int +get_line_delta(const uint8_t *ptr) +{ + int code = ((*ptr) >> 3) & 15; + switch (code) { + case PY_CODE_LOCATION_INFO_NONE: + return 0; + 
case PY_CODE_LOCATION_INFO_NO_COLUMNS: + case PY_CODE_LOCATION_INFO_LONG: + return scan_signed_varint(ptr+1); + case PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0; + case PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2; + default: + /* Same line */ + return 0; + } +} + +static PyObject * +remove_column_info(PyObject *locations) +{ + int offset = 0; + const uint8_t *data = (const uint8_t *)PyBytes_AS_STRING(locations); + PyObject *res = PyBytes_FromStringAndSize(NULL, 32); + if (res == NULL) { + PyErr_NoMemory(); + return NULL; + } + uint8_t *output = (uint8_t *)PyBytes_AS_STRING(res); + while (offset < PyBytes_GET_SIZE(locations)) { + Py_ssize_t write_offset = output - (uint8_t *)PyBytes_AS_STRING(res); + if (write_offset + 16 >= PyBytes_GET_SIZE(res)) { + if (_PyBytes_Resize(&res, PyBytes_GET_SIZE(res) * 2) < 0) { + return NULL; + } + output = (uint8_t *)PyBytes_AS_STRING(res) + write_offset; + } + int code = (data[offset] >> 3) & 15; + if (code == PY_CODE_LOCATION_INFO_NONE) { + *output++ = data[offset]; + } + else { + int blength = (data[offset] & 7)+1; + output += write_location_entry_start( + output, PY_CODE_LOCATION_INFO_NO_COLUMNS, blength); + int ldelta = get_line_delta(&data[offset]); + output += write_signed_varint(output, ldelta); + } + offset++; + while (offset < PyBytes_GET_SIZE(locations) && + (data[offset] & 128) == 0) { + offset++; + } + } + Py_ssize_t write_offset = output - (uint8_t *)PyBytes_AS_STRING(res); + if (_PyBytes_Resize(&res, write_offset)) { + return NULL; + } + return res; +} + /* The caller is responsible for ensuring that the given data is valid. */ PyCodeObject * @@ -373,21 +456,26 @@ _PyCode_New(struct _PyCodeConstructor *con) return NULL; } - // Discard the endlinetable and columntable if we are opted out of debug + PyObject *replacement_locations = NULL; + // Compact the linetable if we are opted out of debug // ranges. 
if (!_Py_GetConfig()->code_debug_ranges) { - con->endlinetable = Py_None; - con->columntable = Py_None; + replacement_locations = remove_column_info(con->linetable); + if (replacement_locations == NULL) { + return NULL; + } + con->linetable = replacement_locations; } Py_ssize_t size = PyBytes_GET_SIZE(con->code) / sizeof(_Py_CODEUNIT); PyCodeObject *co = PyObject_NewVar(PyCodeObject, &PyCode_Type, size); if (co == NULL) { + Py_XDECREF(replacement_locations); PyErr_NoMemory(); return NULL; } init_code(co, con); - + Py_XDECREF(replacement_locations); return co; } @@ -403,8 +491,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, PyObject *qualname, int firstlineno, - PyObject *linetable, PyObject *endlinetable, - PyObject *columntable, PyObject *exceptiontable) + PyObject *linetable, + PyObject *exceptiontable) { PyCodeObject *co = NULL; PyObject *localsplusnames = NULL; @@ -482,8 +570,6 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, .code = code, .firstlineno = firstlineno, .linetable = linetable, - .endlinetable = endlinetable, - .columntable = columntable, .consts = consts, .names = names, @@ -528,14 +614,16 @@ PyCode_New(int argcount, int kwonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, PyObject *qualname, - int firstlineno, PyObject *linetable, PyObject *endlinetable, - PyObject *columntable, PyObject *exceptiontable) + int firstlineno, + PyObject *linetable, + PyObject *exceptiontable) { return PyCode_NewWithPosOnlyArgs(argcount, 0, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, freevars, cellvars, filename, - name, qualname, firstlineno, linetable, - endlinetable, columntable, exceptiontable); + name, qualname, firstlineno, + linetable, + exceptiontable); } 
PyCodeObject * @@ -567,8 +655,6 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) .code = emptystring, .firstlineno = firstlineno, .linetable = emptystring, - .endlinetable = emptystring, - .columntable = emptystring, .consts = nulltuple, .names = nulltuple, .localsplusnames = nulltuple, @@ -605,68 +691,10 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) return _PyCode_CheckLineNumber(addrq, &bounds); } -int -PyCode_Addr2Location(PyCodeObject *co, int addrq, - int *start_line, int *start_column, - int *end_line, int *end_column) -{ - *start_line = PyCode_Addr2Line(co, addrq); - *start_column = _PyCode_Addr2Offset(co, addrq); - *end_line = _PyCode_Addr2EndLine(co, addrq); - *end_column = _PyCode_Addr2EndOffset(co, addrq); - return 1; -} - -int -_PyCode_Addr2EndLine(PyCodeObject* co, int addrq) -{ - if (addrq < 0) { - return co->co_firstlineno; - } - else if (co->co_endlinetable == Py_None) { - return -1; - } - - assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); - PyCodeAddressRange bounds; - _PyCode_InitEndAddressRange(co, &bounds); - return _PyCode_CheckLineNumber(addrq, &bounds); -} - -int -_PyCode_Addr2Offset(PyCodeObject* co, int addrq) -{ - if (co->co_columntable == Py_None || addrq < 0) { - return -1; - } - addrq /= sizeof(_Py_CODEUNIT); - if (addrq*2 >= PyBytes_GET_SIZE(co->co_columntable)) { - return -1; - } - - unsigned char* bytes = (unsigned char*)PyBytes_AS_STRING(co->co_columntable); - return bytes[addrq*2] - 1; -} - -int -_PyCode_Addr2EndOffset(PyCodeObject* co, int addrq) -{ - if (co->co_columntable == Py_None || addrq < 0) { - return -1; - } - addrq /= sizeof(_Py_CODEUNIT); - if (addrq*2+1 >= PyBytes_GET_SIZE(co->co_columntable)) { - return -1; - } - - unsigned char* bytes = (unsigned char*)PyBytes_AS_STRING(co->co_columntable); - return bytes[addrq*2+1] - 1; -} - void _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { - range->opaque.lo_next = linetable; 
+ range->opaque.lo_next = (const uint8_t *)linetable; range->opaque.limit = range->opaque.lo_next + length; range->ar_start = -1; range->ar_end = 0; @@ -677,21 +705,13 @@ _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firs int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) { + assert(co->co_linetable != NULL); const char *linetable = PyBytes_AS_STRING(co->co_linetable); Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable); _PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); return bounds->ar_line; } -int -_PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds) -{ - char* linetable = PyBytes_AS_STRING(co->co_endlinetable); - Py_ssize_t length = PyBytes_GET_SIZE(co->co_endlinetable); - _PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); - return bounds->ar_line; -} - /* Update *bounds to describe the first and one-past-the-last instructions in the same line as lasti. Return the number of that line, or -1 if lasti is out of bounds. 
*/ int @@ -710,43 +730,182 @@ _PyCode_CheckLineNumber(int lasti, PyCodeAddressRange *bounds) return bounds->ar_line; } +static int +is_no_line_marker(uint8_t b) +{ + return (b >> 3) == 0x1f; +} + + +#define ASSERT_VALID_BOUNDS(bounds) \ + assert(bounds->opaque.lo_next <= bounds->opaque.limit && \ + (bounds->ar_line == -1 || bounds->ar_line == bounds->opaque.computed_line) && \ + (bounds->opaque.lo_next == bounds->opaque.limit || \ + (*bounds->opaque.lo_next) & 128)) + +static int +next_code_delta(PyCodeAddressRange *bounds) +{ + assert((*bounds->opaque.lo_next) & 128); + return (((*bounds->opaque.lo_next) & 7) + 1) * sizeof(_Py_CODEUNIT); +} + +static int +previous_code_delta(PyCodeAddressRange *bounds) +{ + const uint8_t *ptr = bounds->opaque.lo_next-1; + while (((*ptr) & 128) == 0) { + ptr--; + } + return (((*ptr) & 7) + 1) * sizeof(_Py_CODEUNIT); +} + +static int +read_byte(PyCodeAddressRange *bounds) +{ + return *bounds->opaque.lo_next++; +} + +static int +read_varint(PyCodeAddressRange *bounds) +{ + int read = read_byte(bounds); + int val = read & 63; + int shift = 0; + while (read & 64) { + read = read_byte(bounds); + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +read_signed_varint(PyCodeAddressRange *bounds) +{ + int uval = read_varint(bounds); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + static void retreat(PyCodeAddressRange *bounds) { - int ldelta = ((signed char *)bounds->opaque.lo_next)[-1]; - if (ldelta == -128) { - ldelta = 0; - } - bounds->opaque.computed_line -= ldelta; - bounds->opaque.lo_next -= 2; + ASSERT_VALID_BOUNDS(bounds); + assert(bounds->ar_start > 0); + do { + bounds->opaque.lo_next--; + } while (((*bounds->opaque.lo_next) & 128) == 0); + bounds->opaque.computed_line -= get_line_delta(bounds->opaque.lo_next); bounds->ar_end = bounds->ar_start; - bounds->ar_start -= ((unsigned char *)bounds->opaque.lo_next)[-2]; - ldelta = ((signed char 
*)bounds->opaque.lo_next)[-1]; - if (ldelta == -128) { + bounds->ar_start -= previous_code_delta(bounds); + if (is_no_line_marker(bounds->opaque.lo_next[-1])) { bounds->ar_line = -1; } else { bounds->ar_line = bounds->opaque.computed_line; } + ASSERT_VALID_BOUNDS(bounds); } static void advance(PyCodeAddressRange *bounds) { - bounds->ar_start = bounds->ar_end; - int delta = ((unsigned char *)bounds->opaque.lo_next)[0]; - bounds->ar_end += delta; - int ldelta = ((signed char *)bounds->opaque.lo_next)[1]; - bounds->opaque.lo_next += 2; - if (ldelta == -128) { + ASSERT_VALID_BOUNDS(bounds); + bounds->opaque.computed_line += get_line_delta(bounds->opaque.lo_next); + if (is_no_line_marker(*bounds->opaque.lo_next)) { bounds->ar_line = -1; } else { - bounds->opaque.computed_line += ldelta; bounds->ar_line = bounds->opaque.computed_line; } + bounds->ar_start = bounds->ar_end; + bounds->ar_end += next_code_delta(bounds); + do { + bounds->opaque.lo_next++; + } while (bounds->opaque.lo_next < bounds->opaque.limit && + ((*bounds->opaque.lo_next) & 128) == 0); + ASSERT_VALID_BOUNDS(bounds); } +static void +advance_with_locations(PyCodeAddressRange *bounds, int *endline, int *column, int *endcolumn) +{ + ASSERT_VALID_BOUNDS(bounds); + int first_byte = read_byte(bounds); + int code = (first_byte >> 3) & 15; + bounds->ar_start = bounds->ar_end; + bounds->ar_end = bounds->ar_start + ((first_byte & 7) + 1) * sizeof(_Py_CODEUNIT); + switch(code) { + case PY_CODE_LOCATION_INFO_NONE: + bounds->ar_line = *endline = -1; + *column = *endcolumn = -1; + break; + case PY_CODE_LOCATION_INFO_LONG: + { + bounds->opaque.computed_line += read_signed_varint(bounds); + bounds->ar_line = bounds->opaque.computed_line; + *endline = bounds->ar_line + read_varint(bounds); + *column = read_varint(bounds)-1; + *endcolumn = read_varint(bounds)-1; + break; + } + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + { + /* No column */ + bounds->opaque.computed_line += read_signed_varint(bounds); + *endline = 
bounds->ar_line = bounds->opaque.computed_line; + *column = *endcolumn = -1; + break; + } + case PY_CODE_LOCATION_INFO_ONE_LINE0: + case PY_CODE_LOCATION_INFO_ONE_LINE1: + case PY_CODE_LOCATION_INFO_ONE_LINE2: + { + /* one line form */ + int line_delta = code - 10; + bounds->opaque.computed_line += line_delta; + *endline = bounds->ar_line = bounds->opaque.computed_line; + *column = read_byte(bounds); + *endcolumn = read_byte(bounds); + break; + } + default: + { + /* Short forms */ + int second_byte = read_byte(bounds); + assert((second_byte & 128) == 0); + *endline = bounds->ar_line = bounds->opaque.computed_line; + *column = code << 3 | (second_byte >> 4); + *endcolumn = *column + (second_byte & 15); + } + } + ASSERT_VALID_BOUNDS(bounds); +} +int +PyCode_Addr2Location(PyCodeObject *co, int addrq, + int *start_line, int *start_column, + int *end_line, int *end_column) +{ + if (addrq < 0) { + *start_line = *end_line = co->co_firstlineno; + *start_column = *end_column = 0; + } + assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); + PyCodeAddressRange bounds; + _PyCode_InitAddressRange(co, &bounds); + _PyCode_CheckLineNumber(addrq, &bounds); + retreat(&bounds); + advance_with_locations(&bounds, end_line, start_column, end_column); + *start_line = bounds.ar_line; + return 1; +} + + static inline int at_end(PyCodeAddressRange *bounds) { return bounds->opaque.lo_next >= bounds->opaque.limit; @@ -759,10 +918,7 @@ _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range) return 0; } retreat(range); - while (range->ar_start == range->ar_end) { - assert(range->ar_start > 0); - retreat(range); - } + assert(range->ar_end > range->ar_start); return 1; } @@ -773,13 +929,37 @@ _PyLineTable_NextAddressRange(PyCodeAddressRange *range) return 0; } advance(range); - while (range->ar_start == range->ar_end) { - assert(!at_end(range)); - advance(range); - } + assert(range->ar_end > range->ar_start); return 1; } +int +_PyLineTable_StartsLine(PyCodeAddressRange *range) +{ + if 
(range->ar_start <= 0) { + return 0; + } + const uint8_t *ptr = range->opaque.lo_next; + do { + ptr--; + } while (((*ptr) & 128) == 0); + int code = ((*ptr)>> 3) & 15; + switch(code) { + case PY_CODE_LOCATION_INFO_LONG: + return 0; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + case PY_CODE_LOCATION_INFO_NONE: + return ptr[1] != 0; + case PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0; + case PY_CODE_LOCATION_INFO_ONE_LINE1: + case PY_CODE_LOCATION_INFO_ONE_LINE2: + return 1; + default: + return 0; + } +} + static int emit_pair(PyObject **bytes, int *offset, int a, int b) { @@ -856,7 +1036,6 @@ typedef struct { PyObject_HEAD PyCodeObject *li_code; PyCodeAddressRange li_line; - char *li_end; } lineiterator; @@ -962,7 +1141,11 @@ new_linesiterator(PyCodeObject *code) typedef struct { PyObject_HEAD PyCodeObject* pi_code; + PyCodeAddressRange pi_range; int pi_offset; + int pi_endline; + int pi_column; + int pi_endcolumn; } positionsiterator; static void @@ -983,22 +1166,19 @@ _source_offset_converter(int* value) { static PyObject* positionsiter_next(positionsiterator* pi) { - if (pi->pi_offset >= _PyCode_NBYTES(pi->pi_code)) { - return NULL; - } - - int start_line, start_col, end_line, end_col; - if (!PyCode_Addr2Location(pi->pi_code, pi->pi_offset, &start_line, - &start_col, &end_line, &end_col)) { - return NULL; + if (pi->pi_offset >= pi->pi_range.ar_end) { + assert(pi->pi_offset == pi->pi_range.ar_end); + if (at_end(&pi->pi_range)) { + return NULL; + } + advance_with_locations(&pi->pi_range, &pi->pi_endline, &pi->pi_column, &pi->pi_endcolumn); } - pi->pi_offset += 2; return Py_BuildValue("(O&O&O&O&)", - _source_offset_converter, &start_line, - _source_offset_converter, &end_line, - _source_offset_converter, &start_col, - _source_offset_converter, &end_col); + _source_offset_converter, &pi->pi_range.ar_line, + _source_offset_converter, &pi->pi_endline, + _source_offset_converter, &pi->pi_column, + _source_offset_converter, &pi->pi_endcolumn); } static PyTypeObject 
PositionsIterator = { @@ -1053,7 +1233,8 @@ code_positionsiterator(PyCodeObject* code, PyObject* Py_UNUSED(args)) } Py_INCREF(code); pi->pi_code = code; - pi->pi_offset = 0; + _PyCode_InitAddressRange(code, &pi->pi_range); + pi->pi_offset = pi->pi_range.ar_end; return (PyObject*)pi; } @@ -1203,8 +1384,6 @@ code.__new__ as code_new qualname: unicode firstlineno: int linetable: object(subclass_of="&PyBytes_Type") - endlinetable: object - columntable: object exceptiontable: object(subclass_of="&PyBytes_Type") freevars: object(subclass_of="&PyTuple_Type", c_default="NULL") = () cellvars: object(subclass_of="&PyTuple_Type", c_default="NULL") = () @@ -1219,10 +1398,9 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *filename, PyObject *name, PyObject *qualname, int firstlineno, PyObject *linetable, - PyObject *endlinetable, PyObject *columntable, PyObject *exceptiontable, PyObject *freevars, PyObject *cellvars) -/*[clinic end generated code: output=e1d2086aa8da7c08 input=a06cd92369134063]*/ +/*[clinic end generated code: output=069fa20d299f9dda input=e31da3c41ad8064a]*/ { PyObject *co = NULL; PyObject *ournames = NULL; @@ -1263,17 +1441,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, goto cleanup; } - if (!Py_IsNone(endlinetable) && !PyBytes_Check(endlinetable)) { - PyErr_SetString(PyExc_ValueError, - "code: endlinetable must be None or bytes"); - goto cleanup; - } - if (!Py_IsNone(columntable) && !PyBytes_Check(columntable)) { - PyErr_SetString(PyExc_ValueError, - "code: columntable must be None or bytes"); - goto cleanup; - } - ournames = validate_and_copy_tuple(names); if (ournames == NULL) goto cleanup; @@ -1300,8 +1467,8 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, ourvarnames, ourfreevars, ourcellvars, filename, name, qualname, firstlineno, - linetable, endlinetable, - columntable, exceptiontable + linetable, + 
exceptiontable ); cleanup: Py_XDECREF(ournames); @@ -1337,8 +1504,6 @@ code_dealloc(PyCodeObject *co) Py_XDECREF(co->co_name); Py_XDECREF(co->co_qualname); Py_XDECREF(co->co_linetable); - Py_XDECREF(co->co_endlinetable); - Py_XDECREF(co->co_columntable); Py_XDECREF(co->co_exceptiontable); if (co->co_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject*)co); @@ -1488,8 +1653,6 @@ static PyMemberDef code_memberlist[] = { {"co_qualname", T_OBJECT, OFF(co_qualname), READONLY}, {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY}, {"co_linetable", T_OBJECT, OFF(co_linetable), READONLY}, - {"co_endlinetable", T_OBJECT, OFF(co_endlinetable), READONLY}, - {"co_columntable", T_OBJECT, OFF(co_columntable), READONLY}, {"co_exceptiontable", T_OBJECT, OFF(co_exceptiontable), READONLY}, {NULL} /* Sentinel */ }; @@ -1585,8 +1748,6 @@ code.replace co_name: unicode(c_default="self->co_name") = None co_qualname: unicode(c_default="self->co_qualname") = None co_linetable: PyBytesObject(c_default="(PyBytesObject *)self->co_linetable") = None - co_endlinetable: object(c_default="self->co_endlinetable") = None - co_columntable: object(c_default="self->co_columntable") = None co_exceptiontable: PyBytesObject(c_default="(PyBytesObject *)self->co_exceptiontable") = None Return a copy of the code object with new values for the specified fields. 
@@ -1601,9 +1762,9 @@ code_replace_impl(PyCodeObject *self, int co_argcount, PyObject *co_varnames, PyObject *co_freevars, PyObject *co_cellvars, PyObject *co_filename, PyObject *co_name, PyObject *co_qualname, - PyBytesObject *co_linetable, PyObject *co_endlinetable, - PyObject *co_columntable, PyBytesObject *co_exceptiontable) -/*[clinic end generated code: output=f046bf0be3bab91f input=78dbe204dbd06c2f]*/ + PyBytesObject *co_linetable, + PyBytesObject *co_exceptiontable) +/*[clinic end generated code: output=b6cd9988391d5711 input=f6f68e03571f8d7c]*/ { #define CHECK_INT_ARG(ARG) \ if (ARG < 0) { \ @@ -1664,24 +1825,12 @@ code_replace_impl(PyCodeObject *self, int co_argcount, co_freevars = freevars; } - if (!Py_IsNone(co_endlinetable) && !PyBytes_Check(co_endlinetable)) { - PyErr_SetString(PyExc_ValueError, - "co_endlinetable must be None or bytes"); - goto error; - } - if (!Py_IsNone(co_columntable) && !PyBytes_Check(co_columntable)) { - PyErr_SetString(PyExc_ValueError, - "co_columntable must be None or bytes"); - goto error; - } - co = PyCode_NewWithPosOnlyArgs( co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, (PyObject*)co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, - co_qualname, co_firstlineno, (PyObject*)co_linetable, - (PyObject*)co_endlinetable, (PyObject*)co_columntable, - (PyObject*)co_exceptiontable); + co_qualname, co_firstlineno, + (PyObject*)co_linetable, (PyObject*)co_exceptiontable); error: Py_XDECREF(code); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index e65395e..7278ca1 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -378,6 +378,7 @@ marklines(PyCodeObject *code, int len) PyCodeAddressRange bounds; _PyCode_InitAddressRange(code, &bounds); assert (bounds.ar_end == 0); + int last_line = -1; int *linestarts = PyMem_New(int, len); if (linestarts == NULL) { @@ -389,7 +390,10 @@ marklines(PyCodeObject *code, int len) while 
(_PyLineTable_NextAddressRange(&bounds)) { assert(bounds.ar_start / (int)sizeof(_Py_CODEUNIT) < len); - linestarts[bounds.ar_start / sizeof(_Py_CODEUNIT)] = bounds.ar_line; + if (bounds.ar_line != last_line && bounds.ar_line != -1) { + linestarts[bounds.ar_start / sizeof(_Py_CODEUNIT)] = bounds.ar_line; + last_line = bounds.ar_line; + } } return linestarts; } diff --git a/Objects/locations.md b/Objects/locations.md new file mode 100644 index 0000000..18a338a --- /dev/null +++ b/Objects/locations.md @@ -0,0 +1,69 @@ +# Locations table + +For versions up to 3.10 see ./lnotab_notes.txt + +In version 3.11 the `co_linetable` bytes object of code objects contains a compact representation of the positions returned by the `co_positions()` iterator. + +The `co_linetable` consists of a sequence of location entries. +Each entry starts with a byte with the most significant bit set, followed by zero or more bytes with most significant bit unset. + +Each entry contains the following information: +* The number of code units covered by this entry (length) +* The start line +* The end line +* The start column +* The end column + +The first byte has the following format: + +Bit 7 | Bits 3-6 | Bits 0-2 + ---- | ---- | ---- + 1 | Code | Length (in code units) - 1 + +The codes are enumerated in the `_PyCodeLocationInfoKind` enum. + +## Variable length integer encodings + +Integers are often encoded using a variable length integer encoding + +### Unsigned integers (varint) + +Unsigned integers are encoded in 6 bit chunks, least significant first. +Each chunk but the last has bit 6 set. 
+For example: + +* 63 is encoded as `0x3f` +* 200 is encoded as `0x48`, `0x03` + +### Signed integers (svarint) + +Signed integers are encoded by converting them to unsigned integers, using the following function: +```Python +def convert(s): + if s < 0: + return ((-s)<<1) | 1 + else: + return (s<<1) +``` + +## Location entries + +The meanings of the codes and of the bytes that follow them are as follows: + +Code | Meaning | Start line | End line | Start column | End column + ---- | ---- | ---- | ---- | ---- | ---- + 0-9 | Short form | Δ 0 | Δ 0 | See below | See below + 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte + 13 | No column info | Δ svarint | Δ 0 | None | None + 14 | Long form | Δ svarint | Δ varint | varint | varint + 15 | No location | None | None | None | None + +A Δ means that the value is encoded as a delta from another value: +* Start line: Delta from the previous start line, or `co_firstlineno` for the first entry. +* End line: Delta from the start line. + +### The short forms + +Codes 0-9 are the short forms. The short form consists of two bytes, the second byte holding additional column information. The code is the start column divided by 8 (and rounded down). +* Start column: `(code*8) + ((second_byte>>4)&7)` +* End column: `start_column + (second_byte&15)` |