From cd8295ff758891f21084a6a5ad3403d35dda38f7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 11 Apr 2020 10:48:40 +0300 Subject: bpo-39943: Add the const qualifier to pointers on non-mutable PyUnicode data. (GH-19345) --- Modules/_csv.c | 8 +- Modules/_decimal/_decimal.c | 4 +- Modules/_elementtree.c | 2 +- Modules/_io/textio.c | 10 +- Modules/_json.c | 14 +- Modules/_operator.c | 2 +- Modules/_pickle.c | 2 +- Modules/_sqlite/connection.c | 2 +- Modules/_sre.c | 12 +- Modules/cjkcodecs/cjkcodecs.h | 2 +- Modules/cjkcodecs/multibytecodec.c | 4 +- Modules/cjkcodecs/multibytecodec.h | 2 +- Modules/pyexpat.c | 2 +- Modules/sre.h | 14 +- Modules/sre_lib.h | 28 ++-- Modules/unicodedata.c | 6 +- Objects/bytesobject.c | 10 +- Objects/exceptions.c | 4 +- Objects/stringlib/codecs.h | 2 +- Objects/typeobject.c | 2 +- Objects/unicodeobject.c | 295 ++++++++++++++++++++----------------- Python/_warnings.c | 4 +- Python/ast.c | 2 +- Python/codecs.c | 26 ++-- Python/formatter_unicode.c | 6 +- Python/getargs.c | 2 +- Python/traceback.c | 4 +- 27 files changed, 250 insertions(+), 221 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 9c49715..950b0d7 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -789,7 +789,7 @@ Reader_iternext(ReaderObj *self) Py_UCS4 c; Py_ssize_t pos, linelen; unsigned int kind; - void *data; + const void *data; PyObject *lineobj; if (parse_reset(self) < 0) @@ -996,7 +996,7 @@ join_reset(WriterObj *self) * record length. */ static Py_ssize_t -join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, +join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data, Py_ssize_t field_len, int *quoted, int copy_phase) { @@ -1107,7 +1107,7 @@ static int join_append(WriterObj *self, PyObject *field, int quoted) { unsigned int field_kind = -1; - void *field_data = NULL; + const void *field_data = NULL; Py_ssize_t field_len = 0; Py_ssize_t rec_len; @@ -1139,7 +1139,7 @@ join_append_lineterminator(WriterObj *self) { Py_ssize_t terminator_len, i; unsigned int term_kind; - void *term_data; + const void *term_data; terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); if (terminator_len == -1) diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index b36e309..cdc942f 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -1878,7 +1878,7 @@ dec_dealloc(PyObject *dec) /******************************************************************************/ Py_LOCAL_INLINE(int) -is_space(enum PyUnicode_Kind kind, void *data, Py_ssize_t pos) +is_space(enum PyUnicode_Kind kind, const void *data, Py_ssize_t pos) { Py_UCS4 ch = PyUnicode_READ(kind, data, pos); return Py_UNICODE_ISSPACE(ch); @@ -1896,7 +1896,7 @@ static char * numeric_as_ascii(const PyObject *u, int strip_ws, int ignore_underscores) { enum PyUnicode_Kind kind; - void *data; + const void *data; Py_UCS4 ch; char *res, *cp; Py_ssize_t j, len; diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 49c372d..c0c741e 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1132,7 +1132,7 @@ checkpath(PyObject* tag) if (PyUnicode_Check(tag)) { const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); - void *data = PyUnicode_DATA(tag); + const void *data = PyUnicode_DATA(tag); unsigned int kind = PyUnicode_KIND(tag); if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && ( PyUnicode_READ(kind, data, 1) == '}' || ( diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index dedbefe..12dba38 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -340,7 +340,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself, goto error; kind = PyUnicode_KIND(modified); out = PyUnicode_DATA(modified); - PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r'); + PyUnicode_WRITE(kind, out, 0, '\r'); memcpy(out + kind, PyUnicode_DATA(output), kind * output_len); Py_DECREF(output); output = modified; /* output remains ready */ @@ -367,7 +367,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself, /* Record which newlines are read and do newline translation if desired, all in one pass. */ { - void *in_str; + const void *in_str; Py_ssize_t len; int seennl = self->seennl; int only_lf = 0; @@ -447,7 +447,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *myself, else { void *translated; int kind = PyUnicode_KIND(output); - void *in_str = PyUnicode_DATA(output); + const void *in_str = PyUnicode_DATA(output); Py_ssize_t in, out; /* XXX: Previous in-place translation here is disabled as resizing is not possible anymore */ @@ -2085,7 +2085,7 @@ _PyIO_find_line_ending( else { /* Non-universal mode. */ Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); - Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); + const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); /* Assume that readnl is an ASCII character. */ assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); if (readnl_len == 1) { @@ -2139,7 +2139,7 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) chunked = 0; while (1) { - char *ptr; + const char *ptr; Py_ssize_t line_len; int kind; Py_ssize_t consumed = 0; diff --git a/Modules/_json.c b/Modules/_json.c index 3ab1cb3..1754416 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -159,8 +159,8 @@ ascii_escape_unicode(PyObject *pystr) Py_ssize_t output_size; Py_ssize_t chars; PyObject *rval; - void *input; - unsigned char *output; + const void *input; + Py_UCS1 *output; int kind; if (PyUnicode_READY(pystr) == -1) @@ -225,7 +225,7 @@ escape_unicode(PyObject *pystr) Py_ssize_t output_size; Py_ssize_t chars; PyObject *rval; - void *input; + const void *input; int kind; Py_UCS4 maxchar; @@ -678,7 +678,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss Returns a new PyObject (usually a dict, but object_hook can change that) */ - void *str; + const void *str; int kind; Py_ssize_t end_idx; PyObject *val = NULL; @@ -808,7 +808,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi Returns a new PyList */ - void *str; + const void *str; int kind; Py_ssize_t end_idx; PyObject *val = NULL; @@ -911,7 +911,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ - void *str; + const void *str; int kind; Py_ssize_t end_idx; Py_ssize_t idx = start; @@ -1028,7 +1028,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ Returns a new PyObject representation of the term. */ PyObject *res; - void *str; + const void *str; int kind; Py_ssize_t length; diff --git a/Modules/_operator.c b/Modules/_operator.c index 007c21b..19026b6 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1170,7 +1170,7 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) for (idx = 0; idx < nattrs; ++idx) { PyObject *item = PyTuple_GET_ITEM(args, idx); Py_ssize_t item_len; - void *data; + const void *data; unsigned int kind; int dot_count; diff --git a/Modules/_pickle.c b/Modules/_pickle.c index c3385ad..4b46c1f 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2581,7 +2581,7 @@ raw_unicode_escape(PyObject *obj) { char *p; Py_ssize_t i, size; - void *data; + const void *data; unsigned int kind; _PyBytesWriter writer; diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index 697295d..92bdfe3 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -1644,7 +1644,7 @@ pysqlite_connection_create_collation(pysqlite_Connection* self, PyObject* args) const char *uppercase_name_str; int rc; unsigned int kind; - void *data; + const void *data; if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) { goto finally; diff --git a/Modules/_sre.c b/Modules/_sre.c index bee2e12..836d796 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -351,7 +351,7 @@ state_reset(SRE_STATE* state) data_stack_dealloc(state); } -static void* +static const void* getstring(PyObject* string, Py_ssize_t* p_length, int* p_isbytes, int* p_charsize, Py_buffer *view) @@ -398,11 +398,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, Py_ssize_t length; int isbytes, charsize; - void* ptr; + const void* ptr; memset(state, 0, sizeof(SRE_STATE)); - state->mark = PyMem_New(void *, pattern->groups * 2); + state->mark = PyMem_New(const void *, pattern->groups * 2); if (!state->mark) { PyErr_NoMemory(); goto err; @@ -891,7 +891,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, Py_ssize_t status; Py_ssize_t n; Py_ssize_t i; - void* last; + const void* last; assert(self->codesize != 0); @@ -984,7 +984,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, PyObject* item; PyObject* filter; PyObject* match; - void* ptr; + const void* ptr; Py_ssize_t status; Py_ssize_t n; Py_ssize_t i, b, e; @@ -1895,7 +1895,7 @@ match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) int isbytes, charsize; Py_buffer view; PyObject *result; - void* ptr; + const void* ptr; Py_ssize_t i, j; assert(0 <= index && index < self->groups); diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 8f6f880..e41755b 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -72,7 +72,7 @@ static const struct dbcs_map *mapping_list; #define ENCODER(encoding) \ static Py_ssize_t encoding##_encode( \ MultibyteCodec_State *state, const void *config, \ - int kind, void *data, \ + int kind, const void *data, \ Py_ssize_t *inpos, Py_ssize_t inlen, \ unsigned char **outbuf, Py_ssize_t outleft, int flags) #define ENCODER_RESET(encoding) \ diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index a09c75d..9f9fbeb 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -228,7 +228,7 @@ multibytecodec_encerror(MultibyteCodec *codec, Py_ssize_t r; Py_ssize_t inpos; int kind; - void *data; + const void *data; replchar = PyUnicode_FromOrdinal('?'); if (replchar == NULL) @@ -457,7 +457,7 @@ multibytecodec_encode(MultibyteCodec *codec, Py_ssize_t finalsize, r = 0; Py_ssize_t datalen; int kind; - void *data; + const void *data; if (PyUnicode_READY(text) < 0) return NULL; diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index 4d2b355..5946821 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -30,7 +30,7 @@ typedef struct { typedef int (*mbcodec_init)(const void *config); typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, const void *config, - int kind, void *data, + int kind, const void *data, Py_ssize_t *inpos, Py_ssize_t inlen, unsigned char **outbuf, Py_ssize_t outleft, int flags); diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index a7f8b50..d930e3e 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1060,7 +1060,7 @@ PyUnknownEncodingHandler(void *encodingHandlerData, static unsigned char template_buffer[256] = {0}; PyObject* u; int i; - void *data; + const void *data; unsigned int kind; if (PyErr_Occurred()) diff --git a/Modules/sre.h b/Modules/sre.h index a728488..9b0d8b1 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -54,17 +54,17 @@ typedef struct { typedef struct SRE_REPEAT_T { Py_ssize_t count; - SRE_CODE* pattern; /* points to REPEAT operator arguments */ - void* last_ptr; /* helper to check for infinite loops */ + const SRE_CODE* pattern; /* points to REPEAT operator arguments */ + const void* last_ptr; /* helper to check for infinite loops */ struct SRE_REPEAT_T *prev; /* points to previous repeat context */ } SRE_REPEAT; typedef struct { /* string pointers */ - void* ptr; /* current position (also end of current slice) */ - void* beginning; /* start of original string */ - void* start; /* start of current slice */ - void* end; /* end of original string */ + const void* ptr; /* current position (also end of current slice) */ + const void* beginning; /* start of original string */ + const void* start; /* start of current slice */ + const void* end; /* end of original string */ /* attributes for the match object */ PyObject* string; Py_buffer buffer; @@ -74,7 +74,7 @@ typedef struct { /* registers */ Py_ssize_t lastindex; Py_ssize_t lastmark; - void** mark; + const void** mark; int match_all; int must_advance; /* dynamically allocated stuff */ diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 437ab43..9cc7863 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -13,7 +13,7 @@ /* This file is included three times, with different character settings */ LOCAL(int) -SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) +SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at) { /* check if pointer is at given position */ @@ -101,7 +101,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) } LOCAL(int) -SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) +SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) { /* check if character is a member of the given set */ @@ -188,7 +188,7 @@ SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) } LOCAL(int) -SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) +SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) { SRE_CODE lo, up; lo = sre_lower_locale(ch); @@ -199,15 +199,15 @@ SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) return up != lo && SRE(charset)(state, set, up); } -LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel); +LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); LOCAL(Py_ssize_t) -SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) +SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) { SRE_CODE chr; SRE_CHAR c; - SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; - SRE_CHAR* end = (SRE_CHAR *)state->end; + const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; + const SRE_CHAR* end = (const SRE_CHAR *)state->end; Py_ssize_t i; /* adjust end */ @@ -335,14 +335,14 @@ SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) #if 0 /* not used in this release */ LOCAL(int) -SRE(info)(SRE_STATE* state, SRE_CODE* pattern) +SRE(info)(SRE_STATE* state, const SRE_CODE* pattern) { /* check if an SRE_OP_INFO block matches at the current position. returns the number of SRE_CODE objects to skip if successful, 0 if no match */ - SRE_CHAR* end = (SRE_CHAR*) state->end; - SRE_CHAR* ptr = (SRE_CHAR*) state->ptr; + const SRE_CHAR* end = (const SRE_CHAR*) state->end; + const SRE_CHAR* ptr = (const SRE_CHAR*) state->ptr; Py_ssize_t i; /* check minimal length */ @@ -531,8 +531,8 @@ do { \ typedef struct { Py_ssize_t last_ctx_pos; Py_ssize_t jump; - SRE_CHAR* ptr; - SRE_CODE* pattern; + const SRE_CHAR* ptr; + const SRE_CODE* pattern; Py_ssize_t count; Py_ssize_t lastmark; Py_ssize_t lastindex; @@ -546,9 +546,9 @@ typedef struct { /* check if string matches the given pattern. returns <0 for error, 0 for failure, and 1 for success */ LOCAL(Py_ssize_t) -SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel) +SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) { - SRE_CHAR* end = (SRE_CHAR *)state->end; + const SRE_CHAR* end = (const SRE_CHAR *)state->end; Py_ssize_t alloc_pos, ctx_pos = -1; Py_ssize_t i, ret = 0; Py_ssize_t jump; diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 1a9e1c0..569e785 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -496,7 +496,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) Py_UCS4 *output; Py_ssize_t i, o, osize; int kind; - void *data; + const void *data; /* Longest decomposition in Unicode 3.2: U+FDFA */ Py_UCS4 stack[20]; Py_ssize_t space, isize; @@ -643,7 +643,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k) { PyObject *result; int kind; - void *data; + const void *data; Py_UCS4 *output; Py_ssize_t i, i1, o, len; int f,l,index,index1,comb; @@ -804,7 +804,7 @@ is_normalized_quickcheck(PyObject *self, PyObject *input, Py_ssize_t i, len; int kind; - void *data; + const void *data; unsigned char prev_combining = 0; /* The two quickcheck bits at this shift have type QuickcheckResult. */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 03cd7dd..987d98d 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1265,12 +1265,14 @@ PyBytes_Repr(PyObject *obj, int smartquotes) Py_ssize_t i, length = Py_SIZE(op); Py_ssize_t newsize, squotes, dquotes; PyObject *v; - unsigned char quote, *s, *p; + unsigned char quote; + const unsigned char *s; + Py_UCS1 *p; /* Compute size of output string */ squotes = dquotes = 0; newsize = 3; /* b'' */ - s = (unsigned char*)op->ob_sval; + s = (const unsigned char*)op->ob_sval; for (i = 0; i < length; i++) { Py_ssize_t incr = 1; switch(s[i]) { @@ -2271,7 +2273,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) char *buf; Py_ssize_t hexlen, invalid_char; unsigned int top, bot; - Py_UCS1 *str, *end; + const Py_UCS1 *str, *end; _PyBytesWriter writer; _PyBytesWriter_Init(&writer); @@ -2283,7 +2285,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) hexlen = PyUnicode_GET_LENGTH(string); if (!PyUnicode_IS_ASCII(string)) { - void *data = PyUnicode_DATA(string); + const void *data = PyUnicode_DATA(string); unsigned int kind = PyUnicode_KIND(string); Py_ssize_t i; diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 2baec5e..dad177a 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -1428,7 +1428,7 @@ my_basename(PyObject *name) { Py_ssize_t i, size, offset; int kind; - void *data; + const void *data; if (PyUnicode_READY(name)) return NULL; @@ -2953,7 +2953,7 @@ _check_for_legacy_statements(PySyntaxErrorObject *self, Py_ssize_t start) static PyObject *exec_prefix = NULL; Py_ssize_t text_len = PyUnicode_GET_LENGTH(self->text), match; int kind = PyUnicode_KIND(self->text); - void *data = PyUnicode_DATA(self->text); + const void *data = PyUnicode_DATA(self->text); /* Ignore leading whitespace */ while (start < text_len) { diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 39c1553..cd7aa69 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -259,7 +259,7 @@ InvalidContinuation3: Py_LOCAL_INLINE(char *) STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, PyObject *unicode, - STRINGLIB_CHAR *data, + const STRINGLIB_CHAR *data, Py_ssize_t size, _Py_error_handler error_handler, const char *errors) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bc42e2d..209c6a5 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3208,7 +3208,7 @@ is_dunder_name(PyObject *name) int kind = PyUnicode_KIND(name); /* Special names contain at least "__x__" and are always ASCII. */ if (length > 4 && kind == PyUnicode_1BYTE_KIND) { - Py_UCS1 *characters = PyUnicode_1BYTE_DATA(name); + const Py_UCS1 *characters = PyUnicode_1BYTE_DATA(name); return ( ((characters[length-2] == '_') && (characters[length-1] == '_')) && ((characters[0] == '_') && (characters[1] == '_')) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1e1f257..3c79feb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -578,7 +578,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) if (check_content && kind != PyUnicode_WCHAR_KIND) { Py_ssize_t i; Py_UCS4 maxchar = 0; - void *data; + const void *data; Py_UCS4 ch; data = PyUnicode_DATA(ascii); @@ -662,7 +662,7 @@ unicode_result_ready(PyObject *unicode) } if (length == 1) { - void *data = PyUnicode_DATA(unicode); + const void *data = PyUnicode_DATA(unicode); int kind = PyUnicode_KIND(unicode); Py_UCS4 ch = PyUnicode_READ(kind, data, 0); if (ch < 256) { @@ -720,7 +720,7 @@ backslashreplace(_PyBytesWriter *writer, char *str, Py_ssize_t size, i; Py_UCS4 ch; enum PyUnicode_Kind kind; - void *data; + const void *data; assert(PyUnicode_IS_READY(unicode)); kind = PyUnicode_KIND(unicode); @@ -787,7 +787,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, Py_ssize_t size, i; Py_UCS4 ch; enum PyUnicode_Kind kind; - void *data; + const void *data; assert(PyUnicode_IS_READY(unicode)); kind = PyUnicode_KIND(unicode); @@ -863,7 +863,7 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0; (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) static inline BLOOM_MASK -make_bloom_mask(int kind, void* ptr, Py_ssize_t len) +make_bloom_mask(int kind, const void* ptr, Py_ssize_t len) { #define BLOOM_UPDATE(TYPE, MASK, PTR, LEN) \ do { \ @@ -1302,16 +1302,16 @@ unicode_kind_name(PyObject *unicode) #ifdef Py_DEBUG /* Functions wrapping macros for use in debugger */ -char *_PyUnicode_utf8(void *unicode_raw){ +const char *_PyUnicode_utf8(void *unicode_raw){ PyObject *unicode = _PyObject_CAST(unicode_raw); return PyUnicode_UTF8(unicode); } -void *_PyUnicode_compact_data(void *unicode_raw) { +const void *_PyUnicode_compact_data(void *unicode_raw) { PyObject *unicode = _PyObject_CAST(unicode_raw); return _PyUnicode_COMPACT_DATA(unicode); } -void *_PyUnicode_data(void *unicode_raw) { +const void *_PyUnicode_data(void *unicode_raw) { PyObject *unicode = _PyObject_CAST(unicode_raw); printf("obj %p\n", (void*)unicode); printf("compact %d\n", PyUnicode_IS_COMPACT(unicode)); @@ -1328,7 +1328,7 @@ _PyUnicode_Dump(PyObject *op) PyASCIIObject *ascii = (PyASCIIObject *)op; PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; PyUnicodeObject *unicode = (PyUnicodeObject *)op; - void *data; + const void *data; if (ascii->state.compact) { @@ -1528,7 +1528,8 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, Py_ssize_t how_many, int check_maxchar) { unsigned int from_kind, to_kind; - void *from_data, *to_data; + const void *from_data; + void *to_data; assert(0 <= how_many); assert(0 <= from_start); @@ -1553,7 +1554,7 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, if (!check_maxchar && PyUnicode_MAX_CHAR_VALUE(from) > PyUnicode_MAX_CHAR_VALUE(to)) { - const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); + Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); Py_UCS4 ch; Py_ssize_t i; for (i=0; i < how_many; i++) { @@ -1571,12 +1572,12 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, check that all written characters are pure ASCII */ Py_UCS4 max_char; max_char = ucs1lib_find_max_char(from_data, - (Py_UCS1*)from_data + how_many); + (const Py_UCS1*)from_data + how_many); if (max_char >= 128) return -1; } memcpy((char*)to_data + to_kind * to_start, - (char*)from_data + from_kind * from_start, + (const char*)from_data + from_kind * from_start, to_kind * how_many); } else if (from_kind == PyUnicode_1BYTE_KIND @@ -2047,7 +2048,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index, const char *str, Py_ssize_t len) { enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); - void *data = PyUnicode_DATA(unicode); + const void *data = PyUnicode_DATA(unicode); const char *end = str + len; assert(index + len <= PyUnicode_GET_LENGTH(unicode)); @@ -2402,7 +2403,7 @@ Py_UCS4 _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end) { enum PyUnicode_Kind kind; - void *startptr, *endptr; + const void *startptr, *endptr; assert(PyUnicode_IS_READY(unicode)); assert(0 <= start); @@ -2559,7 +2560,7 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, int copy_null) { int kind; - void *data; + const void *data; Py_ssize_t len, targetlen; if (PyUnicode_READY(string) == -1) return NULL; @@ -2586,17 +2587,19 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, } } if (kind == PyUnicode_1BYTE_KIND) { - Py_UCS1 *start = (Py_UCS1 *) data; + const Py_UCS1 *start = (const Py_UCS1 *) data; _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target); } else if (kind == PyUnicode_2BYTE_KIND) { - Py_UCS2 *start = (Py_UCS2 *) data; + const Py_UCS2 *start = (const Py_UCS2 *) data; _PyUnicode_CONVERT_BYTES(Py_UCS2, Py_UCS4, start, start + len, target); } - else { - assert(kind == PyUnicode_4BYTE_KIND); + else if (kind == PyUnicode_4BYTE_KIND) { memcpy(target, data, len * sizeof(Py_UCS4)); } + else { + Py_UNREACHABLE(); + } if (copy_null) target[len] = 0; return target; @@ -4105,7 +4108,7 @@ PyUnicode_GetLength(PyObject *unicode) Py_UCS4 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) { - void *data; + const void *data; int kind; if (!PyUnicode_Check(unicode)) { @@ -4707,7 +4710,7 @@ _PyUnicode_EncodeUTF7(PyObject *str, const char *errors) { int kind; - void *data; + const void *data; Py_ssize_t len; PyObject *v; int inShift = 0; @@ -4950,7 +4953,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, if (u == NULL) { return NULL; } - s += ascii_decode(s, end, PyUnicode_DATA(u)); + s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); if (s == end) { return u; } @@ -5380,7 +5383,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, PyUnicode_UTF8_LENGTH(unicode)); enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); - void *data = PyUnicode_DATA(unicode); + const void *data = PyUnicode_DATA(unicode); Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); _PyBytesWriter writer; @@ -5416,7 +5419,7 @@ unicode_fill_utf8(PyObject *unicode) assert(!PyUnicode_IS_ASCII(unicode)); enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); - void *data = PyUnicode_DATA(unicode); + const void *data = PyUnicode_DATA(unicode); Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); _PyBytesWriter writer; @@ -6425,7 +6428,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode) PyObject *repr; char *p; enum PyUnicode_Kind kind; - void *data; + const void *data; Py_ssize_t expandsize; /* Initial allocation is based on the longest-possible character @@ -6679,7 +6682,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) char *p; Py_ssize_t expandsize, pos; int kind; - void *data; + const void *data; Py_ssize_t len; if (!PyUnicode_Check(unicode)) { @@ -6885,7 +6888,7 @@ unicode_encode_ucs1(PyObject *unicode, /* input state */ Py_ssize_t pos=0, size; int kind; - void *data; + const void *data; /* pointer into the output */ char *str; const char *encoding = (limit == 256) ? "latin-1" : "ascii"; @@ -7113,7 +7116,7 @@ PyUnicode_DecodeASCII(const char *s, if (u == NULL) { return NULL; } - Py_ssize_t outpos = ascii_decode(s, e, PyUnicode_DATA(u)); + Py_ssize_t outpos = ascii_decode(s, e, PyUnicode_1BYTE_DATA(u)); if (outpos == size) { return u; } @@ -7800,7 +7803,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, else { Py_ssize_t i; enum PyUnicode_Kind kind; - void *data; + const void *data; if (PyUnicode_READY(rep) == -1) { Py_DECREF(rep); @@ -7958,7 +7961,7 @@ charmap_decode_string(const char *s, PyObject *errorHandler = NULL, *exc = NULL; Py_ssize_t maplen; enum PyUnicode_Kind mapkind; - void *mapdata; + const void *mapdata; Py_UCS4 x; unsigned char ch; @@ -7975,7 +7978,7 @@ charmap_decode_string(const char *s, /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1 * is disabled in encoding aliases, latin1 is preferred because * its implementation is faster. */ - Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata; + const Py_UCS1 *mapdata_ucs1 = (const Py_UCS1 *)mapdata; Py_UCS1 *outdata = (Py_UCS1 *)writer->data; Py_UCS4 maxchar = writer->maxchar; @@ -7999,7 +8002,7 @@ charmap_decode_string(const char *s, while (s < e) { if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { enum PyUnicode_Kind outkind = writer->kind; - Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata; + const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata; if (outkind == PyUnicode_1BYTE_KIND) { Py_UCS1 *outdata = (Py_UCS1 *)writer->data; Py_UCS4 maxchar = writer->maxchar; @@ -8279,7 +8282,7 @@ PyUnicode_BuildEncodingMap(PyObject* string) unsigned char *mlevel1, *mlevel2, *mlevel3; int count2 = 0, count3 = 0; int kind; - void *data; + const void *data; Py_ssize_t length; Py_UCS4 ch; @@ -8543,7 +8546,7 @@ charmap_encoding_error( Py_ssize_t size, repsize; Py_ssize_t newpos; enum PyUnicode_Kind kind; - void *data; + const void *data; Py_ssize_t index; /* startpos for collecting unencodable chars */ Py_ssize_t collstartpos = *inpos; @@ -8693,7 +8696,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, PyObject *error_handler_obj = NULL; PyObject *exc = NULL; _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; - void *data; + const void *data; int kind; if (PyUnicode_READY(unicode) == -1) @@ -9025,7 +9028,8 @@ unicode_fast_translate(PyObject *input, PyObject *mapping, { Py_UCS1 ascii_table[128], ch, ch2; Py_ssize_t len; - Py_UCS1 *in, *end, *out; + const Py_UCS1 *in, *end; + Py_UCS1 *out; int res = 0; len = PyUnicode_GET_LENGTH(input); @@ -9074,7 +9078,7 @@ _PyUnicode_TranslateCharmap(PyObject *input, const char *errors) { /* input object */ - char *data; + const void *data; Py_ssize_t size, i; int kind; /* output buffer */ @@ -9093,7 +9097,7 @@ _PyUnicode_TranslateCharmap(PyObject *input, if (PyUnicode_READY(input) == -1) return NULL; - data = (char*)PyUnicode_DATA(input); + data = PyUnicode_DATA(input); kind = PyUnicode_KIND(input); size = PyUnicode_GET_LENGTH(input); @@ -9271,7 +9275,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, Py_ssize_t i; Py_UCS4 maxchar; enum PyUnicode_Kind kind; - void *data; + const void *data; maxchar = 127; for (i = 0; i < length; i++) { @@ -9313,7 +9317,7 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, PyObject *unicode; Py_ssize_t i; enum PyUnicode_Kind kind; - void *data; + const void *data; if (output == NULL) { PyErr_BadArgument(); @@ -9391,7 +9395,7 @@ any_find_slice(PyObject* s1, PyObject* s2, int direction) { int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2, result; kind1 = PyUnicode_KIND(s1); @@ -9460,8 +9464,9 @@ any_find_slice(PyObject* s1, PyObject* s2, } } + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(s2))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return result; } @@ -9620,7 +9625,7 @@ PyUnicode_Count(PyObject *str, { Py_ssize_t result; int kind1, kind2; - void *buf1 = NULL, *buf2 = NULL; + const void *buf1 = NULL, *buf2 = NULL; Py_ssize_t len1, len2; if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) @@ -9649,24 +9654,24 @@ PyUnicode_Count(PyObject *str, case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr)) result = asciilib_count( - ((Py_UCS1*)buf1) + start, end - start, + ((const Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); else result = ucs1lib_count( - ((Py_UCS1*)buf1) + start, end - start, + ((const Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_2BYTE_KIND: result = ucs2lib_count( - ((Py_UCS2*)buf1) + start, end - start, + ((const Py_UCS2*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_4BYTE_KIND: result = ucs4lib_count( - ((Py_UCS4*)buf1) + start, end - start, + ((const Py_UCS4*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; @@ -9674,13 +9679,15 @@ PyUnicode_Count(PyObject *str, Py_UNREACHABLE(); } + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return result; onError: - if (kind2 != kind1 && buf2) - PyMem_Free(buf2); + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); + if (kind2 != kind1) + PyMem_Free((void *)buf2); return -1; } @@ -9728,8 +9735,8 @@ tailmatch(PyObject *self, { int kind_self; int kind_sub; - void *data_self; - void *data_sub; + const void *data_self; + const void *data_sub; Py_ssize_t offset; Py_ssize_t i; Py_ssize_t end_sub; @@ -9803,7 +9810,8 @@ static PyObject * ascii_upper_or_lower(PyObject *self, int lower) { Py_ssize_t len = PyUnicode_GET_LENGTH(self); - char *resdata, *data = PyUnicode_DATA(self); + const char *data = PyUnicode_DATA(self); + char *resdata; PyObject *res; res = PyUnicode_New(len, 127); @@ -9818,7 +9826,7 @@ ascii_upper_or_lower(PyObject *self, int lower) } static Py_UCS4 -handle_capital_sigma(int kind, void *data, Py_ssize_t length, Py_ssize_t i) +handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i) { Py_ssize_t j; int final_sigma; @@ -9847,7 +9855,7 @@ handle_capital_sigma(int kind, void *data, Py_ssize_t length, Py_ssize_t i) } static int -lower_ucs4(int kind, void *data, Py_ssize_t length, Py_ssize_t i, +lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i, Py_UCS4 c, Py_UCS4 *mapped) { /* Obscure special case. */ @@ -9859,7 +9867,7 @@ lower_ucs4(int kind, void *data, Py_ssize_t length, Py_ssize_t i, } static Py_ssize_t -do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) +do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { Py_ssize_t i, k = 0; int n_res, j; @@ -9883,7 +9891,7 @@ do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *ma } static Py_ssize_t -do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { +do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { Py_ssize_t i, k = 0; for (i = 0; i < length; i++) { @@ -9908,7 +9916,7 @@ do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc } static Py_ssize_t -do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, +do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar, int lower) { Py_ssize_t i, k = 0; @@ -9929,19 +9937,19 @@ do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, } static Py_ssize_t -do_upper(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) +do_upper(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { return do_upper_or_lower(kind, data, length, res, maxchar, 0); } static Py_ssize_t -do_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) +do_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { return do_upper_or_lower(kind, data, length, res, maxchar, 1); } static Py_ssize_t -do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) +do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { Py_ssize_t i, k = 0; @@ -9958,7 +9966,7 @@ do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc } static Py_ssize_t -do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) +do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { Py_ssize_t i, k = 0; int previous_is_cased; @@ -9986,12 +9994,13 @@ do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar static PyObject * case_operation(PyObject *self, - Py_ssize_t (*perform)(int, void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *)) + Py_ssize_t (*perform)(int, const void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *)) { PyObject *res = NULL; Py_ssize_t length, newlength = 0; int kind, outkind; - void *data, *outdata; + const void *data; + void *outdata; Py_UCS4 maxchar = 0, *tmp, *tmpend; assert(PyUnicode_IS_READY(self)); @@ -10358,7 +10367,7 @@ split(PyObject *self, Py_ssize_t maxcount) { int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; @@ -10438,8 +10447,9 @@ split(PyObject *self, default: out = NULL; } + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return out; } @@ -10449,7 +10459,7 @@ rsplit(PyObject *self, Py_ssize_t maxcount) { int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; @@ -10529,14 +10539,15 @@ rsplit(PyObject *self, default: out = NULL; } + assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return out; } static Py_ssize_t -anylib_find(int kind, PyObject *str1, void *buf1, Py_ssize_t len1, - PyObject *str2, void *buf2, Py_ssize_t len2, Py_ssize_t offset) +anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1, + PyObject *str2, const void *buf2, Py_ssize_t len2, Py_ssize_t offset) { switch (kind) { case PyUnicode_1BYTE_KIND: @@ -10553,8 +10564,8 @@ anylib_find(int kind, PyObject *str1, void *buf1, Py_ssize_t len1, } static Py_ssize_t -anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen, - PyObject *str1, void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) +anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen, + PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) { switch (kind) { case PyUnicode_1BYTE_KIND: @@ -10600,9 +10611,9 @@ replace(PyObject *self, PyObject *str1, PyObject *str2, Py_ssize_t maxcount) { PyObject *u; - char *sbuf = PyUnicode_DATA(self); - char *buf1 = PyUnicode_DATA(str1); - char *buf2 = PyUnicode_DATA(str2); + const char *sbuf = PyUnicode_DATA(self); + const void *buf1 = PyUnicode_DATA(str1); + const void *buf2 = PyUnicode_DATA(str2); int srelease = 0, release1 = 0, release2 = 0; int skind = PyUnicode_KIND(self); int kind1 = PyUnicode_KIND(str1); @@ -10680,7 +10691,8 @@ replace(PyObject *self, PyObject *str1, /* widen self and buf1 */ rkind = kind2; if (release1) { - PyMem_Free(buf1); + assert(buf1 != PyUnicode_DATA(str1)); + PyMem_Free((void *)buf1); buf1 = PyUnicode_DATA(str1); release1 = 0; } @@ -10745,7 +10757,8 @@ replace(PyObject *self, PyObject *str1, if (!sbuf) goto error; srelease = 1; if (release1) { - PyMem_Free(buf1); + assert(buf1 != PyUnicode_DATA(str1)); + PyMem_Free((void *)buf1); buf1 = PyUnicode_DATA(str1); release1 = 0; } @@ -10837,32 +10850,41 @@ replace(PyObject *self, PyObject *str1, } done: + assert(srelease == (sbuf != PyUnicode_DATA(self))); + assert(release1 == (buf1 != PyUnicode_DATA(str1))); + assert(release2 == (buf2 != PyUnicode_DATA(str2))); if (srelease) - PyMem_FREE(sbuf); + PyMem_FREE((void *)sbuf); if (release1) - PyMem_FREE(buf1); + PyMem_FREE((void *)buf1); if (release2) - PyMem_FREE(buf2); + PyMem_FREE((void *)buf2); assert(_PyUnicode_CheckConsistency(u, 1)); return u; nothing: /* nothing to replace; return original string (when possible) */ + assert(srelease == (sbuf != PyUnicode_DATA(self))); + assert(release1 == (buf1 != PyUnicode_DATA(str1))); + assert(release2 == (buf2 != PyUnicode_DATA(str2))); if (srelease) - PyMem_FREE(sbuf); + PyMem_FREE((void *)sbuf); if (release1) - PyMem_FREE(buf1); + PyMem_FREE((void *)buf1); if (release2) - PyMem_FREE(buf2); + PyMem_FREE((void *)buf2); return unicode_result_unchanged(self); error: - if (srelease && sbuf) - PyMem_FREE(sbuf); - if (release1 && buf1) - PyMem_FREE(buf1); - if (release2 && buf2) - PyMem_FREE(buf2); + assert(srelease == (sbuf != PyUnicode_DATA(self))); + assert(release1 == (buf1 != PyUnicode_DATA(str1))); + assert(release2 == (buf2 != PyUnicode_DATA(str2))); + if (srelease) + PyMem_FREE((void *)sbuf); + if (release1) + PyMem_FREE((void *)buf1); + if (release2) + PyMem_FREE((void *)buf2); return NULL; } @@ -10999,7 +11021,7 @@ unicode_compare(PyObject *str1, PyObject *str2) while (0) int kind1, kind2; - void *data1, *data2; + const void *data1, *data2; Py_ssize_t len1, len2, len; kind1 = PyUnicode_KIND(str1); @@ -11100,7 +11122,7 @@ static int unicode_compare_eq(PyObject *str1, PyObject *str2) { int kind; - void *data1, *data2; + const void *data1, *data2; Py_ssize_t len; int cmp; @@ -11185,7 +11207,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) return 0; } else { - void *data = PyUnicode_DATA(uni); + const void *data = PyUnicode_DATA(uni); /* Compare Unicode string and source character set string */ for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++) if (chr != (unsigned char)str[i]) @@ -11334,7 +11356,7 @@ int PyUnicode_Contains(PyObject *str, PyObject *substr) { int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2; int result; @@ -11384,8 +11406,9 @@ PyUnicode_Contains(PyObject *str, PyObject *substr) Py_UNREACHABLE(); } + assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substr))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return result; } @@ -11562,7 +11585,7 @@ unicode_count(PyObject *self, PyObject *args) Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *result; int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2, iresult; if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) @@ -11589,19 +11612,19 @@ unicode_count(PyObject *self, PyObject *args) switch (kind1) { case PyUnicode_1BYTE_KIND: iresult = ucs1lib_count( - ((Py_UCS1*)buf1) + start, end - start, + ((const Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_2BYTE_KIND: iresult = ucs2lib_count( - ((Py_UCS2*)buf1) + start, end - start, + ((const Py_UCS2*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_4BYTE_KIND: iresult = ucs4lib_count( - ((Py_UCS4*)buf1) + start, end - start, + ((const Py_UCS4*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; @@ -11611,8 +11634,9 @@ unicode_count(PyObject *self, PyObject *args) result = PyLong_FromSsize_t(iresult); + assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return result; } @@ -11656,7 +11680,8 @@ unicode_expandtabs_impl(PyObject *self, int tabsize) Py_ssize_t i, j, line_pos, src_len, incr; Py_UCS4 ch; PyObject *u; - void *src_data, *dest_data; + const void *src_data; + void *dest_data; int kind; int found; @@ -11762,7 +11787,7 @@ unicode_find(PyObject *self, PyObject *args) static PyObject * unicode_getitem(PyObject *self, Py_ssize_t index) { - void *data; + const void *data; enum PyUnicode_Kind kind; Py_UCS4 ch; @@ -11875,7 +11900,7 @@ unicode_islower_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; int cased; if (PyUnicode_READY(self) == -1) @@ -11920,7 +11945,7 @@ unicode_isupper_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; int cased; if (PyUnicode_READY(self) == -1) @@ -11965,7 +11990,7 @@ unicode_istitle_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; int cased, previous_is_cased; if (PyUnicode_READY(self) == -1) @@ -12023,7 +12048,7 @@ unicode_isspace_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; if (PyUnicode_READY(self) == -1) return NULL; @@ -12063,7 +12088,7 @@ unicode_isalpha_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; if (PyUnicode_READY(self) == -1) return NULL; @@ -12101,7 +12126,7 @@ unicode_isalnum_impl(PyObject *self) /*[clinic end generated code: output=a5a23490ffc3660c input=5c6579bf2e04758c]*/ { int kind; - void *data; + const void *data; Py_ssize_t len, i; if (PyUnicode_READY(self) == -1) @@ -12144,7 +12169,7 @@ unicode_isdecimal_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; if (PyUnicode_READY(self) == -1) return NULL; @@ -12183,7 +12208,7 @@ unicode_isdigit_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; if (PyUnicode_READY(self) == -1) return NULL; @@ -12223,7 +12248,7 @@ unicode_isnumeric_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; if (PyUnicode_READY(self) == -1) return NULL; @@ -12260,7 +12285,7 @@ PyUnicode_IsIdentifier(PyObject *self) } int kind = 0; - void *data = NULL; + const void *data = NULL; const wchar_t *wstr = NULL; Py_UCS4 ch; if (ready) { @@ -12329,7 +12354,7 @@ unicode_isprintable_impl(PyObject *self) { Py_ssize_t i, length; int kind; - void *data; + const void *data; if (PyUnicode_READY(self) == -1) return NULL; @@ -12434,7 +12459,7 @@ static const char *stripfuncnames[] = {"lstrip", "rstrip", "strip"}; PyObject * _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) { - void *data; + const void *data; int kind; Py_ssize_t i, j, len; BLOOM_MASK sepmask; @@ -12484,7 +12509,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) PyObject* PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) { - unsigned char *data; + const unsigned char *data; int kind; Py_ssize_t length; @@ -12507,7 +12532,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) length = end - start; if (PyUnicode_IS_ASCII(self)) { data = PyUnicode_1BYTE_DATA(self); - return _PyUnicode_FromASCII((char*)(data + start), length); + return _PyUnicode_FromASCII((const char*)(data + start), length); } else { kind = PyUnicode_KIND(self); @@ -12529,7 +12554,7 @@ do_strip(PyObject *self, int striptype) len = PyUnicode_GET_LENGTH(self); if (PyUnicode_IS_ASCII(self)) { - Py_UCS1 *data = PyUnicode_1BYTE_DATA(self); + const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self); i = 0; if (striptype != RIGHTSTRIP) { @@ -12555,7 +12580,7 @@ do_strip(PyObject *self, int striptype) } else { int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); + const void *data = PyUnicode_DATA(self); i = 0; if (striptype != RIGHTSTRIP) { @@ -12688,8 +12713,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len) assert(PyUnicode_KIND(u) == PyUnicode_KIND(str)); if (PyUnicode_GET_LENGTH(str) == 1) { - const int kind = PyUnicode_KIND(str); - const Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0); + int kind = PyUnicode_KIND(str); + Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0); if (kind == PyUnicode_1BYTE_KIND) { void *to = PyUnicode_DATA(u); memset(to, (unsigned char)fill_char, len); @@ -12708,7 +12733,7 @@ unicode_repeat(PyObject *str, Py_ssize_t len) else { /* number of characters copied this far */ Py_ssize_t done = PyUnicode_GET_LENGTH(str); - const Py_ssize_t char_size = PyUnicode_KIND(str); + Py_ssize_t char_size = PyUnicode_KIND(str); char *to = (char *) PyUnicode_DATA(u); memcpy(to, PyUnicode_DATA(str), PyUnicode_GET_LENGTH(str) * char_size); @@ -12769,7 +12794,8 @@ unicode_repr(PyObject *unicode) Py_ssize_t osize, squote, dquote, i, o; Py_UCS4 max, quote; int ikind, okind, unchanged; - void *idata, *odata; + const void *idata; + void *odata; if (PyUnicode_READY(unicode) == -1) return NULL; @@ -13062,7 +13088,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj) { PyObject* out; int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2; if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0) @@ -13107,8 +13133,9 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj) Py_UNREACHABLE(); } + assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return out; } @@ -13119,7 +13146,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj) { PyObject* out; int kind1, kind2; - void *buf1, *buf2; + const void *buf1, *buf2; Py_ssize_t len1, len2; if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0) @@ -13164,8 +13191,9 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj) Py_UNREACHABLE(); } + assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj))); if (kind2 != kind1) - PyMem_Free(buf2); + PyMem_Free((void *)buf2); return out; } @@ -13321,7 +13349,7 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) return NULL; if (y != NULL) { int x_kind, y_kind, z_kind; - void *x_data, *y_data, *z_data; + const void *x_data, *y_data, *z_data; /* x must be a string too, of equal length */ if (!PyUnicode_Check(x)) { @@ -13370,7 +13398,7 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) } } else { int kind; - void *data; + const void *data; /* x must be a dict */ if (!PyDict_CheckExact(x)) { @@ -13471,7 +13499,7 @@ unicode_zfill_impl(PyObject *self, Py_ssize_t width) Py_ssize_t fill; PyObject *u; int kind; - void *data; + const void *data; Py_UCS4 chr; if (PyUnicode_READY(self) == -1) @@ -14144,7 +14172,8 @@ unicode_subscript(PyObject* self, PyObject* item) Py_ssize_t start, stop, step, slicelength, i; size_t cur; PyObject *result; - void *src_data, *dest_data; + const void *src_data; + void *dest_data; int src_kind, dest_kind; Py_UCS4 ch, max_char, kind_limit; @@ -14215,7 +14244,7 @@ struct unicode_formatter_t { enum PyUnicode_Kind fmtkind; Py_ssize_t fmtcnt, fmtpos; - void *fmtdata; + const void *fmtdata; PyObject *fmtstr; _PyUnicodeWriter writer; @@ -14889,7 +14918,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, { Py_ssize_t len; enum PyUnicode_Kind kind; - void *pbuf; + const void *pbuf; Py_ssize_t pindex; Py_UCS4 signchar; Py_ssize_t buflen; @@ -15556,7 +15585,7 @@ unicodeiter_next(unicodeiterobject *it) if (it->it_index < PyUnicode_GET_LENGTH(seq)) { int kind = PyUnicode_KIND(seq); - void *data = PyUnicode_DATA(seq); + const void *data = PyUnicode_DATA(seq); Py_UCS4 chr = PyUnicode_READ(kind, data, it->it_index); item = PyUnicode_FromOrdinal(chr); if (item != NULL) diff --git a/Python/_warnings.c b/Python/_warnings.c index fd3ca60..e4dfb73 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -435,7 +435,7 @@ normalize_module(PyObject *filename) { PyObject *module; int kind; - void *data; + const void *data; Py_ssize_t len; len = PyUnicode_GetLength(filename); @@ -519,7 +519,7 @@ show_warning(PyObject *filename, int lineno, PyObject *text, /* Print " source_line\n" */ if (sourceline) { int kind; - void *data; + const void *data; Py_ssize_t i, len; Py_UCS4 ch; PyObject *truncated; diff --git a/Python/ast.c b/Python/ast.c index 0f23f67..1a4a311 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -4588,7 +4588,7 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, if (*s & 0x80) { /* XXX inefficient */ PyObject *w; int kind; - void *data; + const void *data; Py_ssize_t len, i; w = decode_utf8(c, &s, end); if (w == NULL) { diff --git a/Python/codecs.c b/Python/codecs.c index bbbf774..7b35ded 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -701,8 +701,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { PyObject *res; - int kind; - void *data; + Py_UCS1 *outp; if (PyUnicodeEncodeError_GetStart(exc, &start)) return NULL; if (PyUnicodeEncodeError_GetEnd(exc, &end)) @@ -711,10 +710,10 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) res = PyUnicode_New(len, '?'); if (res == NULL) return NULL; - kind = PyUnicode_KIND(res); - data = PyUnicode_DATA(res); + assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND); + outp = PyUnicode_1BYTE_DATA(res); for (i = 0; i < len; ++i) - PyUnicode_WRITE(kind, data, i, '?'); + outp[i] = '?'; assert(_PyUnicode_CheckConsistency(res, 1)); return Py_BuildValue("(Nn)", res, end); } @@ -727,8 +726,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) } else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) { PyObject *res; - int kind; - void *data; + Py_UCS2 *outp; if (PyUnicodeTranslateError_GetStart(exc, &start)) return NULL; if (PyUnicodeTranslateError_GetEnd(exc, &end)) @@ -737,10 +735,10 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER); if (res == NULL) return NULL; - kind = PyUnicode_KIND(res); - data = PyUnicode_DATA(res); - for (i=0; i < len; i++) - PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER); + assert(PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND); + outp = PyUnicode_2BYTE_DATA(res); + for (i = 0; i < len; i++) + outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER; assert(_PyUnicode_CheckConsistency(res, 1)); return Py_BuildValue("(Nn)", res, end); } @@ -759,7 +757,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) Py_ssize_t start; Py_ssize_t end; PyObject *res; - unsigned char *outp; + Py_UCS1 *outp; Py_ssize_t ressize; Py_UCS4 ch; if (PyUnicodeEncodeError_GetStart(exc, &start)) @@ -855,7 +853,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) Py_ssize_t start; Py_ssize_t end; PyObject *res; - unsigned char *outp; + Py_UCS1 *outp; int ressize; Py_UCS4 c; @@ -966,7 +964,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) Py_ssize_t start; Py_ssize_t end; PyObject *res; - unsigned char *outp; + Py_UCS1 *outp; Py_ssize_t ressize; int replsize; Py_UCS4 c; diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 841b25a..74638ca 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -62,7 +62,7 @@ get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end, Py_ssize_t accumulator, digitval, pos = *ppos; int numdigits; int kind = PyUnicode_KIND(str); - void *data = PyUnicode_DATA(str); + const void *data = PyUnicode_DATA(str); accumulator = numdigits = 0; for (; pos < end; pos++, numdigits++) { @@ -170,7 +170,7 @@ parse_internal_render_format_spec(PyObject *format_spec, { Py_ssize_t pos = start; int kind = PyUnicode_KIND(format_spec); - void *data = PyUnicode_DATA(format_spec); + const void *data = PyUnicode_DATA(format_spec); /* end-pos is used throughout this code to specify the length of the input string */ #define READ_spec(index) PyUnicode_READ(kind, data, index) @@ -443,7 +443,7 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, { Py_ssize_t remainder; int kind = PyUnicode_KIND(s); - void *data = PyUnicode_DATA(s); + const void *data = PyUnicode_DATA(s); while (pos