summary | refs | log | tree | commit | diff | stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c | 123
1 file changed, 64 insertions, 59 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7fbce14..e62c774 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -36,6 +36,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "unicodeobject.h"
@@ -83,6 +84,11 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* Free list for Unicode objects */
static PyUnicodeObject *unicode_freelist;
static int unicode_freelist_size;
@@ -130,14 +136,9 @@ int unicode_resize(register PyUnicodeObject *unicode,
/* Resizing shared object (unicode_empty or single character
objects) in-place is not allowed. Use PyUnicode_Resize()
instead ! */
- if (unicode == unicode_empty ||
- (unicode->length == 1 &&
- /* MvL said unicode->str[] may be signed. Python generally assumes
- * an int contains at least 32 bits, and we don't use more than
- * 32 bits even in a UCS4 build, so casting to unsigned int should
- * be correct.
- */
- (unsigned int)unicode->str[0] < 256U &&
+ if (unicode == unicode_empty ||
+ (unicode->length == 1 &&
+ unicode->str[0] < 256U &&
unicode_latin1[unicode->str[0]] == unicode)) {
PyErr_SetString(PyExc_SystemError,
"can't resize shared unicode objects");
@@ -149,13 +150,12 @@ int unicode_resize(register PyUnicodeObject *unicode,
oldstr = unicode->str;
PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1);
if (!unicode->str) {
- unicode->str = oldstr;
+ unicode->str = (Py_UNICODE *)oldstr;
PyErr_NoMemory();
return -1;
}
unicode->str[length] = 0;
- assert(length < INT_MAX);
- unicode->length = (int)length;
+ unicode->length = length;
reset:
/* Reset the object caches */
@@ -226,8 +226,7 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
*/
unicode->str[0] = 0;
unicode->str[length] = 0;
- assert(length<INT_MAX);
- unicode->length = (int)length;
+ unicode->length = length;
unicode->hash = -1;
unicode->defenc = NULL;
return unicode;
@@ -368,7 +367,7 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
#else
{
register Py_UNICODE *u;
- register int i;
+ register Py_ssize_t i;
u = PyUnicode_AS_UNICODE(unicode);
for (i = size; i > 0; i--)
*u++ = *w++;
@@ -396,7 +395,7 @@ Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
#else
{
register Py_UNICODE *u;
- register int i;
+ register Py_ssize_t i;
u = PyUnicode_AS_UNICODE(unicode);
for (i = size; i > 0; i--)
*w++ = *u++;
@@ -1358,7 +1357,7 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
PyObject *v; /* result string object */
char *p; /* next free byte in output buffer */
Py_ssize_t nallocated; /* number of result bytes allocated */
- int nneeded; /* number of result bytes needed */
+ Py_ssize_t nneeded; /* number of result bytes needed */
char stackbuf[MAX_SHORT_UNICHARS * 4];
assert(s != NULL);
@@ -1427,13 +1426,13 @@ encodeUCS4:
if (v == NULL) {
/* This was stack allocated. */
- nneeded = Py_SAFE_DOWNCAST(p - stackbuf, long, int);
+ nneeded = p - stackbuf;
assert(nneeded <= nallocated);
v = PyString_FromStringAndSize(stackbuf, nneeded);
}
else {
/* Cut back to size actually needed. */
- nneeded = Py_SAFE_DOWNCAST(p - PyString_AS_STRING(v), long, int);
+ nneeded = p - PyString_AS_STRING(v);
assert(nneeded <= nallocated);
_PyString_Resize(&v, nneeded);
}
@@ -1884,7 +1883,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
Py_DECREF(m);
if (api == NULL)
goto ucnhashError;
- ucnhash_CAPI = PyCObject_AsVoidPtr(api);
+ ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCObject_AsVoidPtr(api);
Py_DECREF(api);
if (ucnhash_CAPI == NULL)
goto ucnhashError;
@@ -1934,7 +1933,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
nextByte:
;
}
- if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
@@ -2003,7 +2002,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
#ifdef Py_UNICODE_WIDE
/* Map 21-bit characters to '\U00xxxxxx' */
else if (ch >= 0x10000) {
- int offset = p - PyString_AS_STRING(repr);
+ Py_ssize_t offset = p - PyString_AS_STRING(repr);
/* Resize the string if necessary */
if (offset + 12 > PyString_GET_SIZE(repr)) {
@@ -2205,7 +2204,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
nextByte:
;
}
- if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
@@ -2348,7 +2347,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
}
}
- if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
@@ -2499,8 +2498,8 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
/* current output position */
Py_ssize_t respos = 0;
Py_ssize_t ressize;
- char *encoding = (limit == 256) ? "latin-1" : "ascii";
- char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
+ const char *encoding = (limit == 256) ? "latin-1" : "ascii";
+ const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
/* the following variable is used for caching string comparisons
@@ -2723,7 +2722,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
}
}
if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
- if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
@@ -2982,7 +2981,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
}
}
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
- if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))) < 0)
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
@@ -3336,9 +3335,9 @@ static PyObject *unicode_translate_call_errorhandler(const char *errors,
Py_ssize_t startpos, Py_ssize_t endpos,
Py_ssize_t *newpos)
{
- static char *argparse = "O!i;translating error handler must return (unicode, int) tuple";
+ static char *argparse = "O!n;translating error handler must return (unicode, int) tuple";
- int i_newpos;
+ Py_ssize_t i_newpos;
PyObject *restuple;
PyObject *resunicode;
@@ -3798,7 +3797,7 @@ Py_ssize_t count(PyUnicodeObject *self,
Py_ssize_t end,
PyUnicodeObject *substring)
{
- int count = 0;
+ Py_ssize_t count = 0;
if (start < 0)
start += self->length;
@@ -4157,7 +4156,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
PyObject *fseq; /* PySequence_Fast(seq) */
Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
PyObject *item;
- int i;
+ Py_ssize_t i;
fseq = PySequence_Fast(seq, "");
if (fseq == NULL) {
@@ -4206,7 +4205,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
}
/* Get space. */
- res = _PyUnicode_New((int)res_alloc);
+ res = _PyUnicode_New(res_alloc);
if (res == NULL)
goto onError;
res_p = PyUnicode_AS_UNICODE(res);
@@ -4220,7 +4219,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
/* Convert item to Unicode. */
if (! PyUnicode_Check(item) && ! PyString_Check(item)) {
PyErr_Format(PyExc_TypeError,
- "sequence item %i: expected string or Unicode,"
+ "sequence item %zd: expected string or Unicode,"
" %.80s found",
i, item->ob_type->tp_name);
goto onError;
@@ -4236,11 +4235,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
/* Make sure we have enough space for the separator and the item. */
itemlen = PyUnicode_GET_SIZE(item);
new_res_used = res_used + itemlen;
- if (new_res_used < res_used || new_res_used > INT_MAX)
+ if (new_res_used < res_used || new_res_used > PY_SSIZE_T_MAX)
goto Overflow;
if (i < seqlen - 1) {
new_res_used += seplen;
- if (new_res_used < res_used || new_res_used > INT_MAX)
+ if (new_res_used < res_used || new_res_used > PY_SSIZE_T_MAX)
goto Overflow;
}
if (new_res_used > res_alloc) {
@@ -4248,10 +4247,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
do {
size_t oldsize = res_alloc;
res_alloc += res_alloc;
- if (res_alloc < oldsize || res_alloc > INT_MAX)
+ if (res_alloc < oldsize || res_alloc > PY_SSIZE_T_MAX)
goto Overflow;
} while (new_res_used > res_alloc);
- if (_PyUnicode_Resize(&res, (int)res_alloc) < 0) {
+ if (_PyUnicode_Resize(&res, res_alloc) < 0) {
Py_DECREF(item);
goto onError;
}
@@ -4259,10 +4258,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
}
/* Copy item, and maybe the separator. */
- Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), (int)itemlen);
+ Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), itemlen);
res_p += itemlen;
if (i < seqlen - 1) {
- Py_UNICODE_COPY(res_p, sep, (int)seplen);
+ Py_UNICODE_COPY(res_p, sep, seplen);
res_p += seplen;
}
Py_DECREF(item);
@@ -4272,7 +4271,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
/* Shrink res to match the used area; this probably can't fail,
* but it's cheap to check.
*/
- if (_PyUnicode_Resize(&res, (int)res_used) < 0)
+ if (_PyUnicode_Resize(&res, res_used) < 0)
goto onError;
Done:
@@ -4605,7 +4604,7 @@ PyObject *split(PyUnicodeObject *self,
PyObject *list;
if (maxcount < 0)
- maxcount = INT_MAX;
+ maxcount = PY_SSIZE_T_MAX;
list = PyList_New(0);
if (!list)
@@ -4634,7 +4633,7 @@ PyObject *rsplit(PyUnicodeObject *self,
PyObject *list;
if (maxcount < 0)
- maxcount = INT_MAX;
+ maxcount = PY_SSIZE_T_MAX;
list = PyList_New(0);
if (!list)
@@ -4664,10 +4663,10 @@ PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *u;
if (maxcount < 0)
- maxcount = INT_MAX;
+ maxcount = PY_SSIZE_T_MAX;
if (str1->length == 1 && str2->length == 1) {
- int i;
+ Py_ssize_t i;
/* replace characters */
if (!findchar(self->str, self->length, str1->str[0]) &&
@@ -5088,7 +5087,7 @@ unicode_count(PyUnicodeObject *self, PyObject *args)
{
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
if (!PyArg_ParseTuple(args, "O|O&O&:count", &substring,
@@ -5265,7 +5264,7 @@ unicode_find(PyUnicodeObject *self, PyObject *args)
{
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
@@ -5331,7 +5330,7 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
Py_ssize_t result;
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
if (!PyArg_ParseTuple(args, "O|O&O&:index", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
@@ -5669,10 +5668,10 @@ done using the specified fill character (default is a space).");
static PyObject *
unicode_ljust(PyUnicodeObject *self, PyObject *args)
{
- int width;
+ Py_ssize_t width;
Py_UNICODE fillchar = ' ';
- if (!PyArg_ParseTuple(args, "i|O&:ljust", &width, convert_uc, &fillchar))
+ if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar))
return NULL;
if (self->length >= width && PyUnicode_CheckExact(self)) {
@@ -5996,7 +5995,7 @@ unicode_rfind(PyUnicodeObject *self, PyObject *args)
{
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
@@ -6024,7 +6023,7 @@ unicode_rindex(PyUnicodeObject *self, PyObject *args)
Py_ssize_t result;
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
@@ -6053,10 +6052,10 @@ done using the specified fill character (default is a space).");
static PyObject *
unicode_rjust(PyUnicodeObject *self, PyObject *args)
{
- int width;
+ Py_ssize_t width;
Py_UNICODE fillchar = ' ';
- if (!PyArg_ParseTuple(args, "i|O&:rjust", &width, convert_uc, &fillchar))
+ if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar))
return NULL;
if (self->length >= width && PyUnicode_CheckExact(self)) {
@@ -6318,7 +6317,7 @@ unicode_startswith(PyUnicodeObject *self,
{
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &substring,
@@ -6349,7 +6348,7 @@ unicode_endswith(PyUnicodeObject *self,
{
PyUnicodeObject *substring;
Py_ssize_t start = 0;
- Py_ssize_t end = INT_MAX;
+ Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &substring,
@@ -6450,13 +6449,13 @@ static PyNumberMethods unicode_as_number = {
static PySequenceMethods unicode_as_sequence = {
(lenfunc) unicode_length, /* sq_length */
- (binaryfunc) PyUnicode_Concat, /* sq_concat */
+ PyUnicode_Concat, /* sq_concat */
(ssizeargfunc) unicode_repeat, /* sq_repeat */
(ssizeargfunc) unicode_getitem, /* sq_item */
(ssizessizeargfunc) unicode_slice, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
- (objobjproc)PyUnicode_Contains, /*sq_contains*/
+ PyUnicode_Contains, /* sq_contains */
};
#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
@@ -6487,7 +6486,8 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
return PyUnicode_FromUnicode(NULL, 0);
} else {
source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
- result_buf = PyMem_MALLOC(slicelength*sizeof(Py_UNICODE));
+ result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*
+ sizeof(Py_UNICODE));
if (result_buf == NULL)
return PyErr_NoMemory();
@@ -7336,7 +7336,7 @@ PyTypeObject PyUnicode_Type = {
0, /* tp_getattr */
0, /* tp_setattr */
(cmpfunc) unicode_compare, /* tp_compare */
- (reprfunc) unicode_repr, /* tp_repr */
+ unicode_repr, /* tp_repr */
&unicode_as_number, /* tp_as_number */
&unicode_as_sequence, /* tp_as_sequence */
&unicode_as_mapping, /* tp_as_mapping */
@@ -7416,6 +7416,11 @@ _PyUnicode_Fini(void)
unicode_freelist_size = 0;
}
+#ifdef __cplusplus
+}
+#endif
+
+
/*
Local variables:
c-basic-offset: 4