summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/README.txt2
-rw-r--r--Objects/stringlib/asciilib.h29
-rw-r--r--Objects/stringlib/clinic/transmogrify.h.h277
-rw-r--r--Objects/stringlib/codecs.h822
-rw-r--r--Objects/stringlib/count.h9
-rw-r--r--Objects/stringlib/ctype.h35
-rw-r--r--Objects/stringlib/eq.h24
-rw-r--r--Objects/stringlib/fastsearch.h155
-rw-r--r--Objects/stringlib/find.h84
-rw-r--r--Objects/stringlib/find_max_char.h134
-rw-r--r--Objects/stringlib/formatter.h1547
-rw-r--r--Objects/stringlib/join.h140
-rw-r--r--Objects/stringlib/localeutil.h198
-rw-r--r--Objects/stringlib/partition.h22
-rw-r--r--Objects/stringlib/replace.h53
-rw-r--r--Objects/stringlib/split.h30
-rw-r--r--Objects/stringlib/string_format.h (renamed from Objects/stringlib/unicode_format.h)578
-rw-r--r--Objects/stringlib/stringdefs.h25
-rw-r--r--Objects/stringlib/transmogrify.h720
-rw-r--r--Objects/stringlib/ucs1lib.h30
-rw-r--r--Objects/stringlib/ucs2lib.h29
-rw-r--r--Objects/stringlib/ucs4lib.h29
-rw-r--r--Objects/stringlib/undef.h11
-rw-r--r--Objects/stringlib/unicodedefs.h13
24 files changed, 2313 insertions, 2683 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index 8ff6ad8..ab506d6 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -1,4 +1,4 @@
-bits shared by the bytesobject and unicodeobject implementations (and
+bits shared by the stringobject and unicodeobject implementations (and
possibly other modules, in a not too distant future).
the stuff in here is included into relevant places; see the individual
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
deleted file mode 100644
index d0fc18d..0000000
--- a/Objects/stringlib/asciilib.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* this is sort of a hack. there's at least one place (formatting
- floats) where some stringlib code takes a different path if it's
- compiled as unicode. */
-#define STRINGLIB_IS_UNICODE 1
-
-#define FASTSEARCH asciilib_fastsearch
-#define STRINGLIB(F) asciilib_##F
-#define STRINGLIB_OBJECT PyUnicodeObject
-#define STRINGLIB_SIZEOF_CHAR 1
-#define STRINGLIB_MAX_CHAR 0x7Fu
-#define STRINGLIB_CHAR Py_UCS1
-#define STRINGLIB_TYPE_NAME "unicode"
-#define STRINGLIB_PARSE_CODE "U"
-#define STRINGLIB_EMPTY unicode_empty
-#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
-#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
-#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
-#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
-#define STRINGLIB_STR PyUnicode_1BYTE_DATA
-#define STRINGLIB_LEN PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
-#define STRINGLIB_CHECK PyUnicode_Check
-#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
-
-#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_ASCII
-
-#define _Py_InsertThousandsGrouping _PyUnicode_ascii_InsertThousandsGrouping
-
diff --git a/Objects/stringlib/clinic/transmogrify.h.h b/Objects/stringlib/clinic/transmogrify.h.h
deleted file mode 100644
index 8a3a060..0000000
--- a/Objects/stringlib/clinic/transmogrify.h.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/*[clinic input]
-preserve
-[clinic start generated code]*/
-
-PyDoc_STRVAR(stringlib_expandtabs__doc__,
-"expandtabs($self, /, tabsize=8)\n"
-"--\n"
-"\n"
-"Return a copy where all tab characters are expanded using spaces.\n"
-"\n"
-"If tabsize is not given, a tab size of 8 characters is assumed.");
-
-#define STRINGLIB_EXPANDTABS_METHODDEF \
- {"expandtabs", (PyCFunction)(void(*)(void))stringlib_expandtabs, METH_FASTCALL|METH_KEYWORDS, stringlib_expandtabs__doc__},
-
-static PyObject *
-stringlib_expandtabs_impl(PyObject *self, int tabsize);
-
-static PyObject *
-stringlib_expandtabs(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
-{
- PyObject *return_value = NULL;
- static const char * const _keywords[] = {"tabsize", NULL};
- static _PyArg_Parser _parser = {NULL, _keywords, "expandtabs", 0};
- PyObject *argsbuf[1];
- Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
- int tabsize = 8;
-
- args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf);
- if (!args) {
- goto exit;
- }
- if (!noptargs) {
- goto skip_optional_pos;
- }
- if (PyFloat_Check(args[0])) {
- PyErr_SetString(PyExc_TypeError,
- "integer argument expected, got float" );
- goto exit;
- }
- tabsize = _PyLong_AsInt(args[0]);
- if (tabsize == -1 && PyErr_Occurred()) {
- goto exit;
- }
-skip_optional_pos:
- return_value = stringlib_expandtabs_impl(self, tabsize);
-
-exit:
- return return_value;
-}
-
-PyDoc_STRVAR(stringlib_ljust__doc__,
-"ljust($self, width, fillchar=b\' \', /)\n"
-"--\n"
-"\n"
-"Return a left-justified string of length width.\n"
-"\n"
-"Padding is done using the specified fill character.");
-
-#define STRINGLIB_LJUST_METHODDEF \
- {"ljust", (PyCFunction)(void(*)(void))stringlib_ljust, METH_FASTCALL, stringlib_ljust__doc__},
-
-static PyObject *
-stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar);
-
-static PyObject *
-stringlib_ljust(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
-{
- PyObject *return_value = NULL;
- Py_ssize_t width;
- char fillchar = ' ';
-
- if (!_PyArg_CheckPositional("ljust", nargs, 1, 2)) {
- goto exit;
- }
- if (PyFloat_Check(args[0])) {
- PyErr_SetString(PyExc_TypeError,
- "integer argument expected, got float" );
- goto exit;
- }
- {
- Py_ssize_t ival = -1;
- PyObject *iobj = PyNumber_Index(args[0]);
- if (iobj != NULL) {
- ival = PyLong_AsSsize_t(iobj);
- Py_DECREF(iobj);
- }
- if (ival == -1 && PyErr_Occurred()) {
- goto exit;
- }
- width = ival;
- }
- if (nargs < 2) {
- goto skip_optional;
- }
- if (PyBytes_Check(args[1]) && PyBytes_GET_SIZE(args[1]) == 1) {
- fillchar = PyBytes_AS_STRING(args[1])[0];
- }
- else if (PyByteArray_Check(args[1]) && PyByteArray_GET_SIZE(args[1]) == 1) {
- fillchar = PyByteArray_AS_STRING(args[1])[0];
- }
- else {
- _PyArg_BadArgument("ljust", "argument 2", "a byte string of length 1", args[1]);
- goto exit;
- }
-skip_optional:
- return_value = stringlib_ljust_impl(self, width, fillchar);
-
-exit:
- return return_value;
-}
-
-PyDoc_STRVAR(stringlib_rjust__doc__,
-"rjust($self, width, fillchar=b\' \', /)\n"
-"--\n"
-"\n"
-"Return a right-justified string of length width.\n"
-"\n"
-"Padding is done using the specified fill character.");
-
-#define STRINGLIB_RJUST_METHODDEF \
- {"rjust", (PyCFunction)(void(*)(void))stringlib_rjust, METH_FASTCALL, stringlib_rjust__doc__},
-
-static PyObject *
-stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar);
-
-static PyObject *
-stringlib_rjust(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
-{
- PyObject *return_value = NULL;
- Py_ssize_t width;
- char fillchar = ' ';
-
- if (!_PyArg_CheckPositional("rjust", nargs, 1, 2)) {
- goto exit;
- }
- if (PyFloat_Check(args[0])) {
- PyErr_SetString(PyExc_TypeError,
- "integer argument expected, got float" );
- goto exit;
- }
- {
- Py_ssize_t ival = -1;
- PyObject *iobj = PyNumber_Index(args[0]);
- if (iobj != NULL) {
- ival = PyLong_AsSsize_t(iobj);
- Py_DECREF(iobj);
- }
- if (ival == -1 && PyErr_Occurred()) {
- goto exit;
- }
- width = ival;
- }
- if (nargs < 2) {
- goto skip_optional;
- }
- if (PyBytes_Check(args[1]) && PyBytes_GET_SIZE(args[1]) == 1) {
- fillchar = PyBytes_AS_STRING(args[1])[0];
- }
- else if (PyByteArray_Check(args[1]) && PyByteArray_GET_SIZE(args[1]) == 1) {
- fillchar = PyByteArray_AS_STRING(args[1])[0];
- }
- else {
- _PyArg_BadArgument("rjust", "argument 2", "a byte string of length 1", args[1]);
- goto exit;
- }
-skip_optional:
- return_value = stringlib_rjust_impl(self, width, fillchar);
-
-exit:
- return return_value;
-}
-
-PyDoc_STRVAR(stringlib_center__doc__,
-"center($self, width, fillchar=b\' \', /)\n"
-"--\n"
-"\n"
-"Return a centered string of length width.\n"
-"\n"
-"Padding is done using the specified fill character.");
-
-#define STRINGLIB_CENTER_METHODDEF \
- {"center", (PyCFunction)(void(*)(void))stringlib_center, METH_FASTCALL, stringlib_center__doc__},
-
-static PyObject *
-stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar);
-
-static PyObject *
-stringlib_center(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
-{
- PyObject *return_value = NULL;
- Py_ssize_t width;
- char fillchar = ' ';
-
- if (!_PyArg_CheckPositional("center", nargs, 1, 2)) {
- goto exit;
- }
- if (PyFloat_Check(args[0])) {
- PyErr_SetString(PyExc_TypeError,
- "integer argument expected, got float" );
- goto exit;
- }
- {
- Py_ssize_t ival = -1;
- PyObject *iobj = PyNumber_Index(args[0]);
- if (iobj != NULL) {
- ival = PyLong_AsSsize_t(iobj);
- Py_DECREF(iobj);
- }
- if (ival == -1 && PyErr_Occurred()) {
- goto exit;
- }
- width = ival;
- }
- if (nargs < 2) {
- goto skip_optional;
- }
- if (PyBytes_Check(args[1]) && PyBytes_GET_SIZE(args[1]) == 1) {
- fillchar = PyBytes_AS_STRING(args[1])[0];
- }
- else if (PyByteArray_Check(args[1]) && PyByteArray_GET_SIZE(args[1]) == 1) {
- fillchar = PyByteArray_AS_STRING(args[1])[0];
- }
- else {
- _PyArg_BadArgument("center", "argument 2", "a byte string of length 1", args[1]);
- goto exit;
- }
-skip_optional:
- return_value = stringlib_center_impl(self, width, fillchar);
-
-exit:
- return return_value;
-}
-
-PyDoc_STRVAR(stringlib_zfill__doc__,
-"zfill($self, width, /)\n"
-"--\n"
-"\n"
-"Pad a numeric string with zeros on the left, to fill a field of the given width.\n"
-"\n"
-"The original string is never truncated.");
-
-#define STRINGLIB_ZFILL_METHODDEF \
- {"zfill", (PyCFunction)stringlib_zfill, METH_O, stringlib_zfill__doc__},
-
-static PyObject *
-stringlib_zfill_impl(PyObject *self, Py_ssize_t width);
-
-static PyObject *
-stringlib_zfill(PyObject *self, PyObject *arg)
-{
- PyObject *return_value = NULL;
- Py_ssize_t width;
-
- if (PyFloat_Check(arg)) {
- PyErr_SetString(PyExc_TypeError,
- "integer argument expected, got float" );
- goto exit;
- }
- {
- Py_ssize_t ival = -1;
- PyObject *iobj = PyNumber_Index(arg);
- if (iobj != NULL) {
- ival = PyLong_AsSsize_t(iobj);
- Py_DECREF(iobj);
- }
- if (ival == -1 && PyErr_Occurred()) {
- goto exit;
- }
- width = ival;
- }
- return_value = stringlib_zfill_impl(self, width);
-
-exit:
- return return_value;
-}
-/*[clinic end generated code: output=15be047aef999b4e input=a9049054013a1b77]*/
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
deleted file mode 100644
index d6f2b98..0000000
--- a/Objects/stringlib/codecs.h
+++ /dev/null
@@ -1,822 +0,0 @@
-/* stringlib: codec implementations */
-
-#if !STRINGLIB_IS_UNICODE
-# error "codecs.h is specific to Unicode"
-#endif
-
-/* Mask to quickly check whether a C 'long' contains a
- non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
-/* 10xxxxxx */
-#define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x80 && (ch) < 0xC0)
-
-Py_LOCAL_INLINE(Py_UCS4)
-STRINGLIB(utf8_decode)(const char **inptr, const char *end,
- STRINGLIB_CHAR *dest,
- Py_ssize_t *outpos)
-{
- Py_UCS4 ch;
- const char *s = *inptr;
- const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
- STRINGLIB_CHAR *p = dest + *outpos;
-
- while (s < end) {
- ch = (unsigned char)*s;
-
- if (ch < 0x80) {
- /* Fast path for runs of ASCII characters. Given that common UTF-8
- input will consist of an overwhelming majority of ASCII
- characters, we try to optimize for this case by checking
- as many characters as a C 'long' can contain.
- First, check if we can do an aligned read, as most CPUs have
- a penalty for unaligned reads.
- */
- if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
- /* Help register allocation */
- const char *_s = s;
- STRINGLIB_CHAR *_p = p;
- while (_s < aligned_end) {
- /* Read a whole long at a time (either 4 or 8 bytes),
- and do a fast unrolled copy if it only contains ASCII
- characters. */
- unsigned long value = *(unsigned long *) _s;
- if (value & ASCII_CHAR_MASK)
- break;
-#if PY_LITTLE_ENDIAN
- _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
- _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
- _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
- _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
-# if SIZEOF_LONG == 8
- _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
- _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
- _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
- _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
-# endif
-#else
-# if SIZEOF_LONG == 8
- _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
- _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
- _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
- _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
- _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
- _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
- _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
- _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);
-# else
- _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
- _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
- _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
- _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
-# endif
-#endif
- _s += SIZEOF_LONG;
- _p += SIZEOF_LONG;
- }
- s = _s;
- p = _p;
- if (s == end)
- break;
- ch = (unsigned char)*s;
- }
- if (ch < 0x80) {
- s++;
- *p++ = ch;
- continue;
- }
- }
-
- if (ch < 0xE0) {
- /* \xC2\x80-\xDF\xBF -- 0080-07FF */
- Py_UCS4 ch2;
- if (ch < 0xC2) {
- /* invalid sequence
- \x80-\xBF -- continuation byte
- \xC0-\xC1 -- fake 0000-007F */
- goto InvalidStart;
- }
- if (end - s < 2) {
- /* unexpected end of data: the caller will decide whether
- it's an error or not */
- break;
- }
- ch2 = (unsigned char)s[1];
- if (!IS_CONTINUATION_BYTE(ch2))
- /* invalid continuation byte */
- goto InvalidContinuation1;
- ch = (ch << 6) + ch2 -
- ((0xC0 << 6) + 0x80);
- assert ((ch > 0x007F) && (ch <= 0x07FF));
- s += 2;
- if (STRINGLIB_MAX_CHAR <= 0x007F ||
- (STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR))
- /* Out-of-range */
- goto Return;
- *p++ = ch;
- continue;
- }
-
- if (ch < 0xF0) {
- /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */
- Py_UCS4 ch2, ch3;
- if (end - s < 3) {
- /* unexpected end of data: the caller will decide whether
- it's an error or not */
- if (end - s < 2)
- break;
- ch2 = (unsigned char)s[1];
- if (!IS_CONTINUATION_BYTE(ch2) ||
- (ch2 < 0xA0 ? ch == 0xE0 : ch == 0xED))
- /* for clarification see comments below */
- goto InvalidContinuation1;
- break;
- }
- ch2 = (unsigned char)s[1];
- ch3 = (unsigned char)s[2];
- if (!IS_CONTINUATION_BYTE(ch2)) {
- /* invalid continuation byte */
- goto InvalidContinuation1;
- }
- if (ch == 0xE0) {
- if (ch2 < 0xA0)
- /* invalid sequence
- \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */
- goto InvalidContinuation1;
- } else if (ch == 0xED && ch2 >= 0xA0) {
- /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
- will result in surrogates in range D800-DFFF. Surrogates are
- not valid UTF-8 so they are rejected.
- See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
- (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
- goto InvalidContinuation1;
- }
- if (!IS_CONTINUATION_BYTE(ch3)) {
- /* invalid continuation byte */
- goto InvalidContinuation2;
- }
- ch = (ch << 12) + (ch2 << 6) + ch3 -
- ((0xE0 << 12) + (0x80 << 6) + 0x80);
- assert ((ch > 0x07FF) && (ch <= 0xFFFF));
- s += 3;
- if (STRINGLIB_MAX_CHAR <= 0x07FF ||
- (STRINGLIB_MAX_CHAR < 0xFFFF && ch > STRINGLIB_MAX_CHAR))
- /* Out-of-range */
- goto Return;
- *p++ = ch;
- continue;
- }
-
- if (ch < 0xF5) {
- /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */
- Py_UCS4 ch2, ch3, ch4;
- if (end - s < 4) {
- /* unexpected end of data: the caller will decide whether
- it's an error or not */
- if (end - s < 2)
- break;
- ch2 = (unsigned char)s[1];
- if (!IS_CONTINUATION_BYTE(ch2) ||
- (ch2 < 0x90 ? ch == 0xF0 : ch == 0xF4))
- /* for clarification see comments below */
- goto InvalidContinuation1;
- if (end - s < 3)
- break;
- ch3 = (unsigned char)s[2];
- if (!IS_CONTINUATION_BYTE(ch3))
- goto InvalidContinuation2;
- break;
- }
- ch2 = (unsigned char)s[1];
- ch3 = (unsigned char)s[2];
- ch4 = (unsigned char)s[3];
- if (!IS_CONTINUATION_BYTE(ch2)) {
- /* invalid continuation byte */
- goto InvalidContinuation1;
- }
- if (ch == 0xF0) {
- if (ch2 < 0x90)
- /* invalid sequence
- \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */
- goto InvalidContinuation1;
- } else if (ch == 0xF4 && ch2 >= 0x90) {
- /* invalid sequence
- \xF4\x90\x80\x80- -- 110000- overflow */
- goto InvalidContinuation1;
- }
- if (!IS_CONTINUATION_BYTE(ch3)) {
- /* invalid continuation byte */
- goto InvalidContinuation2;
- }
- if (!IS_CONTINUATION_BYTE(ch4)) {
- /* invalid continuation byte */
- goto InvalidContinuation3;
- }
- ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -
- ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);
- assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));
- s += 4;
- if (STRINGLIB_MAX_CHAR <= 0xFFFF ||
- (STRINGLIB_MAX_CHAR < 0x10FFFF && ch > STRINGLIB_MAX_CHAR))
- /* Out-of-range */
- goto Return;
- *p++ = ch;
- continue;
- }
- goto InvalidStart;
- }
- ch = 0;
-Return:
- *inptr = s;
- *outpos = p - dest;
- return ch;
-InvalidStart:
- ch = 1;
- goto Return;
-InvalidContinuation1:
- ch = 2;
- goto Return;
-InvalidContinuation2:
- ch = 3;
- goto Return;
-InvalidContinuation3:
- ch = 4;
- goto Return;
-}
-
-#undef ASCII_CHAR_MASK
-
-
-/* UTF-8 encoder specialized for a Unicode kind to avoid the slow
- PyUnicode_READ() macro. Delete some parts of the code depending on the kind:
- UCS-1 strings don't need to handle surrogates for example. */
-Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(utf8_encoder)(PyObject *unicode,
- STRINGLIB_CHAR *data,
- Py_ssize_t size,
- _Py_error_handler error_handler,
- const char *errors)
-{
- Py_ssize_t i; /* index into data of next input character */
- char *p; /* next free byte in output buffer */
-#if STRINGLIB_SIZEOF_CHAR > 1
- PyObject *error_handler_obj = NULL;
- PyObject *exc = NULL;
- PyObject *rep = NULL;
-#endif
-#if STRINGLIB_SIZEOF_CHAR == 1
- const Py_ssize_t max_char_size = 2;
-#elif STRINGLIB_SIZEOF_CHAR == 2
- const Py_ssize_t max_char_size = 3;
-#else /* STRINGLIB_SIZEOF_CHAR == 4 */
- const Py_ssize_t max_char_size = 4;
-#endif
- _PyBytesWriter writer;
-
- assert(size >= 0);
- _PyBytesWriter_Init(&writer);
-
- if (size > PY_SSIZE_T_MAX / max_char_size) {
- /* integer overflow */
- return PyErr_NoMemory();
- }
-
- p = _PyBytesWriter_Alloc(&writer, size * max_char_size);
- if (p == NULL)
- return NULL;
-
- for (i = 0; i < size;) {
- Py_UCS4 ch = data[i++];
-
- if (ch < 0x80) {
- /* Encode ASCII */
- *p++ = (char) ch;
-
- }
- else
-#if STRINGLIB_SIZEOF_CHAR > 1
- if (ch < 0x0800)
-#endif
- {
- /* Encode Latin-1 */
- *p++ = (char)(0xc0 | (ch >> 6));
- *p++ = (char)(0x80 | (ch & 0x3f));
- }
-#if STRINGLIB_SIZEOF_CHAR > 1
- else if (Py_UNICODE_IS_SURROGATE(ch)) {
- Py_ssize_t startpos, endpos, newpos;
- Py_ssize_t k;
- if (error_handler == _Py_ERROR_UNKNOWN) {
- error_handler = _Py_GetErrorHandler(errors);
- }
-
- startpos = i-1;
- endpos = startpos+1;
-
- while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))
- endpos++;
-
- /* Only overallocate the buffer if it's not the last write */
- writer.overallocate = (endpos < size);
-
- switch (error_handler)
- {
- case _Py_ERROR_REPLACE:
- memset(p, '?', endpos - startpos);
- p += (endpos - startpos);
- /* fall through */
- case _Py_ERROR_IGNORE:
- i += (endpos - startpos - 1);
- break;
-
- case _Py_ERROR_SURROGATEPASS:
- for (k=startpos; k<endpos; k++) {
- ch = data[k];
- *p++ = (char)(0xe0 | (ch >> 12));
- *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
- *p++ = (char)(0x80 | (ch & 0x3f));
- }
- i += (endpos - startpos - 1);
- break;
-
- case _Py_ERROR_BACKSLASHREPLACE:
- /* subtract preallocated bytes */
- writer.min_size -= max_char_size * (endpos - startpos);
- p = backslashreplace(&writer, p,
- unicode, startpos, endpos);
- if (p == NULL)
- goto error;
- i += (endpos - startpos - 1);
- break;
-
- case _Py_ERROR_XMLCHARREFREPLACE:
- /* subtract preallocated bytes */
- writer.min_size -= max_char_size * (endpos - startpos);
- p = xmlcharrefreplace(&writer, p,
- unicode, startpos, endpos);
- if (p == NULL)
- goto error;
- i += (endpos - startpos - 1);
- break;
-
- case _Py_ERROR_SURROGATEESCAPE:
- for (k=startpos; k<endpos; k++) {
- ch = data[k];
- if (!(0xDC80 <= ch && ch <= 0xDCFF))
- break;
- *p++ = (char)(ch & 0xff);
- }
- if (k >= endpos) {
- i += (endpos - startpos - 1);
- break;
- }
- startpos = k;
- assert(startpos < endpos);
- /* fall through */
- default:
- rep = unicode_encode_call_errorhandler(
- errors, &error_handler_obj, "utf-8", "surrogates not allowed",
- unicode, &exc, startpos, endpos, &newpos);
- if (!rep)
- goto error;
-
- /* subtract preallocated bytes */
- writer.min_size -= max_char_size * (newpos - startpos);
-
- if (PyBytes_Check(rep)) {
- p = _PyBytesWriter_WriteBytes(&writer, p,
- PyBytes_AS_STRING(rep),
- PyBytes_GET_SIZE(rep));
- }
- else {
- /* rep is unicode */
- if (PyUnicode_READY(rep) < 0)
- goto error;
-
- if (!PyUnicode_IS_ASCII(rep)) {
- raise_encode_exception(&exc, "utf-8", unicode,
- startpos, endpos,
- "surrogates not allowed");
- goto error;
- }
-
- p = _PyBytesWriter_WriteBytes(&writer, p,
- PyUnicode_DATA(rep),
- PyUnicode_GET_LENGTH(rep));
- }
-
- if (p == NULL)
- goto error;
- Py_CLEAR(rep);
-
- i = newpos;
- }
-
- /* If overallocation was disabled, ensure that it was the last
- write. Otherwise, we missed an optimization */
- assert(writer.overallocate || i == size);
- }
- else
-#if STRINGLIB_SIZEOF_CHAR > 2
- if (ch < 0x10000)
-#endif
- {
- *p++ = (char)(0xe0 | (ch >> 12));
- *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
- *p++ = (char)(0x80 | (ch & 0x3f));
- }
-#if STRINGLIB_SIZEOF_CHAR > 2
- else /* ch >= 0x10000 */
- {
- assert(ch <= MAX_UNICODE);
- /* Encode UCS4 Unicode ordinals */
- *p++ = (char)(0xf0 | (ch >> 18));
- *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
- *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
- *p++ = (char)(0x80 | (ch & 0x3f));
- }
-#endif /* STRINGLIB_SIZEOF_CHAR > 2 */
-#endif /* STRINGLIB_SIZEOF_CHAR > 1 */
- }
-
-#if STRINGLIB_SIZEOF_CHAR > 1
- Py_XDECREF(error_handler_obj);
- Py_XDECREF(exc);
-#endif
- return _PyBytesWriter_Finish(&writer, p);
-
-#if STRINGLIB_SIZEOF_CHAR > 1
- error:
- Py_XDECREF(rep);
- Py_XDECREF(error_handler_obj);
- Py_XDECREF(exc);
- _PyBytesWriter_Dealloc(&writer);
- return NULL;
-#endif
-}
-
-/* The pattern for constructing UCS2-repeated masks. */
-#if SIZEOF_LONG == 8
-# define UCS2_REPEAT_MASK 0x0001000100010001ul
-#elif SIZEOF_LONG == 4
-# define UCS2_REPEAT_MASK 0x00010001ul
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
-/* The mask for fast checking. */
-#if STRINGLIB_SIZEOF_CHAR == 1
-/* The mask for fast checking of whether a C 'long' contains a
- non-ASCII or non-Latin1 UTF16-encoded characters. */
-# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
-#else
-/* The mask for fast checking of whether a C 'long' may contain
- UTF16-encoded surrogate characters. This is an efficient heuristic,
- assuming that non-surrogate characters with a code point >= 0x8000 are
- rare in most input.
-*/
-# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u)
-#endif
-/* The mask for fast byte-swapping. */
-#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu)
-/* Swap bytes. */
-#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \
- (((value) & STRIPPED_MASK) << 8))
-
-Py_LOCAL_INLINE(Py_UCS4)
-STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
- STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
- int native_ordering)
-{
- Py_UCS4 ch;
- const unsigned char *aligned_end =
- (const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
- const unsigned char *q = *inptr;
- STRINGLIB_CHAR *p = dest + *outpos;
- /* Offsets from q for retrieving byte pairs in the right order. */
-#if PY_LITTLE_ENDIAN
- int ihi = !!native_ordering, ilo = !native_ordering;
-#else
- int ihi = !native_ordering, ilo = !!native_ordering;
-#endif
- --e;
-
- while (q < e) {
- Py_UCS4 ch2;
- /* First check for possible aligned read of a C 'long'. Unaligned
- reads are more expensive, better to defer to another iteration. */
- if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
- /* Fast path for runs of in-range non-surrogate chars. */
- const unsigned char *_q = q;
- while (_q < aligned_end) {
- unsigned long block = * (unsigned long *) _q;
- if (native_ordering) {
- /* Can use buffer directly */
- if (block & FAST_CHAR_MASK)
- break;
- }
- else {
- /* Need to byte-swap */
- if (block & SWAB(FAST_CHAR_MASK))
- break;
-#if STRINGLIB_SIZEOF_CHAR == 1
- block >>= 8;
-#else
- block = SWAB(block);
-#endif
- }
-#if PY_LITTLE_ENDIAN
-# if SIZEOF_LONG == 4
- p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
- p[1] = (STRINGLIB_CHAR)(block >> 16);
-# elif SIZEOF_LONG == 8
- p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
- p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
- p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
- p[3] = (STRINGLIB_CHAR)(block >> 48);
-# endif
-#else
-# if SIZEOF_LONG == 4
- p[0] = (STRINGLIB_CHAR)(block >> 16);
- p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
-# elif SIZEOF_LONG == 8
- p[0] = (STRINGLIB_CHAR)(block >> 48);
- p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
- p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
- p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
-# endif
-#endif
- _q += SIZEOF_LONG;
- p += SIZEOF_LONG / 2;
- }
- q = _q;
- if (q >= e)
- break;
- }
-
- ch = (q[ihi] << 8) | q[ilo];
- q += 2;
- if (!Py_UNICODE_IS_SURROGATE(ch)) {
-#if STRINGLIB_SIZEOF_CHAR < 2
- if (ch > STRINGLIB_MAX_CHAR)
- /* Out-of-range */
- goto Return;
-#endif
- *p++ = (STRINGLIB_CHAR)ch;
- continue;
- }
-
- /* UTF-16 code pair: */
- if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
- goto IllegalEncoding;
- if (q >= e)
- goto UnexpectedEnd;
- ch2 = (q[ihi] << 8) | q[ilo];
- q += 2;
- if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
- goto IllegalSurrogate;
- ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
-#if STRINGLIB_SIZEOF_CHAR < 4
- /* Out-of-range */
- goto Return;
-#else
- *p++ = (STRINGLIB_CHAR)ch;
-#endif
- }
- ch = 0;
-Return:
- *inptr = q;
- *outpos = p - dest;
- return ch;
-UnexpectedEnd:
- ch = 1;
- goto Return;
-IllegalEncoding:
- ch = 2;
- goto Return;
-IllegalSurrogate:
- ch = 3;
- goto Return;
-}
-#undef UCS2_REPEAT_MASK
-#undef FAST_CHAR_MASK
-#undef STRIPPED_MASK
-#undef SWAB
-
-
-#if STRINGLIB_MAX_CHAR >= 0x80
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
- Py_ssize_t len,
- unsigned short **outptr,
- int native_ordering)
-{
- unsigned short *out = *outptr;
- const STRINGLIB_CHAR *end = in + len;
-#if STRINGLIB_SIZEOF_CHAR == 1
- if (native_ordering) {
- const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
- while (in < unrolled_end) {
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- in += 4; out += 4;
- }
- while (in < end) {
- *out++ = *in++;
- }
- } else {
-# define SWAB2(CH) ((CH) << 8) /* high byte is zero */
- const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
- while (in < unrolled_end) {
- out[0] = SWAB2(in[0]);
- out[1] = SWAB2(in[1]);
- out[2] = SWAB2(in[2]);
- out[3] = SWAB2(in[3]);
- in += 4; out += 4;
- }
- while (in < end) {
- Py_UCS4 ch = *in++;
- *out++ = SWAB2((Py_UCS2)ch);
- }
-#undef SWAB2
- }
- *outptr = out;
- return len;
-#else
- if (native_ordering) {
-#if STRINGLIB_MAX_CHAR < 0x10000
- const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
- while (in < unrolled_end) {
- /* check if any character is a surrogate character */
- if (((in[0] ^ 0xd800) &
- (in[1] ^ 0xd800) &
- (in[2] ^ 0xd800) &
- (in[3] ^ 0xd800) & 0xf800) == 0)
- break;
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- in += 4; out += 4;
- }
-#endif
- while (in < end) {
- Py_UCS4 ch;
- ch = *in++;
- if (ch < 0xd800)
- *out++ = ch;
- else if (ch < 0xe000)
- /* reject surrogate characters (U+D800-U+DFFF) */
- goto fail;
-#if STRINGLIB_MAX_CHAR >= 0x10000
- else if (ch >= 0x10000) {
- out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
- out[1] = Py_UNICODE_LOW_SURROGATE(ch);
- out += 2;
- }
-#endif
- else
- *out++ = ch;
- }
- } else {
-#define SWAB2(CH) (((CH) << 8) | ((CH) >> 8))
-#if STRINGLIB_MAX_CHAR < 0x10000
- const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
- while (in < unrolled_end) {
- /* check if any character is a surrogate character */
- if (((in[0] ^ 0xd800) &
- (in[1] ^ 0xd800) &
- (in[2] ^ 0xd800) &
- (in[3] ^ 0xd800) & 0xf800) == 0)
- break;
- out[0] = SWAB2(in[0]);
- out[1] = SWAB2(in[1]);
- out[2] = SWAB2(in[2]);
- out[3] = SWAB2(in[3]);
- in += 4; out += 4;
- }
-#endif
- while (in < end) {
- Py_UCS4 ch = *in++;
- if (ch < 0xd800)
- *out++ = SWAB2((Py_UCS2)ch);
- else if (ch < 0xe000)
- /* reject surrogate characters (U+D800-U+DFFF) */
- goto fail;
-#if STRINGLIB_MAX_CHAR >= 0x10000
- else if (ch >= 0x10000) {
- Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
- Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
- out[0] = SWAB2(ch1);
- out[1] = SWAB2(ch2);
- out += 2;
- }
-#endif
- else
- *out++ = SWAB2((Py_UCS2)ch);
- }
-#undef SWAB2
- }
- *outptr = out;
- return len;
- fail:
- *outptr = out;
- return len - (end - in + 1);
-#endif
-}
-
-#if STRINGLIB_SIZEOF_CHAR == 1
-# define SWAB4(CH, tmp) ((CH) << 24) /* high bytes are zero */
-#elif STRINGLIB_SIZEOF_CHAR == 2
-# define SWAB4(CH, tmp) (tmp = (CH), \
- ((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8))
- /* high bytes are zero */
-#else
-# define SWAB4(CH, tmp) (tmp = (CH), \
- tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \
- ((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))
-#endif
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
- Py_ssize_t len,
- PY_UINT32_T **outptr,
- int native_ordering)
-{
- PY_UINT32_T *out = *outptr;
- const STRINGLIB_CHAR *end = in + len;
- if (native_ordering) {
- const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
- while (in < unrolled_end) {
-#if STRINGLIB_SIZEOF_CHAR > 1
- /* check if any character is a surrogate character */
- if (((in[0] ^ 0xd800) &
- (in[1] ^ 0xd800) &
- (in[2] ^ 0xd800) &
- (in[3] ^ 0xd800) & 0xf800) == 0)
- break;
-#endif
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
- in += 4; out += 4;
- }
- while (in < end) {
- Py_UCS4 ch;
- ch = *in++;
-#if STRINGLIB_SIZEOF_CHAR > 1
- if (Py_UNICODE_IS_SURROGATE(ch)) {
- /* reject surrogate characters (U+D800-U+DFFF) */
- goto fail;
- }
-#endif
- *out++ = ch;
- }
- } else {
- const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
- while (in < unrolled_end) {
-#if STRINGLIB_SIZEOF_CHAR > 1
- Py_UCS4 ch1, ch2, ch3, ch4;
- /* check if any character is a surrogate character */
- if (((in[0] ^ 0xd800) &
- (in[1] ^ 0xd800) &
- (in[2] ^ 0xd800) &
- (in[3] ^ 0xd800) & 0xf800) == 0)
- break;
-#endif
- out[0] = SWAB4(in[0], ch1);
- out[1] = SWAB4(in[1], ch2);
- out[2] = SWAB4(in[2], ch3);
- out[3] = SWAB4(in[3], ch4);
- in += 4; out += 4;
- }
- while (in < end) {
- Py_UCS4 ch = *in++;
-#if STRINGLIB_SIZEOF_CHAR > 1
- if (Py_UNICODE_IS_SURROGATE(ch)) {
- /* reject surrogate characters (U+D800-U+DFFF) */
- goto fail;
- }
-#endif
- *out++ = SWAB4(ch, ch);
- }
- }
- *outptr = out;
- return len;
-#if STRINGLIB_SIZEOF_CHAR > 1
- fail:
- *outptr = out;
- return len - (end - in + 1);
-#endif
-}
-#undef SWAB4
-
-#endif
diff --git a/Objects/stringlib/count.h b/Objects/stringlib/count.h
index f48500b..de34f96 100644
--- a/Objects/stringlib/count.h
+++ b/Objects/stringlib/count.h
@@ -1,11 +1,14 @@
/* stringlib: count implementation */
+#ifndef STRINGLIB_COUNT_H
+#define STRINGLIB_COUNT_H
+
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{
@@ -16,7 +19,7 @@ STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0)
return (str_len < maxcount) ? str_len + 1 : maxcount;
- count = FASTSEARCH(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
+ count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
if (count < 0)
return 0; /* no match */
@@ -24,4 +27,4 @@ STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return count;
}
-
+#endif
diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h
index 843cfa2..739cf3d 100644
--- a/Objects/stringlib/ctype.h
+++ b/Objects/stringlib/ctype.h
@@ -1,53 +1,46 @@
-#if STRINGLIB_IS_UNICODE
-# error "ctype.h only compatible with byte-wise strings"
-#endif
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
#include "bytes_methods.h"
static PyObject*
-stringlib_isspace(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_isspace(PyObject *self)
{
return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
static PyObject*
-stringlib_isalpha(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_isalpha(PyObject *self)
{
return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
static PyObject*
-stringlib_isalnum(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_isalnum(PyObject *self)
{
return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
static PyObject*
-stringlib_isascii(PyObject *self, PyObject *Py_UNUSED(ignored))
-{
- return _Py_bytes_isascii(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-}
-
-static PyObject*
-stringlib_isdigit(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_isdigit(PyObject *self)
{
return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
static PyObject*
-stringlib_islower(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_islower(PyObject *self)
{
return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
static PyObject*
-stringlib_isupper(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_isupper(PyObject *self)
{
return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
static PyObject*
-stringlib_istitle(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_istitle(PyObject *self)
{
return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self));
}
@@ -56,7 +49,7 @@ stringlib_istitle(PyObject *self, PyObject *Py_UNUSED(ignored))
/* functions that return a new object partially translated by ctype funcs: */
static PyObject*
-stringlib_lower(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_lower(PyObject *self)
{
PyObject* newobj;
newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
@@ -68,7 +61,7 @@ stringlib_lower(PyObject *self, PyObject *Py_UNUSED(ignored))
}
static PyObject*
-stringlib_upper(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_upper(PyObject *self)
{
PyObject* newobj;
newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
@@ -80,7 +73,7 @@ stringlib_upper(PyObject *self, PyObject *Py_UNUSED(ignored))
}
static PyObject*
-stringlib_title(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_title(PyObject *self)
{
PyObject* newobj;
newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
@@ -92,7 +85,7 @@ stringlib_title(PyObject *self, PyObject *Py_UNUSED(ignored))
}
static PyObject*
-stringlib_capitalize(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_capitalize(PyObject *self)
{
PyObject* newobj;
newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
@@ -104,7 +97,7 @@ stringlib_capitalize(PyObject *self, PyObject *Py_UNUSED(ignored))
}
static PyObject*
-stringlib_swapcase(PyObject *self, PyObject *Py_UNUSED(ignored))
+stringlib_swapcase(PyObject *self)
{
PyObject* newobj;
newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h
deleted file mode 100644
index ff22f91..0000000
--- a/Objects/stringlib/eq.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Fast unicode equal function optimized for dictobject.c and setobject.c */
-
-/* Return 1 if two unicode objects are equal, 0 if not.
- * unicode_eq() is called when the hash of two unicode objects is equal.
- */
-Py_LOCAL_INLINE(int)
-unicode_eq(PyObject *aa, PyObject *bb)
-{
- PyUnicodeObject *a = (PyUnicodeObject *)aa;
- PyUnicodeObject *b = (PyUnicodeObject *)bb;
-
- if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) {
- Py_UNREACHABLE();
- }
-
- if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b))
- return 0;
- if (PyUnicode_GET_LENGTH(a) == 0)
- return 1;
- if (PyUnicode_KIND(a) != PyUnicode_KIND(b))
- return 0;
- return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b),
- PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0;
-}
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 56a4467..e231c58 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -1,5 +1,6 @@
/* stringlib: fastsearch implementation */
+#ifndef STRINGLIB_FASTSEARCH_H
#define STRINGLIB_FASTSEARCH_H
/* fast search/count implementation, based on a mix between boyer-
@@ -32,136 +33,8 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-#if STRINGLIB_SIZEOF_CHAR == 1
-# define MEMCHR_CUT_OFF 15
-#else
-# define MEMCHR_CUT_OFF 40
-#endif
-
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
-{
- const STRINGLIB_CHAR *p, *e;
-
- p = s;
- e = s + n;
- if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memchr(s, ch, n);
- if (p != NULL)
- return (p - s);
- return -1;
-#else
- /* use memchr if we can choose a needle without too many likely
- false positives */
- const STRINGLIB_CHAR *s1, *e1;
- unsigned char needle = ch & 0xff;
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0) {
- do {
- void *candidate = memchr(p, needle,
- (e - p) * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- s1 = p;
- p = (const STRINGLIB_CHAR *)
- _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- if (*p == ch)
- return (p - s);
- /* False positive */
- p++;
- if (p - s1 > MEMCHR_CUT_OFF)
- continue;
- if (e - p <= MEMCHR_CUT_OFF)
- break;
- e1 = p + MEMCHR_CUT_OFF;
- while (p != e1) {
- if (*p == ch)
- return (p - s);
- p++;
- }
- }
- while (e - p > MEMCHR_CUT_OFF);
- }
-#endif
- }
- while (p < e) {
- if (*p == ch)
- return (p - s);
- p++;
- }
- return -1;
-}
-
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
-{
- const STRINGLIB_CHAR *p;
-#ifdef HAVE_MEMRCHR
- /* memrchr() is a GNU extension, available since glibc 2.1.91.
- it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop below */
-
- if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memrchr(s, ch, n);
- if (p != NULL)
- return (p - s);
- return -1;
-#else
- /* use memrchr if we can choose a needle without too many likely
- false positives */
- const STRINGLIB_CHAR *s1;
- Py_ssize_t n1;
- unsigned char needle = ch & 0xff;
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0) {
- do {
- void *candidate = memrchr(s, needle,
- n * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- n1 = n;
- p = (const STRINGLIB_CHAR *)
- _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- n = p - s;
- if (*p == ch)
- return n;
- /* False positive */
- if (n1 - n > MEMCHR_CUT_OFF)
- continue;
- if (n <= MEMCHR_CUT_OFF)
- break;
- s1 = p - MEMCHR_CUT_OFF;
- while (p > s1) {
- p--;
- if (*p == ch)
- return (p - s);
- }
- n = p - s;
- }
- while (n > MEMCHR_CUT_OFF);
- }
-#endif
- }
-#endif /* HAVE_MEMRCHR */
- p = s + n;
- while (p > s) {
- p--;
- if (*p == ch)
- return (p - s);
- }
- return -1;
-}
-
-#undef MEMCHR_CUT_OFF
-
-Py_LOCAL_INLINE(Py_ssize_t)
-FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
+fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -179,11 +52,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (m <= 0)
return -1;
/* use special case for 1-character strings */
- if (mode == FAST_SEARCH)
- return STRINGLIB(find_char)(s, n, p[0]);
- else if (mode == FAST_RSEARCH)
- return STRINGLIB(rfind_char)(s, n, p[0]);
- else { /* FAST_COUNT */
+ if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
if (s[i] == p[0]) {
count++;
@@ -191,7 +60,16 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return maxcount;
}
return count;
+ } else if (mode == FAST_SEARCH) {
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0])
+ return i;
+ } else { /* FAST_RSEARCH */
+ for (i = n - 1; i > -1; i--)
+ if (s[i] == p[0])
+ return i;
}
+ return -1;
}
mlast = m - 1;
@@ -199,8 +77,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
mask = 0;
if (mode != FAST_RSEARCH) {
- const STRINGLIB_CHAR *ss = s + m - 1;
- const STRINGLIB_CHAR *pp = p + m - 1;
/* create compressed boyer-moore delta 1 table */
@@ -215,7 +91,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
- if (ss[i] == pp[0]) {
+ if (s[i+m-1] == p[m-1]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
@@ -231,13 +107,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+ if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+ if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
}
}
@@ -281,3 +157,4 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return count;
}
+#endif
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index 509b929..ce615dc 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -1,21 +1,25 @@
/* stringlib: find/index implementation */
+#ifndef STRINGLIB_FIND_H
+#define STRINGLIB_FIND_H
+
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t offset)
{
Py_ssize_t pos;
- assert(str_len >= 0);
+ if (str_len < 0)
+ return -1;
if (sub_len == 0)
return offset;
- pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);
+ pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
if (pos >= 0)
pos += offset;
@@ -24,17 +28,18 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
}
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t offset)
{
Py_ssize_t pos;
- assert(str_len >= 0);
+ if (str_len < 0)
+ return -1;
if (sub_len == 0)
return str_len + offset;
- pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
+ pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
if (pos >= 0)
pos += offset;
@@ -42,28 +47,45 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len) \
+ if (end > len) \
+ end = len; \
+ else if (end < 0) { \
+ end += len; \
+ if (end < 0) \
+ end = 0; \
+ } \
+ if (start < 0) { \
+ start += len; \
+ if (start < 0) \
+ start = 0; \
+ }
+
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
+ ADJUST_INDICES(start, end, str_len);
+ return stringlib_find(str + start, end - start, sub, sub_len, start);
}
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
- return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
+ ADJUST_INDICES(start, end, str_len);
+ return stringlib_rfind(str + start, end - start, sub, sub_len, start);
}
#ifdef STRINGLIB_WANT_CONTAINS_OBJ
Py_LOCAL_INLINE(int)
-STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)
+stringlib_contains_obj(PyObject* str, PyObject* sub)
{
- return STRINGLIB(find)(
+ return stringlib_find(
STRINGLIB_STR(str), STRINGLIB_LEN(str),
STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
) != -1;
@@ -76,14 +98,14 @@ This function is a helper for the "find" family (find, rfind, index,
rindex) and for count, startswith and endswith, because they all have
the same behaviour for the arguments.
-It does not touch the variables received until it knows everything
+It does not touch the variables received until it knows everything
is ok.
*/
#define FORMAT_BUFFER_SIZE 50
Py_LOCAL_INLINE(int)
-STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
+stringlib_parse_args_finds(const char * function_name, PyObject *args,
PyObject **subobj,
Py_ssize_t *start, Py_ssize_t *end)
{
@@ -117,3 +139,37 @@ STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
}
#undef FORMAT_BUFFER_SIZE
+
+#if STRINGLIB_IS_UNICODE
+
+/*
+Wraps stringlib_parse_args_finds() and additionally ensures that the
+first argument is a unicode object.
+
+Note that we receive a pointer to the pointer of the substring object,
+so when we create that object in this function we don't DECREF it,
+because it continues living in the caller functions (those functions,
+after finishing using the substring, must DECREF it).
+*/
+
+Py_LOCAL_INLINE(int)
+stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args,
+ PyUnicodeObject **substring,
+ Py_ssize_t *start, Py_ssize_t *end)
+{
+ PyObject *tmp_substring;
+
+ if(stringlib_parse_args_finds(function_name, args, &tmp_substring,
+ start, end)) {
+ tmp_substring = PyUnicode_FromObject(tmp_substring);
+ if (!tmp_substring)
+ return 0;
+ *substring = (PyUnicodeObject *)tmp_substring;
+ return 1;
+ }
+ return 0;
+}
+
+#endif /* STRINGLIB_IS_UNICODE */
+
+#endif /* STRINGLIB_FIND_H */
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
deleted file mode 100644
index 8ccbc30..0000000
--- a/Objects/stringlib/find_max_char.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Finding the optimal width of unicode characters in a buffer */
-
-#if !STRINGLIB_IS_UNICODE
-# error "find_max_char.h is specific to Unicode"
-#endif
-
-/* Mask to quickly check whether a C 'long' contains a
- non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define UCS1_ASCII_CHAR_MASK 0x80808080UL
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
-#if STRINGLIB_SIZEOF_CHAR == 1
-
-Py_LOCAL_INLINE(Py_UCS4)
-STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
-{
- const unsigned char *p = (const unsigned char *) begin;
- const unsigned char *aligned_end =
- (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
-
- while (p < end) {
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
- /* Help register allocation */
- const unsigned char *_p = p;
- while (_p < aligned_end) {
- unsigned long value = *(unsigned long *) _p;
- if (value & UCS1_ASCII_CHAR_MASK)
- return 255;
- _p += SIZEOF_LONG;
- }
- p = _p;
- if (p == end)
- break;
- }
- if (*p++ & 0x80)
- return 255;
- }
- return 127;
-}
-
-#undef ASCII_CHAR_MASK
-
-#else /* STRINGLIB_SIZEOF_CHAR == 1 */
-
-#define MASK_ASCII 0xFFFFFF80
-#define MASK_UCS1 0xFFFFFF00
-#define MASK_UCS2 0xFFFF0000
-
-#define MAX_CHAR_ASCII 0x7f
-#define MAX_CHAR_UCS1 0xff
-#define MAX_CHAR_UCS2 0xffff
-#define MAX_CHAR_UCS4 0x10ffff
-
-Py_LOCAL_INLINE(Py_UCS4)
-STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
-{
-#if STRINGLIB_SIZEOF_CHAR == 2
- const Py_UCS4 mask_limit = MASK_UCS1;
- const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;
-#elif STRINGLIB_SIZEOF_CHAR == 4
- const Py_UCS4 mask_limit = MASK_UCS2;
- const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;
-#else
-#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
-#endif
- Py_UCS4 mask;
- Py_ssize_t n = end - begin;
- const STRINGLIB_CHAR *p = begin;
- const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);
- Py_UCS4 max_char;
-
- max_char = MAX_CHAR_ASCII;
- mask = MASK_ASCII;
- while (p < unrolled_end) {
- STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];
- if (bits & mask) {
- if (mask == mask_limit) {
- /* Limit reached */
- return max_char_limit;
- }
- if (mask == MASK_ASCII) {
- max_char = MAX_CHAR_UCS1;
- mask = MASK_UCS1;
- }
- else {
- /* mask can't be MASK_UCS2 because of mask_limit above */
- assert(mask == MASK_UCS1);
- max_char = MAX_CHAR_UCS2;
- mask = MASK_UCS2;
- }
- /* We check the new mask on the same chars in the next iteration */
- continue;
- }
- p += 4;
- }
- while (p < end) {
- if (p[0] & mask) {
- if (mask == mask_limit) {
- /* Limit reached */
- return max_char_limit;
- }
- if (mask == MASK_ASCII) {
- max_char = MAX_CHAR_UCS1;
- mask = MASK_UCS1;
- }
- else {
- /* mask can't be MASK_UCS2 because of mask_limit above */
- assert(mask == MASK_UCS1);
- max_char = MAX_CHAR_UCS2;
- mask = MASK_UCS2;
- }
- /* We check the new mask on the same chars in the next iteration */
- continue;
- }
- p++;
- }
- return max_char;
-}
-
-#undef MASK_ASCII
-#undef MASK_UCS1
-#undef MASK_UCS2
-#undef MAX_CHAR_ASCII
-#undef MAX_CHAR_UCS1
-#undef MAX_CHAR_UCS2
-#undef MAX_CHAR_UCS4
-
-#endif /* STRINGLIB_SIZEOF_CHAR == 1 */
-
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h
new file mode 100644
index 0000000..70f574c
--- /dev/null
+++ b/Objects/stringlib/formatter.h
@@ -0,0 +1,1547 @@
+/* implements the string, long, and float formatters. that is,
+ string.__format__, etc. */
+
+#include <locale.h>
+
+/* Before including this, you must include either:
+ stringlib/unicodedefs.h
+ stringlib/stringdefs.h
+
+ Also, you should define the names:
+ FORMAT_STRING
+ FORMAT_LONG
+ FORMAT_FLOAT
+ FORMAT_COMPLEX
+ to be whatever you want the public names of these functions to
+ be. These are the only non-static functions defined here.
+*/
+
+/* Raises an exception about an unknown presentation type for this
+ * type. */
+
+static void
+unknown_presentation_type(STRINGLIB_CHAR presentation_type,
+ const char* type_name)
+{
+#if STRINGLIB_IS_UNICODE
+ /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range,
+ hence the two cases. If it is char, gcc complains that the
+ condition below is always true, hence the ifdef. */
+ if (presentation_type > 32 && presentation_type < 128)
+#endif
+ PyErr_Format(PyExc_ValueError,
+ "Unknown format code '%c' "
+ "for object of type '%.200s'",
+ (char)presentation_type,
+ type_name);
+#if STRINGLIB_IS_UNICODE
+ else
+ PyErr_Format(PyExc_ValueError,
+ "Unknown format code '\\x%x' "
+ "for object of type '%.200s'",
+ (unsigned int)presentation_type,
+ type_name);
+#endif
+}
+
+static void
+invalid_comma_type(STRINGLIB_CHAR presentation_type)
+{
+#if STRINGLIB_IS_UNICODE
+ /* See comment in unknown_presentation_type */
+ if (presentation_type > 32 && presentation_type < 128)
+#endif
+ PyErr_Format(PyExc_ValueError,
+ "Cannot specify ',' with '%c'.",
+ (char)presentation_type);
+#if STRINGLIB_IS_UNICODE
+ else
+ PyErr_Format(PyExc_ValueError,
+ "Cannot specify ',' with '\\x%x'.",
+ (unsigned int)presentation_type);
+#endif
+}
+
+/*
+ get_integer consumes 0 or more decimal digit characters from an
+ input string, updates *result with the corresponding non-negative
+ integer, and returns the number of digits consumed.
+
+ returns -1 on error.
+*/
+static int
+get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
+ Py_ssize_t *result)
+{
+ Py_ssize_t accumulator, digitval;
+ int numdigits;
+ accumulator = numdigits = 0;
+ for (;;(*ptr)++, numdigits++) {
+ if (*ptr >= end)
+ break;
+ digitval = STRINGLIB_TODECIMAL(**ptr);
+ if (digitval < 0)
+ break;
+ /*
+ Detect possible overflow before it happens:
+
+ accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+ accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
+ */
+ if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
+ PyErr_Format(PyExc_ValueError,
+ "Too many decimal digits in format string");
+ return -1;
+ }
+ accumulator = accumulator * 10 + digitval;
+ }
+ *result = accumulator;
+ return numdigits;
+}
+
+/************************************************************************/
+/*********** standard format specifier parsing **************************/
+/************************************************************************/
+
+/* returns true if this character is a specifier alignment token */
+Py_LOCAL_INLINE(int)
+is_alignment_token(STRINGLIB_CHAR c)
+{
+ switch (c) {
+ case '<': case '>': case '=': case '^':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* returns true if this character is a sign element */
+Py_LOCAL_INLINE(int)
+is_sign_element(STRINGLIB_CHAR c)
+{
+ switch (c) {
+ case ' ': case '+': case '-':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+
+typedef struct {
+ STRINGLIB_CHAR fill_char;
+ STRINGLIB_CHAR align;
+ int alternate;
+ STRINGLIB_CHAR sign;
+ Py_ssize_t width;
+ int thousands_separators;
+ Py_ssize_t precision;
+ STRINGLIB_CHAR type;
+} InternalFormatSpec;
+
+
+#if 0
+/* Occasionally useful for debugging. Should normally be commented out. */
+static void
+DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
+{
+ printf("internal format spec: fill_char %d\n", format->fill_char);
+ printf("internal format spec: align %d\n", format->align);
+ printf("internal format spec: alternate %d\n", format->alternate);
+ printf("internal format spec: sign %d\n", format->sign);
+ printf("internal format spec: width %zd\n", format->width);
+ printf("internal format spec: thousands_separators %d\n",
+ format->thousands_separators);
+ printf("internal format spec: precision %zd\n", format->precision);
+ printf("internal format spec: type %c\n", format->type);
+ printf("\n");
+}
+#endif
+
+
+/*
+ format_spec points to the start of the format spec; format_spec_len is its length.
+ fills in format with the parsed information.
+ returns 1 on success, 0 on failure.
+ if failure, sets the exception
+*/
+static int
+parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
+ Py_ssize_t format_spec_len,
+ InternalFormatSpec *format,
+ char default_type,
+ char default_align)
+{
+ STRINGLIB_CHAR *ptr = format_spec;
+ STRINGLIB_CHAR *end = format_spec + format_spec_len;
+
+ /* end-ptr is used throughout this code to specify the length of
+ the input string */
+
+ Py_ssize_t consumed;
+ int align_specified = 0;
+ int fill_char_specified = 0;
+
+ format->fill_char = ' ';
+ format->align = default_align;
+ format->alternate = 0;
+ format->sign = '\0';
+ format->width = -1;
+ format->thousands_separators = 0;
+ format->precision = -1;
+ format->type = default_type;
+
+ /* If the second char is an alignment token,
+ then parse the fill char */
+ if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
+ format->align = ptr[1];
+ format->fill_char = ptr[0];
+ fill_char_specified = 1;
+ align_specified = 1;
+ ptr += 2;
+ }
+ else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
+ format->align = ptr[0];
+ align_specified = 1;
+ ++ptr;
+ }
+
+ /* Parse the various sign options */
+ if (end-ptr >= 1 && is_sign_element(ptr[0])) {
+ format->sign = ptr[0];
+ ++ptr;
+ }
+
+ /* If the next character is #, we're in alternate mode. This only
+ applies to integers. */
+ if (end-ptr >= 1 && ptr[0] == '#') {
+ format->alternate = 1;
+ ++ptr;
+ }
+
+ /* The special case for 0-padding (backwards compat) */
+ if (!fill_char_specified && end-ptr >= 1 && ptr[0] == '0') {
+ format->fill_char = '0';
+ if (!align_specified) {
+ format->align = '=';
+ }
+ ++ptr;
+ }
+
+ consumed = get_integer(&ptr, end, &format->width);
+ if (consumed == -1)
+ /* Overflow error. Exception already set. */
+ return 0;
+
+ /* If consumed is 0, we didn't consume any characters for the
+ width. In that case, reset the width to -1, because
+ get_integer() will have set it to zero. -1 is how we record
+ that the width wasn't specified. */
+ if (consumed == 0)
+ format->width = -1;
+
+ /* Comma signifies add thousands separators */
+ if (end-ptr && ptr[0] == ',') {
+ format->thousands_separators = 1;
+ ++ptr;
+ }
+
+ /* Parse field precision */
+ if (end-ptr && ptr[0] == '.') {
+ ++ptr;
+
+ consumed = get_integer(&ptr, end, &format->precision);
+ if (consumed == -1)
+ /* Overflow error. Exception already set. */
+ return 0;
+
+ /* Not having a precision after a dot is an error. */
+ if (consumed == 0) {
+ PyErr_Format(PyExc_ValueError,
+ "Format specifier missing precision");
+ return 0;
+ }
+
+ }
+
+ /* Finally, parse the type field. */
+
+ if (end-ptr > 1) {
+ /* More than one char remain, invalid conversion spec. */
+ PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
+ return 0;
+ }
+
+ if (end-ptr == 1) {
+ format->type = ptr[0];
+ ++ptr;
+ }
+
+ /* Do as much validating as we can, just by looking at the format
+ specifier. Do not take into account what type of formatting
+ we're doing (int, float, string). */
+
+ if (format->thousands_separators) {
+ switch (format->type) {
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'E':
+ case 'G':
+ case '%':
+ case 'F':
+ case '\0':
+ /* These are allowed. See PEP 378.*/
+ break;
+ default:
+ invalid_comma_type(format->type);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/* Calculate the padding needed. */
+static void
+calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
+ Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
+ Py_ssize_t *n_total)
+{
+ if (width >= 0) {
+ if (nchars > width)
+ *n_total = nchars;
+ else
+ *n_total = width;
+ }
+ else {
+ /* not specified, use all of the chars and no more */
+ *n_total = nchars;
+ }
+
+ /* Figure out how much leading space we need, based on the
+ aligning */
+ if (align == '>')
+ *n_lpadding = *n_total - nchars;
+ else if (align == '^')
+ *n_lpadding = (*n_total - nchars) / 2;
+ else if (align == '<' || align == '=')
+ *n_lpadding = 0;
+ else {
+ /* We should never have an unspecified alignment. */
+ *n_lpadding = 0;
+ assert(0);
+ }
+
+ *n_rpadding = *n_total - nchars - *n_lpadding;
+}
+
+/* Do the padding, and return a pointer to where the caller-supplied
+ content goes. */
+static STRINGLIB_CHAR *
+fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
+ Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
+{
+ /* Pad on left. */
+ if (n_lpadding)
+ STRINGLIB_FILL(p, fill_char, n_lpadding);
+
+ /* Pad on right. */
+ if (n_rpadding)
+ STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding);
+
+ /* Pointer to the user content. */
+ return p + n_lpadding;
+}
+
+#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
+/************************************************************************/
+/*********** common routines for numeric formatting *********************/
+/************************************************************************/
+
+/* Locale type codes. */
+#define LT_CURRENT_LOCALE 0
+#define LT_DEFAULT_LOCALE 1
+#define LT_NO_LOCALE 2
+
+/* Locale info needed for formatting integers and the part of floats
+ before and including the decimal. Note that locales only support
+ 8-bit chars, not unicode. */
+typedef struct {
+ char *decimal_point;
+ char *thousands_sep;
+ char *grouping;
+} LocaleInfo;
+
+/* describes the layout for an integer, see the comment in
+ calc_number_widths() for details */
+typedef struct {
+ Py_ssize_t n_lpadding;
+ Py_ssize_t n_prefix;
+ Py_ssize_t n_spadding;
+ Py_ssize_t n_rpadding;
+ char sign;
+ Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
+ Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
+ any grouping chars. */
+ Py_ssize_t n_decimal; /* 0 if only an integer */
+ Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
+ excluding the decimal itself, if
+ present. */
+
+ /* These 2 are not the widths of fields, but are needed by
+ STRINGLIB_GROUPING. */
+ Py_ssize_t n_digits; /* The number of digits before a decimal
+ or exponent. */
+ Py_ssize_t n_min_width; /* The min_width we used when we computed
+ the n_grouped_digits width. */
+} NumberFieldWidths;
+
+
+/* Given a number of the form:
+ digits[remainder]
+ where ptr points to the start and len is its length, find where
+ the integer part ends. This could be a decimal, an exponent, both,
+ or neither.
+ If a decimal point is present, set *has_decimal and increment
+ remainder beyond it.
+ Results are undefined (but shouldn't crash) for improperly
+ formatted strings.
+*/
+static void
+parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
+ Py_ssize_t *n_remainder, int *has_decimal)
+{
+ STRINGLIB_CHAR *end = ptr + len;
+ STRINGLIB_CHAR *remainder;
+
+ while (ptr<end && isdigit(*ptr))
+ ++ptr;
+ remainder = ptr;
+
+ /* Does remainder start with a decimal point? */
+ *has_decimal = ptr<end && *remainder == '.';
+
+ /* Skip the decimal point. */
+ if (*has_decimal)
+ remainder++;
+
+ *n_remainder = end - remainder;
+}
+
+/* not all fields of format are used. for example, precision is
+ unused. should this take discrete params in order to be more clear
+ about what it does? or is passing a single format parameter easier
+ and efficient enough to justify a little obfuscation? */
+static Py_ssize_t
+calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
+ STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
+ Py_ssize_t n_number, Py_ssize_t n_remainder,
+ int has_decimal, const LocaleInfo *locale,
+ const InternalFormatSpec *format)
+{
+ Py_ssize_t n_non_digit_non_padding;
+ Py_ssize_t n_padding;
+
+ spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
+ spec->n_lpadding = 0;
+ spec->n_prefix = n_prefix;
+ spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
+ spec->n_remainder = n_remainder;
+ spec->n_spadding = 0;
+ spec->n_rpadding = 0;
+ spec->sign = '\0';
+ spec->n_sign = 0;
+
+ /* the output will look like:
+ | |
+ | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
+ | |
+
+ sign is computed from format->sign and the actual
+ sign of the number
+
+ prefix is given (it's for the '0x' prefix)
+
+ digits is already known
+
+ the total width is either given, or computed from the
+ actual digits
+
+ only one of lpadding, spadding, and rpadding can be non-zero,
+ and it's calculated from the width and other fields
+ */
+
+ /* compute the various parts we're going to write */
+ switch (format->sign) {
+ case '+':
+ /* always put a + or - */
+ spec->n_sign = 1;
+ spec->sign = (sign_char == '-' ? '-' : '+');
+ break;
+ case ' ':
+ spec->n_sign = 1;
+ spec->sign = (sign_char == '-' ? '-' : ' ');
+ break;
+ default:
+ /* Not specified, or the default (-) */
+ if (sign_char == '-') {
+ spec->n_sign = 1;
+ spec->sign = '-';
+ }
+ }
+
+ /* The number of chars used for non-digits and non-padding. */
+ n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
+ spec->n_remainder;
+
+ /* min_width can go negative, that's okay. format->width == -1 means
+ we don't care. */
+ if (format->fill_char == '0' && format->align == '=')
+ spec->n_min_width = format->width - n_non_digit_non_padding;
+ else
+ spec->n_min_width = 0;
+
+ if (spec->n_digits == 0)
+ /* This case only occurs when using 'c' formatting, we need
+ to special case it because the grouping code always wants
+ to have at least one character. */
+ spec->n_grouped_digits = 0;
+ else
+ spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
+ spec->n_digits,
+ spec->n_min_width,
+ locale->grouping,
+ locale->thousands_sep);
+
+ /* Given the desired width and the total of digit and non-digit
+ space we consume, see if we need any padding. format->width can
+ be negative (meaning no padding), but this code still works in
+ that case. */
+ n_padding = format->width -
+ (n_non_digit_non_padding + spec->n_grouped_digits);
+ if (n_padding > 0) {
+ /* Some padding is needed. Determine if it's left, space, or right. */
+ switch (format->align) {
+ case '<':
+ spec->n_rpadding = n_padding;
+ break;
+ case '^':
+ spec->n_lpadding = n_padding / 2;
+ spec->n_rpadding = n_padding - spec->n_lpadding;
+ break;
+ case '=':
+ spec->n_spadding = n_padding;
+ break;
+ case '>':
+ spec->n_lpadding = n_padding;
+ break;
+ default:
+ /* Shouldn't get here, but treat it as '>' */
+ spec->n_lpadding = n_padding;
+ assert(0);
+ break;
+ }
+ }
+ return spec->n_lpadding + spec->n_sign + spec->n_prefix +
+ spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
+ spec->n_remainder + spec->n_rpadding;
+}
+
+/* Write a formatted number into buf, using the field widths that
+   calc_number_widths() stored in spec.
+
+   The fields are written in this order:
+     <lpadding> <sign> <prefix> <spadding> <grouped_digits>
+     <decimal> <remainder> <rpadding>
+
+   buf       - destination; the caller must have allocated room for the
+               total returned by calc_number_widths().
+   spec      - width of each field, plus the sign character to emit.
+   digits    - source characters: spec->n_digits digits, then (when
+               spec->n_decimal is non-zero) one decimal-point character
+               that is skipped, then spec->n_remainder characters that
+               are copied verbatim.
+   n_digits  - not consulted here; spec->n_digits is used instead.
+   prefix    - source of the spec->n_prefix prefix characters (e.g. "0x");
+               only read when spec->n_prefix is non-zero.
+   fill_char - character used for all three padding fields.
+   locale    - supplies grouping, thousands_sep and decimal_point.
+   toupper   - non-zero to upper-case the prefix and the grouped digits.
+
+   No error checking, since we know the buffer is the correct size. */
+static void
+fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
+            STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
+            STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
+            LocaleInfo *locale, int toupper)
+{
+    /* Used to keep track of digits, decimal, and remainder. */
+    STRINGLIB_CHAR *p = digits;
+
+#ifndef NDEBUG
+    Py_ssize_t r;
+#endif
+
+    if (spec->n_lpadding) {
+        STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
+        buf += spec->n_lpadding;
+    }
+    if (spec->n_sign == 1) {
+        *buf++ = spec->sign;
+    }
+    if (spec->n_prefix) {
+        /* Copy first, then upper-case the copy: the source must not be
+           modified. */
+        memmove(buf,
+                prefix,
+                spec->n_prefix * sizeof(STRINGLIB_CHAR));
+        if (toupper) {
+            Py_ssize_t t;
+            for (t = 0; t < spec->n_prefix; ++t)
+                buf[t] = STRINGLIB_TOUPPER(buf[t]);
+        }
+        buf += spec->n_prefix;
+    }
+    if (spec->n_spadding) {
+        STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
+        buf += spec->n_spadding;
+    }
+
+    /* Only for type 'c' special case, it has no digits. */
+    if (spec->n_digits != 0) {
+        /* Fill the digits with InsertThousandsGrouping.  The result is
+           only captured (and checked) in debug builds. */
+#ifndef NDEBUG
+        r =
+#endif
+            STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
+                               spec->n_digits, spec->n_min_width,
+                               locale->grouping, locale->thousands_sep);
+#ifndef NDEBUG
+        assert(r == spec->n_grouped_digits);
+#endif
+        p += spec->n_digits;
+    }
+    if (toupper) {
+        Py_ssize_t t;
+        for (t = 0; t < spec->n_grouped_digits; ++t)
+            buf[t] = STRINGLIB_TOUPPER(buf[t]);
+    }
+    buf += spec->n_grouped_digits;
+
+    if (spec->n_decimal) {
+        Py_ssize_t t;
+        for (t = 0; t < spec->n_decimal; ++t) {
+            /* Widen through unsigned char: plain char may be signed, and
+               a byte >= 0x80 in the locale's decimal point would
+               otherwise be sign-extended when STRINGLIB_CHAR is wider
+               than char. */
+            buf[t] = (STRINGLIB_CHAR)(unsigned char)locale->decimal_point[t];
+        }
+        buf += spec->n_decimal;
+        /* The source holds exactly one decimal-point character, however
+           long the locale's replacement is. */
+        p += 1;
+    }
+
+    if (spec->n_remainder) {
+        memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
+        buf += spec->n_remainder;
+        p += spec->n_remainder;
+    }
+
+    if (spec->n_rpadding) {
+        STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
+        buf += spec->n_rpadding;
+    }
+}
+
+/* Grouping description used when no grouping is wanted.  In the
+   `struct lconv` grouping convention, CHAR_MAX means that no further
+   grouping is performed. */
+static char no_grouping[1] = {CHAR_MAX};
+
+/* Find the decimal point character(s?), thousands_separator(s?), and
+   grouping description, either for the current locale if type is
+   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
+   none if LT_NO_LOCALE.
+
+   locale_info is always fully initialised on return: an unrecognised
+   type asserts in debug builds and is treated as LT_NO_LOCALE when
+   assertions are compiled out, so callers never read uninitialised
+   pointers.  For LT_CURRENT_LOCALE the stored pointers are the ones
+   handed out by localeconv(). */
+static void
+get_locale_info(int type, LocaleInfo *locale_info)
+{
+    switch (type) {
+    case LT_CURRENT_LOCALE: {
+        struct lconv *locale_data = localeconv();
+        locale_info->decimal_point = locale_data->decimal_point;
+        locale_info->thousands_sep = locale_data->thousands_sep;
+        locale_info->grouping = locale_data->grouping;
+        break;
+    }
+    case LT_DEFAULT_LOCALE:
+        locale_info->decimal_point = ".";
+        locale_info->thousands_sep = ",";
+        locale_info->grouping = "\3"; /* Group every 3 characters.  The
+                                         (implicit) trailing 0 means repeat
+                                         infinitely. */
+        break;
+    default:
+        /* Programming error: caught here in debug builds. */
+        assert(0);
+        /* fall through: with NDEBUG, degrade to "no locale" rather than
+           leaving *locale_info uninitialised. */
+    case LT_NO_LOCALE:
+        locale_info->decimal_point = ".";
+        locale_info->thousands_sep = "";
+        locale_info->grouping = no_grouping;
+        break;
+    }
+}
+
+#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
+
+/************************************************************************/
+/*********** string formatting ******************************************/
+/************************************************************************/
+
+/* Format the string `value` according to `format`.
+
+   When a precision is given, at most format->precision characters of
+   the string are used.  The text is then padded out with
+   format->fill_char according to format->width and format->align.
+
+   Returns the object created by STRINGLIB_NEW, or NULL on failure.  A
+   ValueError is set when the spec carries a sign, the alternate form
+   (#) or '=' alignment, none of which apply to strings. */
+static PyObject *
+format_string_internal(PyObject *value, const InternalFormatSpec *format)
+{
+    Py_ssize_t n_chars = STRINGLIB_LEN(value);
+    Py_ssize_t left_pad;
+    Py_ssize_t right_pad;
+    Py_ssize_t n_total;
+    STRINGLIB_CHAR *dest;
+    PyObject *formatted;
+
+    /* Reject the spec fields that only make sense for numbers. */
+    if (format->sign != '\0') {
+        PyErr_SetString(PyExc_ValueError,
+                        "Sign not allowed in string format specifier");
+        return NULL;
+    }
+    if (format->alternate) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Alternate form (#) not allowed in string format "
+                        "specifier");
+        return NULL;
+    }
+    if (format->align == '=') {
+        PyErr_SetString(PyExc_ValueError,
+                        "'=' alignment not allowed "
+                        "in string format specifier");
+        return NULL;
+    }
+
+    /* A non-negative precision caps the number of characters emitted. */
+    if (format->precision >= 0 && format->precision <= n_chars)
+        n_chars = format->precision;
+
+    calc_padding(n_chars, format->width, format->align,
+                 &left_pad, &right_pad, &n_total);
+
+    formatted = STRINGLIB_NEW(NULL, n_total);
+    if (formatted == NULL)
+        return NULL;
+
+    /* Padding goes in first; the returned pointer is where the text is
+       then copied. */
+    dest = fill_padding(STRINGLIB_STR(formatted), n_chars,
+                        format->fill_char, left_pad, right_pad);
+    memcpy(dest, STRINGLIB_STR(value), n_chars * sizeof(STRINGLIB_CHAR));
+
+    return formatted;
+}
+
+
+/************************************************************************/
+/*********** long formatting ********************************************/
+/************************************************************************/
+
+#if defined FORMAT_LONG || defined FORMAT_INT
+/* Signature of the helper that renders an int or long object as a
+ string in the requested base. */
+typedef PyObject*
+(*IntOrLongToString)(PyObject *value, int base);
+
+/* Format the int or long `value` according to `format`.
+
+ `tostring` is called with base 2, 8, 10 or 16 depending on
+ format->type.  For the non-decimal bases this code assumes the
+ returned string starts with a two-character base prefix ("0b",
+ "0o", "0x"); the prefix is skipped and only emitted again when
+ format->alternate is set.  A leading '-' is likewise stripped and
+ handed to calc_number_widths() as the sign.
+
+ Type 'c' does not call `tostring`: the value is range-checked and
+ emitted as a single character.
+
+ Returns the object created by STRINGLIB_NEW, or NULL on failure
+ (ValueError for a precision or for sign/thousands separators with
+ 'c', OverflowError for an out-of-range 'c' argument, or whatever
+ `tostring` / the allocation left set). */
+static PyObject *
+format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
+ IntOrLongToString tostring)
+{
+ PyObject *result = NULL;
+ PyObject *tmp = NULL;
+ STRINGLIB_CHAR *pnumeric_chars;
+ STRINGLIB_CHAR numeric_char;
+ STRINGLIB_CHAR sign_char = '\0';
+ Py_ssize_t n_digits; /* count of digits needed from the computed
+ string */
+ Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
+ produces non-digits */
+ Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
+ Py_ssize_t n_total;
+ STRINGLIB_CHAR *prefix = NULL;
+ NumberFieldWidths spec;
+ long x;
+
+ /* Locale settings, either from the actual locale or
+ from a hard-coded pseudo-locale */
+ LocaleInfo locale;
+
+ /* no precision allowed on integers */
+ if (format->precision != -1) {
+ PyErr_SetString(PyExc_ValueError,
+ "Precision not allowed in integer format specifier");
+ goto done;
+ }
+
+ /* special case for character formatting */
+ if (format->type == 'c') {
+ /* error to specify a sign */
+ if (format->sign != '\0') {
+ PyErr_SetString(PyExc_ValueError,
+ "Sign not allowed with integer"
+ " format specifier 'c'");
+ goto done;
+ }
+
+ /* Error to specify a comma. */
+ if (format->thousands_separators) {
+ PyErr_SetString(PyExc_ValueError,
+ "Thousands separators not allowed with integer"
+ " format specifier 'c'");
+ goto done;
+ }
+
+ /* taken from unicodeobject.c formatchar() */
+ /* Integer input truncated to a character */
+/* XXX: won't work for int */
+ x = PyLong_AsLong(value);
+ if (x == -1 && PyErr_Occurred())
+ goto done;
+ /* The accepted range depends on how wide STRINGLIB_CHAR is in this
+ build. */
+#if STRINGLIB_IS_UNICODE
+#ifdef Py_UNICODE_WIDE
+ if (x < 0 || x > 0x10ffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x110000) "
+ "(wide Python build)");
+ goto done;
+ }
+#else
+ if (x < 0 || x > 0xffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x10000) "
+ "(narrow Python build)");
+ goto done;
+ }
+#endif
+#else
+ if (x < 0 || x > 0xff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x100)");
+ goto done;
+ }
+#endif
+ numeric_char = (STRINGLIB_CHAR)x;
+ pnumeric_chars = &numeric_char;
+ n_digits = 1;
+
+ /* As a sort-of hack, we tell calc_number_widths that we only
+ have "remainder" characters. calc_number_widths thinks
+ these are characters that don't get formatted, only copied
+ into the output string. We do this for 'c' formatting,
+ because the characters are likely to be non-digits.
+ With n_digits == n_remainder == 1, calc_number_widths ends
+ up with zero digits to group. */
+ n_remainder = 1;
+ }
+ else {
+ int base;
+ int leading_chars_to_skip = 0; /* Number of characters added by
+ PyNumber_ToBase that we want to
+ skip over. */
+
+ /* Compute the base and how many characters will be added by
+ PyNumber_ToBase */
+ switch (format->type) {
+ case 'b':
+ base = 2;
+ leading_chars_to_skip = 2; /* 0b */
+ break;
+ case 'o':
+ base = 8;
+ leading_chars_to_skip = 2; /* 0o */
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ leading_chars_to_skip = 2; /* 0x */
+ break;
+ default: /* shouldn't be needed, but stops a compiler warning */
+ case 'd':
+ case 'n':
+ base = 10;
+ break;
+ }
+
+ /* The number of prefix chars is the same as the leading
+ chars to skip */
+ if (format->alternate)
+ n_prefix = leading_chars_to_skip;
+
+ /* Do the hard part, converting to a string in a given base */
+ tmp = tostring(value, base);
+ if (tmp == NULL)
+ goto done;
+
+ pnumeric_chars = STRINGLIB_STR(tmp);
+ n_digits = STRINGLIB_LEN(tmp);
+
+ /* prefix aliases the converted string; once a '-' has been
+ stepped over below it points at the base prefix. */
+ prefix = pnumeric_chars;
+
+ /* Remember not to modify what pnumeric_chars points to. it
+ might be interned. Only modify it after we copy it into a
+ newly allocated output buffer. */
+
+ /* Is a sign character present in the output? If so, remember it
+ and skip it */
+ if (pnumeric_chars[0] == '-') {
+ sign_char = pnumeric_chars[0];
+ ++prefix;
+ ++leading_chars_to_skip;
+ }
+
+ /* Skip over the leading chars (0x, 0b, etc.) */
+ n_digits -= leading_chars_to_skip;
+ pnumeric_chars += leading_chars_to_skip;
+ }
+
+ /* Determine the grouping, separator, and decimal point, if any. */
+ get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
+ (format->thousands_separators ?
+ LT_DEFAULT_LOCALE :
+ LT_NO_LOCALE),
+ &locale);
+
+ /* Calculate how much memory we'll need. */
+ n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
+ n_digits, n_remainder, 0, &locale, format);
+
+ /* Allocate the memory. */
+ result = STRINGLIB_NEW(NULL, n_total);
+ if (!result)
+ goto done;
+
+ /* Populate the memory. Upper-casing is requested only for 'X'. */
+ fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
+ prefix, format->fill_char, &locale, format->type == 'X');
+
+done:
+ /* tmp is still NULL on the 'c' path and on early errors. */
+ Py_XDECREF(tmp);
+ return result;
+}
+#endif /* defined FORMAT_LONG || defined FORMAT_INT */
+
+/************************************************************************/
+/*********** float formatting *******************************************/
+/************************************************************************/
+
+#ifdef FORMAT_FLOAT
+#if STRINGLIB_IS_UNICODE
+/* Widen the first `len` chars of `charbuffer` into `buffer`, one
+   Py_UNICODE per char.  Nothing is written past buffer[len - 1]; in
+   particular no terminator is appended. */
+static void
+strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
+{
+    const char *src = charbuffer;
+    Py_UNICODE *dst = buffer;
+    Py_ssize_t remaining;
+
+    for (remaining = len; remaining > 0; --remaining)
+        *dst++ = (Py_UNICODE)*src++;
+}
+#endif
+
+/* Format the float `value` according to `format`.
+
+ The number is first rendered by PyOS_double_to_string() and that
+ text is then split into sign, digits, decimal point and remainder,
+ which calc_number_widths()/fill_number() lay out with padding and
+ grouping.
+
+ Type handling done here:
+ - no type: treated as 'g' with PyFloat_STR_PRECISION as the default
+ precision and the Py_DTSF_ADD_DOT_0 flag;
+ - 'n': treated as 'g', but the current locale is used for the
+ decimal point and grouping;
+ - '%': the value is multiplied by 100, formatted as 'f', and a '%'
+ is appended.
+
+ Returns the object created by STRINGLIB_NEW, or NULL on failure
+ (ValueError for the alternate form or an oversized precision).
+
+ much of this is taken from unicodeobject.c */
+static PyObject *
+format_float_internal(PyObject *value,
+ const InternalFormatSpec *format)
+{
+ char *buf = NULL; /* buffer returned from PyOS_double_to_string */
+ Py_ssize_t n_digits;
+ Py_ssize_t n_remainder;
+ Py_ssize_t n_total;
+ int has_decimal;
+ double val;
+ Py_ssize_t precision;
+ Py_ssize_t default_precision = 6;
+ STRINGLIB_CHAR type = format->type;
+ int add_pct = 0;
+ STRINGLIB_CHAR *p;
+ NumberFieldWidths spec;
+ int flags = 0;
+ PyObject *result = NULL;
+ STRINGLIB_CHAR sign_char = '\0';
+ int float_type; /* Used to see if we have a nan, inf, or regular float. */
+
+#if STRINGLIB_IS_UNICODE
+ Py_UNICODE *unicode_tmp = NULL;
+#endif
+
+ /* Locale settings, either from the actual locale or
+ from a hard-coded pseudo-locale */
+ LocaleInfo locale;
+
+ /* The range check is what makes the (int) cast below safe. */
+ if (format->precision > INT_MAX) {
+ PyErr_SetString(PyExc_ValueError, "precision too big");
+ goto done;
+ }
+ precision = (int)format->precision;
+
+ /* Alternate is not allowed on floats. */
+ if (format->alternate) {
+ PyErr_SetString(PyExc_ValueError,
+ "Alternate form (#) not allowed in float format "
+ "specifier");
+ goto done;
+ }
+
+ if (type == '\0') {
+ /* Omitted type specifier. This is like 'g' but with at least one
+ digit after the decimal point, and different default precision.*/
+ type = 'g';
+ default_precision = PyFloat_STR_PRECISION;
+ flags |= Py_DTSF_ADD_DOT_0;
+ }
+
+ if (type == 'n')
+ /* 'n' is the same as 'g', except for the locale used to
+ format the result. We take care of that later. */
+ type = 'g';
+
+ val = PyFloat_AsDouble(value);
+ if (val == -1.0 && PyErr_Occurred())
+ goto done;
+
+ if (type == '%') {
+ type = 'f';
+ val *= 100;
+ add_pct = 1;
+ }
+
+ /* A negative precision means none was given in the spec. */
+ if (precision < 0)
+ precision = default_precision;
+
+ /* Cast "type", because if we're in unicode we need to pass an
+ 8-bit char. This is safe, because we've restricted what "type"
+ can be. */
+ buf = PyOS_double_to_string(val, (char)type, precision, flags,
+ &float_type);
+ if (buf == NULL)
+ goto done;
+ n_digits = strlen(buf);
+
+ if (add_pct) {
+ /* We know that buf has a trailing zero (since we just called
+ strlen() on it), and we don't use that fact any more. So we
+ can just write over the trailing zero. From here on buf is
+ NOT NUL-terminated; only n_digits describes its length. */
+ buf[n_digits] = '%';
+ n_digits += 1;
+ }
+
+ /* Since there is no unicode version of PyOS_double_to_string,
+ just use the 8 bit version and then convert to unicode. */
+#if STRINGLIB_IS_UNICODE
+ unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
+ if (unicode_tmp == NULL) {
+ PyErr_NoMemory();
+ goto done;
+ }
+ strtounicode(unicode_tmp, buf, n_digits);
+ p = unicode_tmp;
+#else
+ p = buf;
+#endif
+
+ /* Is a sign character present in the output? If so, remember it
+ and skip it */
+ if (*p == '-') {
+ sign_char = *p;
+ ++p;
+ --n_digits;
+ }
+
+ /* Determine if we have any "remainder" (after the digits, might include
+ decimal or exponent or both (or neither)) */
+ parse_number(p, n_digits, &n_remainder, &has_decimal);
+
+ /* Determine the grouping, separator, and decimal point, if any.
+ This looks at format->type, not the local `type`, which has
+ already had 'n' rewritten to 'g'. */
+ get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
+ (format->thousands_separators ?
+ LT_DEFAULT_LOCALE :
+ LT_NO_LOCALE),
+ &locale);
+
+ /* Calculate how much memory we'll need. */
+ n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
+ n_remainder, has_decimal, &locale, format);
+
+ /* Allocate the memory. */
+ result = STRINGLIB_NEW(NULL, n_total);
+ if (result == NULL)
+ goto done;
+
+ /* Populate the memory. No prefix and no upper-casing for floats. */
+ fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
+ format->fill_char, &locale, 0);
+
+done:
+ /* Both pointers start out NULL, so this is reached safely from every
+ early exit. p may point one past the start of either buffer, so
+ the original pointers are the ones released. */
+ PyMem_Free(buf);
+#if STRINGLIB_IS_UNICODE
+ PyMem_Free(unicode_tmp);
+#endif
+ return result;
+}
+#endif /* FORMAT_FLOAT */
+
+/************************************************************************/
+/*********** complex formatting *****************************************/
+/************************************************************************/
+
+#ifdef FORMAT_COMPLEX
+
+/* Format the complex `value` according to `format`.
+
+ The real and imaginary parts are each rendered with
+ PyOS_double_to_string() and laid out by calc_number_widths() /
+ fill_number() using a copy of the format with padding switched off.
+ The combined text (real part, imaginary part, 'j', and optional
+ parentheses) is then padded as a whole via calc_padding() /
+ fill_padding() using the caller's width, alignment and fill.
+
+ With no type specifier the output is meant to match str(): the type
+ becomes 'g' with PyFloat_STR_PRECISION as the default precision, a
+ real part that is positive zero is omitted, and otherwise the result
+ is wrapped in parentheses. 'n' is treated as 'g' with the current
+ locale.
+
+ Returns the object created by STRINGLIB_NEW, or NULL on failure
+ (ValueError for the alternate form, zero padding, '=' alignment or
+ an oversized precision). */
+static PyObject *
+format_complex_internal(PyObject *value,
+ const InternalFormatSpec *format)
+{
+ double re;
+ double im;
+ char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
+ char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
+
+ InternalFormatSpec tmp_format = *format;
+ Py_ssize_t n_re_digits;
+ Py_ssize_t n_im_digits;
+ Py_ssize_t n_re_remainder;
+ Py_ssize_t n_im_remainder;
+ Py_ssize_t n_re_total;
+ Py_ssize_t n_im_total;
+ int re_has_decimal;
+ int im_has_decimal;
+ Py_ssize_t precision;
+ Py_ssize_t default_precision = 6;
+ STRINGLIB_CHAR type = format->type;
+ STRINGLIB_CHAR *p_re;
+ STRINGLIB_CHAR *p_im;
+ NumberFieldWidths re_spec;
+ NumberFieldWidths im_spec;
+ int flags = 0;
+ PyObject *result = NULL;
+ STRINGLIB_CHAR *p;
+ STRINGLIB_CHAR re_sign_char = '\0';
+ STRINGLIB_CHAR im_sign_char = '\0';
+ int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
+ int im_float_type;
+ int add_parens = 0;
+ int skip_re = 0;
+ Py_ssize_t lpad;
+ Py_ssize_t rpad;
+ Py_ssize_t total;
+
+#if STRINGLIB_IS_UNICODE
+ Py_UNICODE *re_unicode_tmp = NULL;
+ Py_UNICODE *im_unicode_tmp = NULL;
+#endif
+
+ /* Locale settings, either from the actual locale or
+ from a hard-coded pseudo-locale */
+ LocaleInfo locale;
+
+ /* The range check is what makes the (int) cast below safe. */
+ if (format->precision > INT_MAX) {
+ PyErr_SetString(PyExc_ValueError, "precision too big");
+ goto done;
+ }
+ precision = (int)format->precision;
+
+ /* Alternate is not allowed on complex. */
+ if (format->alternate) {
+ PyErr_SetString(PyExc_ValueError,
+ "Alternate form (#) not allowed in complex format "
+ "specifier");
+ goto done;
+ }
+
+ /* Neither is zero padding. */
+ if (format->fill_char == '0') {
+ PyErr_SetString(PyExc_ValueError,
+ "Zero padding is not allowed in complex format "
+ "specifier");
+ goto done;
+ }
+
+ /* Neither is '=' alignment. */
+ if (format->align == '=') {
+ PyErr_SetString(PyExc_ValueError,
+ "'=' alignment flag is not allowed in complex format "
+ "specifier");
+ goto done;
+ }
+
+ re = PyComplex_RealAsDouble(value);
+ if (re == -1.0 && PyErr_Occurred())
+ goto done;
+ im = PyComplex_ImagAsDouble(value);
+ if (im == -1.0 && PyErr_Occurred())
+ goto done;
+
+ if (type == '\0') {
+ /* Omitted type specifier. Should be like str(self). */
+ type = 'g';
+ default_precision = PyFloat_STR_PRECISION;
+ /* copysign() distinguishes +0.0 from -0.0: only a positive-zero
+ real part is dropped. */
+ if (re == 0.0 && copysign(1.0, re) == 1.0)
+ skip_re = 1;
+ else
+ add_parens = 1;
+ }
+
+ if (type == 'n')
+ /* 'n' is the same as 'g', except for the locale used to
+ format the result. We take care of that later. */
+ type = 'g';
+
+ /* A negative precision means none was given in the spec. */
+ if (precision < 0)
+ precision = default_precision;
+
+ /* Cast "type", because if we're in unicode we need to pass an
+ 8-bit char. This is safe, because we've restricted what "type"
+ can be. */
+ re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
+ &re_float_type);
+ if (re_buf == NULL)
+ goto done;
+ im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
+ &im_float_type);
+ if (im_buf == NULL)
+ goto done;
+
+ n_re_digits = strlen(re_buf);
+ n_im_digits = strlen(im_buf);
+
+ /* Since there is no unicode version of PyOS_double_to_string,
+ just use the 8 bit version and then convert to unicode. */
+#if STRINGLIB_IS_UNICODE
+ re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
+ if (re_unicode_tmp == NULL) {
+ PyErr_NoMemory();
+ goto done;
+ }
+ strtounicode(re_unicode_tmp, re_buf, n_re_digits);
+ p_re = re_unicode_tmp;
+
+ im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
+ if (im_unicode_tmp == NULL) {
+ PyErr_NoMemory();
+ goto done;
+ }
+ strtounicode(im_unicode_tmp, im_buf, n_im_digits);
+ p_im = im_unicode_tmp;
+#else
+ p_re = re_buf;
+ p_im = im_buf;
+#endif
+
+ /* Is a sign character present in the output? If so, remember it
+ and skip it */
+ if (*p_re == '-') {
+ re_sign_char = *p_re;
+ ++p_re;
+ --n_re_digits;
+ }
+ if (*p_im == '-') {
+ im_sign_char = *p_im;
+ ++p_im;
+ --n_im_digits;
+ }
+
+ /* Determine if we have any "remainder" (after the digits, might include
+ decimal or exponent or both (or neither)) */
+ parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
+ parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
+
+ /* Determine the grouping, separator, and decimal point, if any.
+ This looks at format->type, not the local `type`, which has
+ already had 'n' rewritten to 'g'. */
+ get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
+ (format->thousands_separators ?
+ LT_DEFAULT_LOCALE :
+ LT_NO_LOCALE),
+ &locale);
+
+ /* Turn off any padding. We'll do it later after we've composed
+ the numbers without padding. */
+ tmp_format.fill_char = '\0';
+ tmp_format.align = '<';
+ tmp_format.width = -1;
+
+ /* Calculate how much memory we'll need. */
+ n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
+ n_re_digits, n_re_remainder,
+ re_has_decimal, &locale, &tmp_format);
+
+ /* Same formatting, but always include a sign, unless the real part is
+ * going to be omitted, in which case we use whatever sign convention was
+ * requested by the original format. */
+ if (!skip_re)
+ tmp_format.sign = '+';
+ n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
+ n_im_digits, n_im_remainder,
+ im_has_decimal, &locale, &tmp_format);
+
+ /* An omitted real part contributes nothing to the total. */
+ if (skip_re)
+ n_re_total = 0;
+
+ /* Add 1 for the 'j', and optionally 2 for parens. */
+ calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
+ format->width, format->align, &lpad, &rpad, &total);
+
+ result = STRINGLIB_NEW(NULL, total);
+ if (result == NULL)
+ goto done;
+
+ /* Populate the memory. First, the padding. */
+ p = fill_padding(STRINGLIB_STR(result),
+ n_re_total + n_im_total + 1 + add_parens * 2,
+ format->fill_char, lpad, rpad);
+
+ if (add_parens)
+ *p++ = '(';
+
+ /* Padding was disabled in tmp_format, so the fill character passed
+ to fill_number() here is a placeholder. */
+ if (!skip_re) {
+ fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
+ p += n_re_total;
+ }
+ fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
+ p += n_im_total;
+ *p++ = 'j';
+
+ if (add_parens)
+ *p++ = ')';
+
+done:
+ /* All four pointers start out NULL, so this is reached safely from
+ every early exit. p_re / p_im may point one past the start of
+ their buffers, so the original pointers are the ones released. */
+ PyMem_Free(re_buf);
+ PyMem_Free(im_buf);
+#if STRINGLIB_IS_UNICODE
+ PyMem_Free(re_unicode_tmp);
+ PyMem_Free(im_unicode_tmp);
+#endif
+ return result;
+}
+#endif /* FORMAT_COMPLEX */
+
+/************************************************************************/
+/*********** built in formatters ****************************************/
+/************************************************************************/
+/* Built-in formatter for string objects.
+
+   An empty format_spec yields STRINGLIB_TOSTR(obj).  Otherwise the spec
+   is parsed ('s' and '<' are passed to the parser, presumably as the
+   default type and alignment) and only presentation type 's' is
+   accepted.  Returns NULL on failure. */
+PyObject *
+FORMAT_STRING(PyObject *obj,
+              STRINGLIB_CHAR *format_spec,
+              Py_ssize_t format_spec_len)
+{
+    InternalFormatSpec spec;
+
+    /* Zero-length spec: equivalent to str(obj). */
+    if (format_spec_len == 0)
+        return STRINGLIB_TOSTR(obj);
+
+    if (!parse_internal_render_format_spec(format_spec, format_spec_len,
+                                           &spec, 's', '<'))
+        return NULL;
+
+    /* Strings support no type conversion: anything but 's' is unknown. */
+    if (spec.type != 's') {
+        unknown_presentation_type(spec.type, obj->ob_type->tp_name);
+        return NULL;
+    }
+
+    return format_string_internal(obj, &spec);
+}
+
+#if defined FORMAT_LONG || defined FORMAT_INT
+/* Shared implementation behind the int and long formatters.
+
+   An empty format_spec yields STRINGLIB_TOSTR(obj).  Otherwise the spec
+   is parsed ('d' and '>' are passed to the parser, presumably as the
+   default type and alignment) and dispatched on its type:
+     - integer types (b, c, d, o, x, X, n) are formatted directly, with
+       `tostring` doing the base conversion;
+     - float types (e, E, f, F, g, G, %) convert obj with
+       PyNumber_Float() and format the resulting float;
+     - anything else is reported as an unknown presentation type.
+   Returns NULL on failure. */
+static PyObject*
+format_int_or_long(PyObject* obj,
+                   STRINGLIB_CHAR *format_spec,
+                   Py_ssize_t format_spec_len,
+                   IntOrLongToString tostring)
+{
+    InternalFormatSpec spec;
+    PyObject *formatted = NULL;
+    PyObject *as_float;
+
+    if (format_spec_len == 0)
+        return STRINGLIB_TOSTR(obj);
+
+    if (!parse_internal_render_format_spec(format_spec,
+                                           format_spec_len,
+                                           &spec, 'd', '>'))
+        return NULL;
+
+    switch (spec.type) {
+    case 'b':
+    case 'c':
+    case 'd':
+    case 'o':
+    case 'x':
+    case 'X':
+    case 'n':
+        /* Already an int (or long): format it as is. */
+        formatted = format_int_or_long_internal(obj, &spec, tostring);
+        break;
+
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+    case '%':
+        /* Float presentation requested: go through a temporary float,
+           released as soon as it has been formatted. */
+        as_float = PyNumber_Float(obj);
+        if (as_float != NULL) {
+            formatted = format_float_internal(as_float, &spec);
+            Py_DECREF(as_float);
+        }
+        break;
+
+    default:
+        unknown_presentation_type(spec.type, obj->ob_type->tp_name);
+        break;
+    }
+
+    return formatted;
+}
+#endif /* FORMAT_LONG || defined FORMAT_INT */
+
+#ifdef FORMAT_LONG
+/* long_format(value, base) renders a long as a string in `base`.
+   In 3.0, _PyLong_Format already has that signature, so a macro alias
+   is enough.  In 2.x it takes two more arguments, which a small wrapper
+   supplies. */
+#if PY_VERSION_HEX >= 0x03000000
+#define long_format _PyLong_Format
+#else
+static PyObject*
+long_format(PyObject* value, int base)
+{
+    PyObject *text;
+
+    /* Callers only hand us long objects. */
+    assert(PyLong_Check(value));
+
+    /* Trailing arguments: 0 = don't add the 'L' suffix, 1 = use the new
+       octal format. */
+    text = _PyLong_Format(value, base, 0, 1);
+    return text;
+}
+#endif
+
+/* Built-in formatter for long objects: format_int_or_long() with
+   long_format as the base-conversion helper. */
+PyObject *
+FORMAT_LONG(PyObject *obj,
+            STRINGLIB_CHAR *format_spec,
+            Py_ssize_t format_spec_len)
+{
+    IntOrLongToString to_text = long_format;
+
+    return format_int_or_long(obj, format_spec, format_spec_len, to_text);
+}
+#endif /* FORMAT_LONG */
+
+#ifdef FORMAT_INT
+/* Render an int object as a string in `base`.
+   This is only used for 2.x, not 3.0. */
+static PyObject*
+int_format(PyObject* value, int base)
+{
+    PyIntObject *as_int;
+
+    /* Callers only hand us int objects, which makes the cast valid. */
+    assert(PyInt_Check(value));
+    as_int = (PyIntObject*)value;
+
+    /* Trailing argument: 1 = use the new octal format. */
+    return _PyInt_Format(as_int, base, 1);
+}
+
+/* Built-in formatter for int objects: format_int_or_long() with
+   int_format as the base-conversion helper. */
+PyObject *
+FORMAT_INT(PyObject *obj,
+           STRINGLIB_CHAR *format_spec,
+           Py_ssize_t format_spec_len)
+{
+    IntOrLongToString to_text = int_format;
+
+    return format_int_or_long(obj, format_spec, format_spec_len, to_text);
+}
+#endif /* FORMAT_INT */
+
+#ifdef FORMAT_FLOAT
+/* Built-in formatter for float objects.
+
+   An empty format_spec yields STRINGLIB_TOSTR(obj).  Otherwise the spec
+   is parsed ('\0' and '>' are passed to the parser, presumably as the
+   default type and alignment).  Types e, E, f, F, g, G, n, % and the
+   omitted type are handed to format_float_internal(); anything else is
+   reported as an unknown presentation type.  Returns NULL on failure. */
+PyObject *
+FORMAT_FLOAT(PyObject *obj,
+             STRINGLIB_CHAR *format_spec,
+             Py_ssize_t format_spec_len)
+{
+    InternalFormatSpec spec;
+
+    /* Zero-length spec: equivalent to str(obj). */
+    if (format_spec_len == 0)
+        return STRINGLIB_TOSTR(obj);
+
+    if (!parse_internal_render_format_spec(format_spec,
+                                           format_spec_len,
+                                           &spec, '\0', '>'))
+        return NULL;
+
+    switch (spec.type) {
+    case '\0': /* No format code: like 'g', but with at least one decimal. */
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+    case 'n':
+    case '%':
+        /* Already a float: no conversion needed. */
+        return format_float_internal(obj, &spec);
+
+    default:
+        unknown_presentation_type(spec.type, obj->ob_type->tp_name);
+        return NULL;
+    }
+}
+#endif /* FORMAT_FLOAT */
+
+#ifdef FORMAT_COMPLEX
+/* Built-in formatter for complex objects.
+
+   An empty format_spec yields STRINGLIB_TOSTR(obj).  Otherwise the spec
+   is parsed ('\0' and '>' are passed to the parser, presumably as the
+   default type and alignment).  Types e, E, f, F, g, G, n and the
+   omitted type are handed to format_complex_internal(); anything else
+   (including '%') is reported as an unknown presentation type.
+   Returns NULL on failure. */
+PyObject *
+FORMAT_COMPLEX(PyObject *obj,
+               STRINGLIB_CHAR *format_spec,
+               Py_ssize_t format_spec_len)
+{
+    InternalFormatSpec spec;
+
+    /* Zero-length spec: equivalent to str(obj). */
+    if (format_spec_len == 0)
+        return STRINGLIB_TOSTR(obj);
+
+    if (!parse_internal_render_format_spec(format_spec,
+                                           format_spec_len,
+                                           &spec, '\0', '>'))
+        return NULL;
+
+    switch (spec.type) {
+    case '\0': /* No format code. */
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+    case 'n':
+        /* Already a complex: no conversion needed. */
+        return format_complex_internal(obj, &spec);
+
+    default:
+        unknown_presentation_type(spec.type, obj->ob_type->tp_name);
+        return NULL;
+    }
+}
+#endif /* FORMAT_COMPLEX */
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
deleted file mode 100644
index 6f314e1..0000000
--- a/Objects/stringlib/join.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/* stringlib: bytes joining implementation */
-
-#if STRINGLIB_IS_UNICODE
-#error join.h only compatible with byte-wise strings
-#endif
-
-Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
-{
- char *sepstr = STRINGLIB_STR(sep);
- const Py_ssize_t seplen = STRINGLIB_LEN(sep);
- PyObject *res = NULL;
- char *p;
- Py_ssize_t seqlen = 0;
- Py_ssize_t sz = 0;
- Py_ssize_t i, nbufs;
- PyObject *seq, *item;
- Py_buffer *buffers = NULL;
-#define NB_STATIC_BUFFERS 10
- Py_buffer static_buffers[NB_STATIC_BUFFERS];
-
- seq = PySequence_Fast(iterable, "can only join an iterable");
- if (seq == NULL) {
- return NULL;
- }
-
- seqlen = PySequence_Fast_GET_SIZE(seq);
- if (seqlen == 0) {
- Py_DECREF(seq);
- return STRINGLIB_NEW(NULL, 0);
- }
-#ifndef STRINGLIB_MUTABLE
- if (seqlen == 1) {
- item = PySequence_Fast_GET_ITEM(seq, 0);
- if (STRINGLIB_CHECK_EXACT(item)) {
- Py_INCREF(item);
- Py_DECREF(seq);
- return item;
- }
- }
-#endif
- if (seqlen > NB_STATIC_BUFFERS) {
- buffers = PyMem_NEW(Py_buffer, seqlen);
- if (buffers == NULL) {
- Py_DECREF(seq);
- PyErr_NoMemory();
- return NULL;
- }
- }
- else {
- buffers = static_buffers;
- }
-
- /* Here is the general case. Do a pre-pass to figure out the total
- * amount of space we'll need (sz), and see whether all arguments are
- * bytes-like.
- */
- for (i = 0, nbufs = 0; i < seqlen; i++) {
- Py_ssize_t itemlen;
- item = PySequence_Fast_GET_ITEM(seq, i);
- if (PyBytes_CheckExact(item)) {
- /* Fast path. */
- Py_INCREF(item);
- buffers[i].obj = item;
- buffers[i].buf = PyBytes_AS_STRING(item);
- buffers[i].len = PyBytes_GET_SIZE(item);
- }
- else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected a bytes-like object, "
- "%.80s found",
- i, Py_TYPE(item)->tp_name);
- goto error;
- }
- nbufs = i + 1; /* for error cleanup */
- itemlen = buffers[i].len;
- if (itemlen > PY_SSIZE_T_MAX - sz) {
- PyErr_SetString(PyExc_OverflowError,
- "join() result is too long");
- goto error;
- }
- sz += itemlen;
- if (i != 0) {
- if (seplen > PY_SSIZE_T_MAX - sz) {
- PyErr_SetString(PyExc_OverflowError,
- "join() result is too long");
- goto error;
- }
- sz += seplen;
- }
- if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
- PyErr_SetString(PyExc_RuntimeError,
- "sequence changed size during iteration");
- goto error;
- }
- }
-
- /* Allocate result space. */
- res = STRINGLIB_NEW(NULL, sz);
- if (res == NULL)
- goto error;
-
- /* Catenate everything. */
- p = STRINGLIB_STR(res);
- if (!seplen) {
- /* fast path */
- for (i = 0; i < nbufs; i++) {
- Py_ssize_t n = buffers[i].len;
- char *q = buffers[i].buf;
- memcpy(p, q, n);
- p += n;
- }
- goto done;
- }
- for (i = 0; i < nbufs; i++) {
- Py_ssize_t n;
- char *q;
- if (i) {
- memcpy(p, sepstr, seplen);
- p += seplen;
- }
- n = buffers[i].len;
- q = buffers[i].buf;
- memcpy(p, q, n);
- p += n;
- }
- goto done;
-
-error:
- res = NULL;
-done:
- Py_DECREF(seq);
- for (i = 0; i < nbufs; i++)
- PyBuffer_Release(&buffers[i]);
- if (buffers != static_buffers)
- PyMem_FREE(buffers);
- return res;
-}
-
-#undef NB_STATIC_BUFFERS
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h
index bd16e0a..f548133 100644
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
@@ -1,4 +1,12 @@
-/* _PyUnicode_InsertThousandsGrouping() helper functions */
+/* stringlib: locale related helpers implementation */
+
+#ifndef STRINGLIB_LOCALEUTIL_H
+#define STRINGLIB_LOCALEUTIL_H
+
+#include <locale.h>
+
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
typedef struct {
const char *grouping;
@@ -6,19 +14,17 @@ typedef struct {
Py_ssize_t i; /* Where we're currently pointing in grouping. */
} GroupGenerator;
-
static void
-GroupGenerator_init(GroupGenerator *self, const char *grouping)
+_GroupGenerator_init(GroupGenerator *self, const char *grouping)
{
self->grouping = grouping;
self->i = 0;
self->previous = 0;
}
-
/* Returns the next grouping, or 0 to signify end. */
static Py_ssize_t
-GroupGenerator_next(GroupGenerator *self)
+_GroupGenerator_next(GroupGenerator *self)
{
/* Note that we don't really do much error checking here. If a
grouping string contains just CHAR_MAX, for example, then just
@@ -39,44 +45,168 @@ GroupGenerator_next(GroupGenerator *self)
}
}
-
/* Fill in some digits, leading zeros, and thousands separator. All
are optional, depending on when we're called. */
static void
-InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
- PyObject *digits, Py_ssize_t *digits_pos,
- Py_ssize_t n_chars, Py_ssize_t n_zeros,
- PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
- Py_UCS4 *maxchar)
+fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
+ Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
+ Py_ssize_t thousands_sep_len)
{
- if (!writer) {
- /* if maxchar > 127, maxchar is already set */
- if (*maxchar == 127 && thousands_sep) {
- Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
- *maxchar = Py_MAX(*maxchar, maxchar2);
- }
- return;
- }
+#if STRINGLIB_IS_UNICODE
+ Py_ssize_t i;
+#endif
if (thousands_sep) {
- *buffer_pos -= thousands_sep_len;
+ *buffer_end -= thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */
- _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
- thousands_sep, 0,
- thousands_sep_len);
+#if STRINGLIB_IS_UNICODE
+ /* Convert the chars of the thousands_sep from
+ the locale into unicode. */
+ for (i = 0; i < thousands_sep_len; ++i)
+ (*buffer_end)[i] = thousands_sep[i];
+#else
+ /* No conversion, just memcpy the thousands_sep. */
+ memcpy(*buffer_end, thousands_sep, thousands_sep_len);
+#endif
}
- *buffer_pos -= n_chars;
- *digits_pos -= n_chars;
- _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
- digits, *digits_pos,
- n_chars);
-
- if (n_zeros) {
- *buffer_pos -= n_zeros;
- enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
- void *data = PyUnicode_DATA(writer->buffer);
- unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
+ *buffer_end -= n_chars;
+ *digits_end -= n_chars;
+ memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
+
+ *buffer_end -= n_zeros;
+ STRINGLIB_FILL(*buffer_end, '0', n_zeros);
+}
+
+/**
+ * _Py_InsertThousandsGrouping:
+ * @buffer: A pointer to the start of a string.
+ * @n_buffer: Number of characters in @buffer.
+ * @digits: A pointer to the digits we're reading from. If @buffer
+ * is NULL (counting mode), this is unused.
+ * @n_digits: The number of digits in the string, in which we want
+ * to put the grouping chars.
+ * @min_width: The minimum width of the digits in the output string.
+ * Output will be zero-padded on the left to fill.
+ * @grouping: see definition in localeconv().
+ * @thousands_sep: see definition in localeconv().
+ *
+ * There are 2 modes: counting and filling. If @buffer is NULL,
+ * we are in counting mode, else filling mode.
+ * If counting, the required buffer size is returned.
+ * If filling, we know the buffer will be large enough, so we don't
+ * need to pass in the buffer size.
+ * Inserts thousand grouping characters (as defined by grouping and
+ * thousands_sep) into the string between buffer and buffer+n_digits.
+ *
+ * Return value: the total number of characters in the grouped result
+ * (digits, zero padding and separators), in both modes.
+ *
+ * This name won't be used, the includer of this file should define
+ * it to be the actual function name, based on unicode or string.
+ *
+ * As closely as possible, this code mimics the logic in decimal.py's
+ * _insert_thousands_sep().
+ **/
+Py_ssize_t
+_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
+ Py_ssize_t n_buffer,
+ STRINGLIB_CHAR *digits,
+ Py_ssize_t n_digits,
+ Py_ssize_t min_width,
+ const char *grouping,
+ const char *thousands_sep)
+{
+ Py_ssize_t count = 0;
+ Py_ssize_t n_zeros;
+ int loop_broken = 0;
+ int use_separator = 0; /* First time through, don't append the
+ separator. They only go between
+ groups. */
+ STRINGLIB_CHAR *buffer_end = NULL;
+ STRINGLIB_CHAR *digits_end = NULL;
+ Py_ssize_t l;
+ Py_ssize_t n_chars;
+ Py_ssize_t thousands_sep_len = strlen(thousands_sep);
+ Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+ be looked at */
+ /* A generator that returns all of the grouping widths, until it
+ returns 0. */
+ GroupGenerator groupgen;
+ _GroupGenerator_init(&groupgen, grouping);
+
+ if (buffer) {
+ buffer_end = buffer + n_buffer;
+ digits_end = digits + n_digits;
}
+
+ while ((l = _GroupGenerator_next(&groupgen)) > 0) {
+ l = MIN(l, MAX(MAX(remaining, min_width), 1));
+ n_zeros = MAX(0, l - remaining);
+ n_chars = MAX(0, MIN(remaining, l));
+
+ /* Use n_zeros zeros and n_chars chars */
+
+ /* Count only, don't do anything. */
+ count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+
+ if (buffer) {
+ /* Copy into the output buffer. */
+ fill(&digits_end, &buffer_end, n_chars, n_zeros,
+ use_separator ? thousands_sep : NULL, thousands_sep_len);
+ }
+
+ /* Use a separator next time. */
+ use_separator = 1;
+
+ remaining -= n_chars;
+ min_width -= l;
+
+ if (remaining <= 0 && min_width <= 0) {
+ loop_broken = 1;
+ break;
+ }
+ min_width -= thousands_sep_len;
+ }
+ if (!loop_broken) {
+ /* We left the loop without using a break statement. */
+
+ l = MAX(MAX(remaining, min_width), 1);
+ n_zeros = MAX(0, l - remaining);
+ n_chars = MAX(0, MIN(remaining, l));
+
+ /* Use n_zeros zeros and n_chars chars */
+ count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+ if (buffer) {
+ /* Copy into the output buffer. */
+ fill(&digits_end, &buffer_end, n_chars, n_zeros,
+ use_separator ? thousands_sep : NULL, thousands_sep_len);
+ }
+ }
+ return count;
+}
+
+/**
+ * _Py_InsertThousandsGroupingLocale:
+ * @buffer: A pointer to the start of a string.
+ * @n_digits: The number of digits in the string, in which we want
+ * to put the grouping chars.
+ *
+ * Reads the current locale and calls _Py_InsertThousandsGrouping().
+ **/
+Py_ssize_t
+_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
+ Py_ssize_t n_buffer,
+ STRINGLIB_CHAR *digits,
+ Py_ssize_t n_digits,
+ Py_ssize_t min_width)
+{
+ struct lconv *locale_data = localeconv();
+ const char *grouping = locale_data->grouping;
+ const char *thousands_sep = locale_data->thousands_sep;
+
+ return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
+ min_width, grouping, thousands_sep);
}
+#endif /* STRINGLIB_LOCALEUTIL_H */
diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h
index ed32a6f..0170bdd 100644
--- a/Objects/stringlib/partition.h
+++ b/Objects/stringlib/partition.h
@@ -1,11 +1,14 @@
/* stringlib: partition implementation */
+#ifndef STRINGLIB_PARTITION_H
+#define STRINGLIB_PARTITION_H
+
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
Py_LOCAL_INLINE(PyObject*)
-STRINGLIB(partition)(PyObject* str_obj,
+stringlib_partition(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj,
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
@@ -22,18 +25,13 @@ STRINGLIB(partition)(PyObject* str_obj,
if (!out)
return NULL;
- pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_SEARCH);
+ pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
if (pos < 0) {
#if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
-
- if (PyErr_Occurred()) {
- Py_DECREF(out);
- return NULL;
- }
#else
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
@@ -60,7 +58,7 @@ STRINGLIB(partition)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject*)
-STRINGLIB(rpartition)(PyObject* str_obj,
+stringlib_rpartition(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj,
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
@@ -77,18 +75,13 @@ STRINGLIB(rpartition)(PyObject* str_obj,
if (!out)
return NULL;
- pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
+ pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
if (pos < 0) {
#if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
-
- if (PyErr_Occurred()) {
- Py_DECREF(out);
- return NULL;
- }
#else
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
@@ -114,3 +107,4 @@ STRINGLIB(rpartition)(PyObject* str_obj,
return out;
}
+#endif
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h
deleted file mode 100644
index ef318ed..0000000
--- a/Objects/stringlib/replace.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* stringlib: replace implementation */
-
-#ifndef STRINGLIB_FASTSEARCH_H
-#error must include "stringlib/fastsearch.h" before including this module
-#endif
-
-Py_LOCAL_INLINE(void)
-STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
- Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
-{
- *s = u2;
- while (--maxcount && ++s != end) {
- /* Find the next character to be replaced.
-
- If it occurs often, it is faster to scan for it using an inline
- loop. If it occurs seldom, it is faster to scan for it using a
- function call; the overhead of the function call is amortized
- across the many characters that call covers. We start with an
- inline loop and use a heuristic to determine whether to fall back
- to a function call. */
- if (*s != u1) {
- int attempts = 10;
- /* search u1 in a dummy loop */
- while (1) {
- if (++s == end)
- return;
- if (*s == u1)
- break;
- if (!--attempts) {
- /* if u1 was not found for attempts iterations,
- use FASTSEARCH() or memchr() */
-#if STRINGLIB_SIZEOF_CHAR == 1
- s++;
- s = memchr(s, u1, end - s);
- if (s == NULL)
- return;
-#else
- Py_ssize_t i;
- STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
- s++;
- i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
- if (i < 0)
- return;
- s += i;
-#endif
- /* restart the dummy loop */
- break;
- }
- }
- }
- *s = u2;
- }
-}
diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h
index 31f77a7..60e7767 100644
--- a/Objects/stringlib/split.h
+++ b/Objects/stringlib/split.h
@@ -1,5 +1,8 @@
/* stringlib: split implementation */
+#ifndef STRINGLIB_SPLIT_H
+#define STRINGLIB_SPLIT_H
+
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
@@ -51,7 +54,7 @@
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(split_whitespace)(PyObject* str_obj,
+stringlib_split_whitespace(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Py_ssize_t maxcount)
{
@@ -99,7 +102,7 @@ STRINGLIB(split_whitespace)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(split_char)(PyObject* str_obj,
+stringlib_split_char(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR ch,
Py_ssize_t maxcount)
@@ -142,7 +145,7 @@ STRINGLIB(split_char)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(split)(PyObject* str_obj,
+stringlib_split(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
Py_ssize_t maxcount)
@@ -155,7 +158,7 @@ STRINGLIB(split)(PyObject* str_obj,
return NULL;
}
else if (sep_len == 1)
- return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount);
+ return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
@@ -163,7 +166,7 @@ STRINGLIB(split)(PyObject* str_obj,
i = j = 0;
while (maxcount-- > 0) {
- pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
+ pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
if (pos < 0)
break;
j = i + pos;
@@ -190,7 +193,7 @@ STRINGLIB(split)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(rsplit_whitespace)(PyObject* str_obj,
+stringlib_rsplit_whitespace(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
Py_ssize_t maxcount)
{
@@ -240,7 +243,7 @@ STRINGLIB(rsplit_whitespace)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(rsplit_char)(PyObject* str_obj,
+stringlib_rsplit_char(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR ch,
Py_ssize_t maxcount)
@@ -284,7 +287,7 @@ STRINGLIB(rsplit_char)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(rsplit)(PyObject* str_obj,
+stringlib_rsplit(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
Py_ssize_t maxcount)
@@ -297,7 +300,7 @@ STRINGLIB(rsplit)(PyObject* str_obj,
return NULL;
}
else if (sep_len == 1)
- return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount);
+ return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
@@ -305,7 +308,7 @@ STRINGLIB(rsplit)(PyObject* str_obj,
j = str_len;
while (maxcount-- > 0) {
- pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH);
+ pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
if (pos < 0)
break;
SPLIT_ADD(str, pos + sep_len, j);
@@ -333,7 +336,7 @@ STRINGLIB(rsplit)(PyObject* str_obj,
}
Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(splitlines)(PyObject* str_obj,
+stringlib_splitlines(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
int keepends)
{
@@ -345,8 +348,8 @@ STRINGLIB(splitlines)(PyObject* str_obj,
and the appends only done when the prealloc buffer is full.
That's too much work for little gain.*/
- Py_ssize_t i;
- Py_ssize_t j;
+ register Py_ssize_t i;
+ register Py_ssize_t j;
PyObject *list = PyList_New(0);
PyObject *sub;
@@ -388,3 +391,4 @@ STRINGLIB(splitlines)(PyObject* str_obj,
return NULL;
}
+#endif
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/string_format.h
index b526ad2..2bd1839 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/string_format.h
@@ -1,7 +1,22 @@
/*
- unicode_format.h -- implementation of str.format().
+ string_format.h -- implementation of string.format().
+
+ It uses the Objects/stringlib conventions, so that it can be
+ compiled for both unicode and string objects.
*/
+
+/* Defines for Python 2.6 compatibility */
+#if PY_VERSION_HEX < 0x03000000
+#define PyLong_FromSsize_t _PyLong_FromSsize_t
+#endif
+
+/* Defines for more efficiently reallocating the string buffer */
+#define INITIAL_SIZE_INCREMENT 100
+#define SIZE_MULTIPLIER 2
+#define MAX_SIZE_INCREMENT 3200
+
+
/************************************************************************/
/*********** Global data structures and forward declarations *********/
/************************************************************************/
@@ -11,8 +26,8 @@
unicode pointers.
*/
typedef struct {
- PyObject *str; /* borrowed reference */
- Py_ssize_t start, end;
+ STRINGLIB_CHAR *ptr;
+ STRINGLIB_CHAR *end;
} SubString;
@@ -49,30 +64,34 @@ AutoNumber_Init(AutoNumber *auto_number)
/* fill in a SubString from a pointer and length */
Py_LOCAL_INLINE(void)
-SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
+SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
{
- str->str = s;
- str->start = start;
- str->end = end;
+ str->ptr = p;
+ if (p == NULL)
+ str->end = NULL;
+ else
+ str->end = str->ptr + len;
}
-/* return a new string. if str->str is NULL, return None */
+/* return a new string. if str->ptr is NULL, return None */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString *str)
{
- if (str->str == NULL)
- Py_RETURN_NONE;
- return PyUnicode_Substring(str->str, str->start, str->end);
+ if (str->ptr == NULL) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
}
-/* return a new string. if str->str is NULL, return a new empty string */
+/* return a new string. if str->ptr is NULL, return a new empty string */
Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString *str)
{
- if (str->str == NULL) {
- return PyUnicode_New(0, 0);
+ if (str->ptr == NULL) {
+ return STRINGLIB_NEW(NULL, 0);
}
- return SubString_new_object(str);
+ return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
}
/* Return 1 if an error has been detected switching between automatic
@@ -102,6 +121,74 @@ autonumber_state_error(AutoNumberState state, int field_name_is_empty)
/************************************************************************/
+/*********** Output string management functions ****************/
+/************************************************************************/
+
+typedef struct {
+ STRINGLIB_CHAR *ptr;
+ STRINGLIB_CHAR *end;
+ PyObject *obj;
+ Py_ssize_t size_increment;
+} OutputString;
+
+/* initialize an OutputString object, reserving size characters */
+static int
+output_initialize(OutputString *output, Py_ssize_t size)
+{
+ output->obj = STRINGLIB_NEW(NULL, size);
+ if (output->obj == NULL)
+ return 0;
+
+ output->ptr = STRINGLIB_STR(output->obj);
+ output->end = STRINGLIB_LEN(output->obj) + output->ptr;
+ output->size_increment = INITIAL_SIZE_INCREMENT;
+
+ return 1;
+}
+
+/*
+ output_extend reallocates the output string buffer.
+ It returns a status: 0 for a failed reallocation,
+ 1 for success.
+*/
+
+static int
+output_extend(OutputString *output, Py_ssize_t count)
+{
+ STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
+ Py_ssize_t curlen = output->ptr - startptr;
+ Py_ssize_t maxlen = curlen + count + output->size_increment;
+
+ if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
+ return 0;
+ startptr = STRINGLIB_STR(output->obj);
+ output->ptr = startptr + curlen;
+ output->end = startptr + maxlen;
+ if (output->size_increment < MAX_SIZE_INCREMENT)
+ output->size_increment *= SIZE_MULTIPLIER;
+ return 1;
+}
+
+/*
+ output_data dumps characters into our output string
+ buffer.
+
+ In some cases, it has to reallocate the string.
+
+ It returns a status: 0 for a failed reallocation,
+ 1 for success.
+*/
+static int
+output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
+{
+ if ((count > output->end - output->ptr) && !output_extend(output, count))
+ return 0;
+ memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
+ output->ptr += count;
+ return 1;
+}
+
+/************************************************************************/
/*********** Format string parsing -- integers and identifiers *********/
/************************************************************************/
@@ -110,14 +197,14 @@ get_integer(const SubString *str)
{
Py_ssize_t accumulator = 0;
Py_ssize_t digitval;
- Py_ssize_t i;
+ STRINGLIB_CHAR *p;
/* empty string is an error */
- if (str->start >= str->end)
+ if (str->ptr >= str->end)
return -1;
- for (i = str->start; i < str->end; i++) {
- digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
+ for (p = str->ptr; p < str->end; p++) {
+ digitval = STRINGLIB_TODECIMAL(*p);
if (digitval < 0)
return -1;
/*
@@ -192,36 +279,34 @@ typedef struct {
lifetime of the iterator. can be empty */
SubString str;
- /* index to where we are inside field_name */
- Py_ssize_t index;
+ /* pointer to where we are inside field_name */
+ STRINGLIB_CHAR *ptr;
} FieldNameIterator;
static int
-FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
- Py_ssize_t start, Py_ssize_t end)
+FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
+ Py_ssize_t len)
{
- SubString_init(&self->str, s, start, end);
- self->index = start;
+ SubString_init(&self->str, ptr, len);
+ self->ptr = self->str.ptr;
return 1;
}
static int
_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
{
- Py_UCS4 c;
+ STRINGLIB_CHAR c;
- name->str = self->str.str;
- name->start = self->index;
+ name->ptr = self->ptr;
/* return everything until '.' or '[' */
- while (self->index < self->str.end) {
- c = PyUnicode_READ_CHAR(self->str.str, self->index++);
- switch (c) {
+ while (self->ptr < self->str.end) {
+ switch (c = *self->ptr++) {
case '[':
case '.':
/* backup so that we this character will be seen next time */
- self->index--;
+ self->ptr--;
break;
default:
continue;
@@ -229,7 +314,7 @@ _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
break;
}
/* end of string is okay */
- name->end = self->index;
+ name->end = self->ptr;
return 1;
}
@@ -237,15 +322,13 @@ static int
_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
{
int bracket_seen = 0;
- Py_UCS4 c;
+ STRINGLIB_CHAR c;
- name->str = self->str.str;
- name->start = self->index;
+ name->ptr = self->ptr;
/* return everything until ']' */
- while (self->index < self->str.end) {
- c = PyUnicode_READ_CHAR(self->str.str, self->index++);
- switch (c) {
+ while (self->ptr < self->str.end) {
+ switch (c = *self->ptr++) {
case ']':
bracket_seen = 1;
break;
@@ -262,7 +345,7 @@ _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
/* end of string is okay */
/* don't include the ']' */
- name->end = self->index-1;
+ name->end = self->ptr-1;
return 1;
}
@@ -272,10 +355,10 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
Py_ssize_t *name_idx, SubString *name)
{
/* check at end of input */
- if (self->index >= self->str.end)
+ if (self->ptr >= self->str.end)
return 1;
- switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
+ switch (*self->ptr++) {
case '.':
*is_attribute = 1;
if (_FieldNameIterator_attr(self, name) == 0)
@@ -298,7 +381,7 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
}
/* empty string is an error */
- if (name->start == name->end) {
+ if (name->ptr == name->end) {
PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
return 0;
}
@@ -314,23 +397,24 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
'rest' is an iterator to return the rest
*/
static int
-field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
+field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
Py_ssize_t *first_idx, FieldNameIterator *rest,
AutoNumber *auto_number)
{
- Py_UCS4 c;
- Py_ssize_t i = start;
+ STRINGLIB_CHAR c;
+ STRINGLIB_CHAR *p = ptr;
+ STRINGLIB_CHAR *end = ptr + len;
int field_name_is_empty;
int using_numeric_index;
/* find the part up until the first '.' or '[' */
- while (i < end) {
- switch (c = PyUnicode_READ_CHAR(str, i++)) {
+ while (p < end) {
+ switch (c = *p++) {
case '[':
case '.':
/* backup so that we this character is available to the
"rest" iterator */
- i--;
+ p--;
break;
default:
continue;
@@ -339,15 +423,15 @@ field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *fir
}
/* set up the return values */
- SubString_init(first, str, start, i);
- FieldNameIterator_init(rest, str, i, end);
+ SubString_init(first, ptr, p - ptr);
+ FieldNameIterator_init(rest, p, end - p);
/* see if "first" is an integer, in which case it's used as an index */
*first_idx = get_integer(first);
if (*first_idx == -1 && PyErr_Occurred())
return 0;
- field_name_is_empty = first->start >= first->end;
+ field_name_is_empty = first->ptr >= first->end;
/* If the field name is omitted or if we have a numeric index
specified, then we're doing numeric indexing into args. */
@@ -402,7 +486,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
Py_ssize_t index;
FieldNameIterator rest;
- if (!field_name_split(input->str, input->start, input->end, &first,
+ if (!field_name_split(input->ptr, input->end - input->ptr, &first,
&index, &rest, auto_number)) {
goto error;
}
@@ -410,43 +494,21 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
if (index == -1) {
/* look up in kwargs */
PyObject *key = SubString_new_object(&first);
- if (key == NULL) {
+ if (key == NULL)
goto error;
- }
- if (kwargs == NULL) {
+ if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
PyErr_SetObject(PyExc_KeyError, key);
Py_DECREF(key);
goto error;
}
- /* Use PyObject_GetItem instead of PyDict_GetItem because this
- code is no longer just used with kwargs. It might be passed
- a non-dict when called through format_map. */
- obj = PyObject_GetItem(kwargs, key);
Py_DECREF(key);
- if (obj == NULL) {
- goto error;
- }
+ Py_INCREF(obj);
}
else {
- /* If args is NULL, we have a format string with a positional field
- with only kwargs to retrieve it from. This can only happen when
- used with format_map(), where positional arguments are not
- allowed. */
- if (args == NULL) {
- PyErr_SetString(PyExc_ValueError, "Format string contains "
- "positional fields");
- goto error;
- }
-
/* look up in args */
obj = PySequence_GetItem(args, index);
- if (obj == NULL) {
- PyErr_Format(PyExc_IndexError,
- "Replacement index %zd out of range for positional "
- "args tuple",
- index);
- goto error;
- }
+ if (obj == NULL)
+ goto error;
}
/* iterate over the rest of the field_name */
@@ -495,41 +557,48 @@ error:
appends to the output.
*/
static int
-render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
+render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
{
int ok = 0;
PyObject *result = NULL;
PyObject *format_spec_object = NULL;
- int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
- int err;
+ PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
+ STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
+ format_spec->ptr : NULL;
+ Py_ssize_t format_spec_len = format_spec->ptr ?
+ format_spec->end - format_spec->ptr : 0;
/* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */
+#if STRINGLIB_IS_UNICODE
if (PyUnicode_CheckExact(fieldobj))
- formatter = _PyUnicode_FormatAdvancedWriter;
+ formatter = _PyUnicode_FormatAdvanced;
+ /* Unfortunately, there's a problem with checking for int, long,
+ and float here. If we're being included as unicode, their
+ formatters expect string format_spec args. For now, just skip
+ this optimization for unicode. This could be fixed, but it's a
+ hassle. */
+#else
+ if (PyString_CheckExact(fieldobj))
+ formatter = _PyBytes_FormatAdvanced;
+ else if (PyInt_CheckExact(fieldobj))
+ formatter =_PyInt_FormatAdvanced;
else if (PyLong_CheckExact(fieldobj))
- formatter = _PyLong_FormatAdvancedWriter;
+ formatter =_PyLong_FormatAdvanced;
else if (PyFloat_CheckExact(fieldobj))
- formatter = _PyFloat_FormatAdvancedWriter;
- else if (PyComplex_CheckExact(fieldobj))
- formatter = _PyComplex_FormatAdvancedWriter;
+ formatter = _PyFloat_FormatAdvanced;
+#endif
if (formatter) {
/* we know exactly which formatter will be called when __format__ is
looked up, so call it directly, instead. */
- err = formatter(writer, fieldobj, format_spec->str,
- format_spec->start, format_spec->end);
- return (err == 0);
+ result = formatter(fieldobj, format_spec_start, format_spec_len);
}
else {
/* We need to create an object out of the pointers we have, because
__format__ takes a string/unicode object for format_spec. */
- if (format_spec->str)
- format_spec_object = PyUnicode_Substring(format_spec->str,
- format_spec->start,
- format_spec->end);
- else
- format_spec_object = PyUnicode_New(0, 0);
+ format_spec_object = STRINGLIB_NEW(format_spec_start,
+ format_spec_len);
if (format_spec_object == NULL)
goto done;
@@ -538,10 +607,24 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
if (result == NULL)
goto done;
- if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
- goto done;
- ok = 1;
+#if PY_VERSION_HEX >= 0x03000000
+ assert(PyUnicode_Check(result));
+#else
+ assert(PyString_Check(result) || PyUnicode_Check(result));
+
+ /* Convert result to our type. We could be str, and result could
+ be unicode */
+ {
+ PyObject *tmp = STRINGLIB_TOSTR(result);
+ if (tmp == NULL)
+ goto done;
+ Py_DECREF(result);
+ result = tmp;
+ }
+#endif
+ ok = output_data(output,
+ STRINGLIB_STR(result), STRINGLIB_LEN(result));
done:
Py_XDECREF(format_spec_object);
Py_XDECREF(result);
@@ -550,33 +633,23 @@ done:
static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
- int *format_spec_needs_expanding, Py_UCS4 *conversion)
+ STRINGLIB_CHAR *conversion)
{
/* Note this function works if the field name is zero length,
which is good. Zero length field names are handled later, in
field_name_split. */
- Py_UCS4 c = 0;
+ STRINGLIB_CHAR c = 0;
/* initialize these, as they may be empty */
*conversion = '\0';
- SubString_init(format_spec, NULL, 0, 0);
+ SubString_init(format_spec, NULL, 0);
/* Search for the field name. it's terminated by the end of
the string, or a ':' or '!' */
- field_name->str = str->str;
- field_name->start = str->start;
- while (str->start < str->end) {
- switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
- case '{':
- PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
- return 0;
- case '[':
- for (; str->start < str->end; str->start++)
- if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
- break;
- continue;
- case '}':
+ field_name->ptr = str->ptr;
+ while (str->ptr < str->end) {
+ switch (c = *(str->ptr++)) {
case ':':
case '!':
break;
@@ -586,62 +659,40 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
break;
}
- field_name->end = str->start - 1;
if (c == '!' || c == ':') {
- Py_ssize_t count;
/* we have a format specifier and/or a conversion */
/* don't include the last character */
+ field_name->end = str->ptr-1;
+
+ /* the format specifier is the rest of the string */
+ format_spec->ptr = str->ptr;
+ format_spec->end = str->end;
/* see if there's a conversion specifier */
if (c == '!') {
/* there must be another character present */
- if (str->start >= str->end) {
+ if (format_spec->ptr >= format_spec->end) {
PyErr_SetString(PyExc_ValueError,
- "end of string while looking for conversion "
+ "end of format while looking for conversion "
"specifier");
return 0;
}
- *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
+ *conversion = *(format_spec->ptr++);
- if (str->start < str->end) {
- c = PyUnicode_READ_CHAR(str->str, str->start++);
- if (c == '}')
- return 1;
+ /* if there is another character, it must be a colon */
+ if (format_spec->ptr < format_spec->end) {
+ c = *(format_spec->ptr++);
if (c != ':') {
PyErr_SetString(PyExc_ValueError,
- "expected ':' after conversion specifier");
+ "expected ':' after format specifier");
return 0;
}
}
}
- format_spec->str = str->str;
- format_spec->start = str->start;
- count = 1;
- while (str->start < str->end) {
- switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
- case '{':
- *format_spec_needs_expanding = 1;
- count++;
- break;
- case '}':
- count--;
- if (count == 0) {
- format_spec->end = str->start - 1;
- return 1;
- }
- break;
- default:
- break;
- }
- }
-
- PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
- return 0;
- }
- else if (c != '}') {
- PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
- return 0;
}
+ else
+ /* end of string, there's no format_spec or conversion */
+ field_name->end = str->ptr;
return 1;
}
@@ -660,10 +711,9 @@ typedef struct {
} MarkupIterator;
static int
-MarkupIterator_init(MarkupIterator *self, PyObject *str,
- Py_ssize_t start, Py_ssize_t end)
+MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
{
- SubString_init(&self->str, str, start, end);
+ SubString_init(&self->str, ptr, len);
return 1;
}
@@ -672,29 +722,30 @@ MarkupIterator_init(MarkupIterator *self, PyObject *str,
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
int *field_present, SubString *field_name,
- SubString *format_spec, Py_UCS4 *conversion,
+ SubString *format_spec, STRINGLIB_CHAR *conversion,
int *format_spec_needs_expanding)
{
int at_end;
- Py_UCS4 c = 0;
- Py_ssize_t start;
+ STRINGLIB_CHAR c = 0;
+ STRINGLIB_CHAR *start;
+ int count;
Py_ssize_t len;
int markup_follows = 0;
/* initialize all of the output variables */
- SubString_init(literal, NULL, 0, 0);
- SubString_init(field_name, NULL, 0, 0);
- SubString_init(format_spec, NULL, 0, 0);
+ SubString_init(literal, NULL, 0);
+ SubString_init(field_name, NULL, 0);
+ SubString_init(format_spec, NULL, 0);
*conversion = '\0';
*format_spec_needs_expanding = 0;
*field_present = 0;
/* No more input, end of iterator. This is the normal exit
path. */
- if (self->str.start >= self->str.end)
+ if (self->str.ptr >= self->str.end)
return 1;
- start = self->str.start;
+ start = self->str.ptr;
/* First read any literal text. Read until the end of string, an
escaped '{' or '}', or an unescaped '{'. In order to never
@@ -703,8 +754,8 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
including the brace, but no format object. The next time
through, we'll return the rest of the literal, skipping past
the second consecutive brace. */
- while (self->str.start < self->str.end) {
- switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
+ while (self->str.ptr < self->str.end) {
+ switch (c = *(self->str.ptr++)) {
case '{':
case '}':
markup_follows = 1;
@@ -715,12 +766,10 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
break;
}
- at_end = self->str.start >= self->str.end;
- len = self->str.start - start;
+ at_end = self->str.ptr >= self->str.end;
+ len = self->str.ptr - start;
- if ((c == '}') && (at_end ||
- (c != PyUnicode_READ_CHAR(self->str.str,
- self->str.start)))) {
+ if ((c == '}') && (at_end || (c != *self->str.ptr))) {
PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
"in format string");
return 0;
@@ -731,10 +780,10 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
return 0;
}
if (!at_end) {
- if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
+ if (c == *self->str.ptr) {
/* escaped } or {, skip it in the input. there is no
markup object following us, just this literal text */
- self->str.start++;
+ self->str.ptr++;
markup_follows = 0;
}
else
@@ -742,25 +791,56 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
}
/* record the literal text */
- literal->str = self->str.str;
- literal->start = start;
+ literal->ptr = start;
literal->end = start + len;
if (!markup_follows)
return 2;
- /* this is markup; parse the field */
+ /* this is markup, find the end of the string by counting nested
+ braces. note that this prohibits escaped braces, so that
+ format_specs cannot have braces in them. */
*field_present = 1;
- if (!parse_field(&self->str, field_name, format_spec,
- format_spec_needs_expanding, conversion))
- return 0;
- return 2;
+ count = 1;
+
+ start = self->str.ptr;
+
+ /* we know we can't have a zero length string, so don't worry
+ about that case */
+ while (self->str.ptr < self->str.end) {
+ switch (c = *(self->str.ptr++)) {
+ case '{':
+ /* the format spec needs to be recursively expanded.
+ this is an optimization, and not strictly needed */
+ *format_spec_needs_expanding = 1;
+ count++;
+ break;
+ case '}':
+ count--;
+ if (count <= 0) {
+ /* we're done. parse and get out */
+ SubString s;
+
+ SubString_init(&s, start, self->str.ptr - 1 - start);
+ if (parse_field(&s, field_name, format_spec, conversion) == 0)
+ return 0;
+
+ /* success */
+ return 2;
+ }
+ break;
+ }
+ }
+
+ /* end of string while searching for matching '}' */
+ PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
+ return 0;
}
/* do the !r or !s conversion on obj */
static PyObject *
-do_conversion(PyObject *obj, Py_UCS4 conversion)
+do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
{
/* XXX in pre-3.0, do we need to convert this to unicode, since it
might have returned a string? */
@@ -768,9 +848,7 @@ do_conversion(PyObject *obj, Py_UCS4 conversion)
case 'r':
return PyObject_Repr(obj);
case 's':
- return PyObject_Str(obj);
- case 'a':
- return PyObject_ASCII(obj);
+ return STRINGLIB_TOSTR(obj);
default:
if (conversion > 32 && conversion < 127) {
/* It's the ASCII subrange; casting to char is safe
@@ -802,8 +880,8 @@ do_conversion(PyObject *obj, Py_UCS4 conversion)
static int
output_markup(SubString *field_name, SubString *format_spec,
- int format_spec_needs_expanding, Py_UCS4 conversion,
- _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
+ int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
+ OutputString *output, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
PyObject *tmp = NULL;
@@ -819,7 +897,7 @@ output_markup(SubString *field_name, SubString *format_spec,
if (conversion != '\0') {
tmp = do_conversion(fieldobj, conversion);
- if (tmp == NULL || PyUnicode_READY(tmp) == -1)
+ if (tmp == NULL)
goto done;
/* do the assignment, transferring ownership: fieldobj = tmp */
@@ -828,23 +906,24 @@ output_markup(SubString *field_name, SubString *format_spec,
tmp = NULL;
}
- /* if needed, recursively compute the format_spec */
+ /* if needed, recurively compute the format_spec */
if (format_spec_needs_expanding) {
tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
auto_number);
- if (tmp == NULL || PyUnicode_READY(tmp) == -1)
+ if (tmp == NULL)
goto done;
/* note that in the case we're expanding the format string,
tmp must be kept around until after the call to
render_field. */
- SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
+ SubString_init(&expanded_format_spec,
+ STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
actual_format_spec = &expanded_format_spec;
}
else
actual_format_spec = format_spec;
- if (render_field(fieldobj, actual_format_spec, writer) == 0)
+ if (render_field(fieldobj, actual_format_spec, output) == 0)
goto done;
result = 1;
@@ -864,7 +943,7 @@ done:
*/
static int
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
- _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
+ OutputString *output, int recursion_depth, AutoNumber *auto_number)
{
MarkupIterator iter;
int format_spec_needs_expanding;
@@ -873,29 +952,20 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
SubString literal;
SubString field_name;
SubString format_spec;
- Py_UCS4 conversion;
+ STRINGLIB_CHAR conversion;
- MarkupIterator_init(&iter, input->str, input->start, input->end);
+ MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
&field_name, &format_spec,
&conversion,
&format_spec_needs_expanding)) == 2) {
- if (literal.end != literal.start) {
- if (!field_present && iter.str.start == iter.str.end)
- writer->overallocate = 0;
- if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
- literal.start, literal.end) < 0)
- return 0;
- }
-
- if (field_present) {
- if (iter.str.start == iter.str.end)
- writer->overallocate = 0;
+ if (!output_data(output, literal.ptr, literal.end - literal.ptr))
+ return 0;
+ if (field_present)
if (!output_markup(&field_name, &format_spec,
- format_spec_needs_expanding, conversion, writer,
+ format_spec_needs_expanding, conversion, output,
args, kwargs, recursion_depth, auto_number))
return 0;
- }
}
return result;
}
@@ -909,26 +979,43 @@ static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
- _PyUnicodeWriter writer;
+ OutputString output;
+ PyObject *result = NULL;
+ Py_ssize_t count;
+
+ output.obj = NULL; /* needed so cleanup code always works */
/* check the recursion level */
if (recursion_depth <= 0) {
PyErr_SetString(PyExc_ValueError,
"Max string recursion exceeded");
- return NULL;
+ goto done;
}
- _PyUnicodeWriter_Init(&writer);
- writer.overallocate = 1;
- writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
+ /* initial size is the length of the format string, plus the size
+ increment. seems like a reasonable default */
+ if (!output_initialize(&output,
+ input->end - input->ptr +
+ INITIAL_SIZE_INCREMENT))
+ goto done;
- if (!do_markup(input, args, kwargs, &writer, recursion_depth,
+ if (!do_markup(input, args, kwargs, &output, recursion_depth,
auto_number)) {
- _PyUnicodeWriter_Dealloc(&writer);
- return NULL;
+ goto done;
+ }
+
+ count = output.ptr - STRINGLIB_STR(output.obj);
+ if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
+ goto done;
}
- return _PyUnicodeWriter_Finish(&writer);
+ /* transfer ownership to result */
+ result = output.obj;
+ output.obj = NULL;
+
+done:
+ Py_XDECREF(output.obj);
+ return result;
}
/************************************************************************/
@@ -949,19 +1036,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
AutoNumber auto_number;
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
AutoNumber_Init(&auto_number);
- SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
+ SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
return build_string(&input, args, kwargs, recursion_depth, &auto_number);
}
-static PyObject *
-do_string_format_map(PyObject *self, PyObject *obj)
-{
- return do_string_format(self, NULL, obj);
-}
/************************************************************************/
@@ -975,7 +1054,9 @@ do_string_format_map(PyObject *self, PyObject *obj)
typedef struct {
PyObject_HEAD
- PyObject *str;
+
+ STRINGLIB_OBJECT *str;
+
MarkupIterator it_markup;
} formatteriterobject;
@@ -1000,7 +1081,7 @@ formatteriter_next(formatteriterobject *it)
SubString literal;
SubString field_name;
SubString format_spec;
- Py_UCS4 conversion;
+ STRINGLIB_CHAR conversion;
int format_spec_needs_expanding;
int field_present;
int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
@@ -1044,8 +1125,7 @@ formatteriter_next(formatteriterobject *it)
Py_INCREF(conversion_str);
}
else
- conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
- &conversion, 1);
+ conversion_str = STRINGLIB_NEW(&conversion, 1);
if (conversion_str == NULL)
goto done;
@@ -1071,10 +1151,10 @@ static PyTypeObject PyFormatterIter_Type = {
0, /* tp_itemsize */
/* methods */
(destructor)formatteriter_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
+ 0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
- 0, /* tp_as_async */
+ 0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -1102,18 +1182,10 @@ static PyTypeObject PyFormatterIter_Type = {
describing the parsed elements. It's a wrapper around
stringlib/string_format.h's MarkupIterator */
static PyObject *
-formatter_parser(PyObject *ignored, PyObject *self)
+formatter_parser(STRINGLIB_OBJECT *self)
{
formatteriterobject *it;
- if (!PyUnicode_Check(self)) {
- PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
- return NULL;
- }
-
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
if (it == NULL)
return NULL;
@@ -1123,7 +1195,10 @@ formatter_parser(PyObject *ignored, PyObject *self)
it->str = self;
/* initialize the contained MarkupIterator */
- MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
+ MarkupIterator_init(&it->it_markup,
+ STRINGLIB_STR(self),
+ STRINGLIB_LEN(self));
+
return (PyObject *)it;
}
@@ -1139,7 +1214,9 @@ formatter_parser(PyObject *ignored, PyObject *self)
typedef struct {
PyObject_HEAD
- PyObject *str;
+
+ STRINGLIB_OBJECT *str;
+
FieldNameIterator it_field;
} fieldnameiterobject;
@@ -1207,10 +1284,10 @@ static PyTypeObject PyFieldNameIter_Type = {
0, /* tp_itemsize */
/* methods */
(destructor)fieldnameiter_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
+ 0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
- 0, /* tp_as_async */
+ 0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -1240,7 +1317,7 @@ static PyTypeObject PyFieldNameIter_Type = {
field_name_split. The iterator it returns is a
FieldNameIterator */
static PyObject *
-formatter_field_name_split(PyObject *ignored, PyObject *self)
+formatter_field_name_split(STRINGLIB_OBJECT *self)
{
SubString first;
Py_ssize_t first_idx;
@@ -1249,14 +1326,6 @@ formatter_field_name_split(PyObject *ignored, PyObject *self)
PyObject *first_obj = NULL;
PyObject *result = NULL;
- if (!PyUnicode_Check(self)) {
- PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
- return NULL;
- }
-
- if (PyUnicode_READY(self) == -1)
- return NULL;
-
it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
if (it == NULL)
return NULL;
@@ -1268,7 +1337,8 @@ formatter_field_name_split(PyObject *ignored, PyObject *self)
/* Pass in auto_number = NULL. We'll return an empty string for
first_obj in that case. */
- if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
+ if (!field_name_split(STRINGLIB_STR(self),
+ STRINGLIB_LEN(self),
&first, &first_idx, &it->it_field, NULL))
goto done;
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index ce27f3e..84e4616 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -6,10 +6,7 @@
compiled as unicode. */
#define STRINGLIB_IS_UNICODE 0
-#define FASTSEARCH fastsearch
-#define STRINGLIB(F) stringlib_##F
-#define STRINGLIB_OBJECT PyBytesObject
-#define STRINGLIB_SIZEOF_CHAR 1
+#define STRINGLIB_OBJECT PyStringObject
#define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S"
@@ -18,11 +15,19 @@
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
-#define STRINGLIB_STR PyBytes_AS_STRING
-#define STRINGLIB_LEN PyBytes_GET_SIZE
-#define STRINGLIB_NEW PyBytes_FromStringAndSize
-#define STRINGLIB_CHECK PyBytes_Check
-#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
+#define STRINGLIB_TOUPPER Py_TOUPPER
+#define STRINGLIB_TOLOWER Py_TOLOWER
+#define STRINGLIB_FILL memset
+#define STRINGLIB_STR PyString_AS_STRING
+#define STRINGLIB_LEN PyString_GET_SIZE
+#define STRINGLIB_NEW PyString_FromStringAndSize
+#define STRINGLIB_RESIZE _PyString_Resize
+#define STRINGLIB_CHECK PyString_Check
+#define STRINGLIB_CHECK_EXACT PyString_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_Repr
+#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
+#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
+
+#define STRINGLIB_WANT_CONTAINS_OBJ 1
+
#endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index e1165ea..be595a6 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -1,48 +1,27 @@
-#if STRINGLIB_IS_UNICODE
-# error "transmogrify.h only compatible with byte-wise strings"
-#endif
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
/* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */
-/*[clinic input]
-class B "PyObject *" "&PyType_Type"
-[clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
-
-#include "clinic/transmogrify.h.h"
-
-static inline PyObject *
-return_self(PyObject *self)
-{
-#if !STRINGLIB_MUTABLE
- if (STRINGLIB_CHECK_EXACT(self)) {
- Py_INCREF(self);
- return self;
- }
-#endif
- return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
-}
-
-/*[clinic input]
-B.expandtabs as stringlib_expandtabs
-
- tabsize: int = 8
-
-Return a copy where all tab characters are expanded using spaces.
+PyDoc_STRVAR(expandtabs__doc__,
+"B.expandtabs([tabsize]) -> copy of B\n\
+\n\
+Return a copy of B where all tab characters are expanded using spaces.\n\
+If tabsize is not given, a tab size of 8 characters is assumed.");
-If tabsize is not given, a tab size of 8 characters is assumed.
-[clinic start generated code]*/
-
-static PyObject *
-stringlib_expandtabs_impl(PyObject *self, int tabsize)
-/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
+static PyObject*
+stringlib_expandtabs(PyObject *self, PyObject *args)
{
const char *e, *p;
char *q;
Py_ssize_t i, j;
PyObject *u;
-
+ int tabsize = 8;
+
+ if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+ return NULL;
+
/* First pass: determine size of output string */
i = j = 0;
e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
@@ -67,18 +46,18 @@ stringlib_expandtabs_impl(PyObject *self, int tabsize)
}
}
}
-
+
if (i > PY_SSIZE_T_MAX - j)
goto overflow;
-
+
/* Second pass: create output string and fill it */
u = STRINGLIB_NEW(NULL, i + j);
if (!u)
return NULL;
-
+
j = 0;
q = STRINGLIB_STR(u);
-
+
for (p = STRINGLIB_STR(self); p < e; p++) {
if (*p == '\t') {
if (tabsize > 0) {
@@ -102,7 +81,7 @@ stringlib_expandtabs_impl(PyObject *self, int tabsize)
return NULL;
}
-static inline PyObject *
+Py_LOCAL_INLINE(PyObject *)
pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
PyObject *u;
@@ -112,93 +91,118 @@ pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
if (right < 0)
right = 0;
- if (left == 0 && right == 0) {
- return return_self(self);
+ if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+ /* We're defined as returning a copy; If the object is mutable
+ * that means we must make an identical copy. */
+ return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+ Py_INCREF(self);
+ return (PyObject *)self;
+#endif /* STRINGLIB_MUTABLE */
}
- u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);
+ u = STRINGLIB_NEW(NULL,
+ left + STRINGLIB_LEN(self) + right);
if (u) {
if (left)
memset(STRINGLIB_STR(u), fill, left);
- memcpy(STRINGLIB_STR(u) + left,
- STRINGLIB_STR(self),
- STRINGLIB_LEN(self));
+ Py_MEMCPY(STRINGLIB_STR(u) + left,
+ STRINGLIB_STR(self),
+ STRINGLIB_LEN(self));
if (right)
memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
- fill, right);
+ fill, right);
}
return u;
}
-/*[clinic input]
-B.ljust as stringlib_ljust
-
- width: Py_ssize_t
- fillchar: char = b' '
- /
-
-Return a left-justified string of length width.
-
-Padding is done using the specified fill character.
-[clinic start generated code]*/
+PyDoc_STRVAR(ljust__doc__,
+"B.ljust(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B left justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
static PyObject *
-stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
-/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
+stringlib_ljust(PyObject *self, PyObject *args)
{
- if (STRINGLIB_LEN(self) >= width) {
- return return_self(self);
+ Py_ssize_t width;
+ char fillchar = ' ';
+
+ if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
+ return NULL;
+
+ if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+ /* We're defined as returning a copy; If the object is mutable
+ * that means we must make an identical copy. */
+ return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+ Py_INCREF(self);
+ return (PyObject*) self;
+#endif
}
return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
}
-/*[clinic input]
-B.rjust as stringlib_rjust
-
- width: Py_ssize_t
- fillchar: char = b' '
- /
-
-Return a right-justified string of length width.
-
-Padding is done using the specified fill character.
-[clinic start generated code]*/
+PyDoc_STRVAR(rjust__doc__,
+"B.rjust(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B right justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
static PyObject *
-stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
-/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
+stringlib_rjust(PyObject *self, PyObject *args)
{
- if (STRINGLIB_LEN(self) >= width) {
- return return_self(self);
+ Py_ssize_t width;
+ char fillchar = ' ';
+
+ if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
+ return NULL;
+
+ if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+ /* We're defined as returning a copy; If the object is mutable
+ * that means we must make an identical copy. */
+ return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+ Py_INCREF(self);
+ return (PyObject*) self;
+#endif
}
return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
}
-/*[clinic input]
-B.center as stringlib_center
-
- width: Py_ssize_t
- fillchar: char = b' '
- /
-
-Return a centered string of length width.
-
-Padding is done using the specified fill character.
-[clinic start generated code]*/
+PyDoc_STRVAR(center__doc__,
+"B.center(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B centered in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
static PyObject *
-stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
-/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
+stringlib_center(PyObject *self, PyObject *args)
{
Py_ssize_t marg, left;
+ Py_ssize_t width;
+ char fillchar = ' ';
- if (STRINGLIB_LEN(self) >= width) {
- return return_self(self);
+ if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
+ return NULL;
+
+ if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+ /* We're defined as returning a copy; If the object is mutable
+ * that means we must make an identical copy. */
+ return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+ Py_INCREF(self);
+ return (PyObject*) self;
+#endif
}
marg = width - STRINGLIB_LEN(self);
@@ -207,27 +211,39 @@ stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
return pad(self, left, marg - left, fillchar);
}
-/*[clinic input]
-B.zfill as stringlib_zfill
-
- width: Py_ssize_t
- /
-
-Pad a numeric string with zeros on the left, to fill a field of the given width.
-
-The original string is never truncated.
-[clinic start generated code]*/
+PyDoc_STRVAR(zfill__doc__,
+"B.zfill(width) -> copy of B\n"
+"\n"
+"Pad a numeric string B with zeros on the left, to fill a field\n"
+"of the specified width. B is never truncated.");
static PyObject *
-stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
-/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
+stringlib_zfill(PyObject *self, PyObject *args)
{
Py_ssize_t fill;
PyObject *s;
char *p;
+ Py_ssize_t width;
+
+ if (!PyArg_ParseTuple(args, "n:zfill", &width))
+ return NULL;
if (STRINGLIB_LEN(self) >= width) {
- return return_self(self);
+ if (STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+ /* We're defined as returning a copy; If the object is mutable
+ * that means we must make an identical copy. */
+ return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+ Py_INCREF(self);
+ return (PyObject*) self;
+#endif
+ }
+ else
+ return STRINGLIB_NEW(
+ STRINGLIB_STR(self),
+ STRINGLIB_LEN(self)
+ );
}
fill = width - STRINGLIB_LEN(self);
@@ -244,497 +260,5 @@ stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
p[fill] = '0';
}
- return s;
-}
-
-
-/* find and count characters and substrings */
-
-#define findchar(target, target_len, c) \
- ((char *)memchr((const void *)(target), c, target_len))
-
-
-static Py_ssize_t
-countchar(const char *target, Py_ssize_t target_len, char c,
- Py_ssize_t maxcount)
-{
- Py_ssize_t count = 0;
- const char *start = target;
- const char *end = target + target_len;
-
- while ((start = findchar(start, end - start, c)) != NULL) {
- count++;
- if (count >= maxcount)
- break;
- start += 1;
- }
- return count;
-}
-
-
-/* Algorithms for different cases of string replacement */
-
-/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
-static PyObject *
-stringlib_replace_interleave(PyObject *self,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- const char *self_s;
- char *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, i;
- PyObject *result;
-
- self_len = STRINGLIB_LEN(self);
-
- /* 1 at the end plus 1 after every character;
- count = min(maxcount, self_len + 1) */
- if (maxcount <= self_len) {
- count = maxcount;
- }
- else {
- /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
- count = self_len + 1;
- }
-
- /* Check for overflow */
- /* result_len = count * to_len + self_len; */
- assert(count > 0);
- if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError,
- "replace bytes is too long");
- return NULL;
- }
- result_len = count * to_len + self_len;
- result = STRINGLIB_NEW(NULL, result_len);
- if (result == NULL) {
- return NULL;
- }
-
- self_s = STRINGLIB_STR(self);
- result_s = STRINGLIB_STR(result);
-
- if (to_len > 1) {
- /* Lay the first one down (guaranteed this will occur) */
- memcpy(result_s, to_s, to_len);
- result_s += to_len;
- count -= 1;
-
- for (i = 0; i < count; i++) {
- *result_s++ = *self_s++;
- memcpy(result_s, to_s, to_len);
- result_s += to_len;
- }
- }
- else {
- result_s[0] = to_s[0];
- result_s += to_len;
- count -= 1;
- for (i = 0; i < count; i++) {
- *result_s++ = *self_s++;
- result_s[0] = to_s[0];
- result_s += to_len;
- }
- }
-
- /* Copy the rest of the original string */
- memcpy(result_s, self_s, self_len - i);
-
- return result;
-}
-
-/* Special case for deleting a single character */
-/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
-static PyObject *
-stringlib_replace_delete_single_character(PyObject *self,
- char from_c, Py_ssize_t maxcount)
-{
- const char *self_s, *start, *next, *end;
- char *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count;
- PyObject *result;
-
- self_len = STRINGLIB_LEN(self);
- self_s = STRINGLIB_STR(self);
-
- count = countchar(self_s, self_len, from_c, maxcount);
- if (count == 0) {
- return return_self(self);
- }
-
- result_len = self_len - count; /* from_len == 1 */
- assert(result_len>=0);
-
- result = STRINGLIB_NEW(NULL, result_len);
- if (result == NULL) {
- return NULL;
- }
- result_s = STRINGLIB_STR(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- next = findchar(start, end - start, from_c);
- if (next == NULL)
- break;
- memcpy(result_s, start, next - start);
- result_s += (next - start);
- start = next + 1;
- }
- memcpy(result_s, start, end - start);
-
- return result;
-}
-
-/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
-
-static PyObject *
-stringlib_replace_delete_substring(PyObject *self,
- const char *from_s, Py_ssize_t from_len,
- Py_ssize_t maxcount)
-{
- const char *self_s, *start, *next, *end;
- char *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset;
- PyObject *result;
-
- self_len = STRINGLIB_LEN(self);
- self_s = STRINGLIB_STR(self);
-
- count = stringlib_count(self_s, self_len,
- from_s, from_len,
- maxcount);
-
- if (count == 0) {
- /* no matches */
- return return_self(self);
- }
-
- result_len = self_len - (count * from_len);
- assert (result_len>=0);
-
- result = STRINGLIB_NEW(NULL, result_len);
- if (result == NULL) {
- return NULL;
- }
- result_s = STRINGLIB_STR(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- offset = stringlib_find(start, end - start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- next = start + offset;
-
- memcpy(result_s, start, next - start);
-
- result_s += (next - start);
- start = next + from_len;
- }
- memcpy(result_s, start, end - start);
- return result;
-}
-
-/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
-static PyObject *
-stringlib_replace_single_character_in_place(PyObject *self,
- char from_c, char to_c,
- Py_ssize_t maxcount)
-{
- const char *self_s, *end;
- char *result_s, *start, *next;
- Py_ssize_t self_len;
- PyObject *result;
-
- /* The result string will be the same size */
- self_s = STRINGLIB_STR(self);
- self_len = STRINGLIB_LEN(self);
-
- next = findchar(self_s, self_len, from_c);
-
- if (next == NULL) {
- /* No matches; return the original bytes */
- return return_self(self);
- }
-
- /* Need to make a new bytes */
- result = STRINGLIB_NEW(NULL, self_len);
- if (result == NULL) {
- return NULL;
- }
- result_s = STRINGLIB_STR(result);
- memcpy(result_s, self_s, self_len);
-
- /* change everything in-place, starting with this one */
- start = result_s + (next - self_s);
- *start = to_c;
- start++;
- end = result_s + self_len;
-
- while (--maxcount > 0) {
- next = findchar(start, end - start, from_c);
- if (next == NULL)
- break;
- *next = to_c;
- start = next + 1;
- }
-
- return result;
-}
-
-/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
-static PyObject *
-stringlib_replace_substring_in_place(PyObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- const char *self_s, *end;
- char *result_s, *start;
- Py_ssize_t self_len, offset;
- PyObject *result;
-
- /* The result bytes will be the same size */
-
- self_s = STRINGLIB_STR(self);
- self_len = STRINGLIB_LEN(self);
-
- offset = stringlib_find(self_s, self_len,
- from_s, from_len,
- 0);
- if (offset == -1) {
- /* No matches; return the original bytes */
- return return_self(self);
- }
-
- /* Need to make a new bytes */
- result = STRINGLIB_NEW(NULL, self_len);
- if (result == NULL) {
- return NULL;
- }
- result_s = STRINGLIB_STR(result);
- memcpy(result_s, self_s, self_len);
-
- /* change everything in-place, starting with this one */
- start = result_s + offset;
- memcpy(start, to_s, from_len);
- start += from_len;
- end = result_s + self_len;
-
- while ( --maxcount > 0) {
- offset = stringlib_find(start, end - start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- memcpy(start + offset, to_s, from_len);
- start += offset + from_len;
- }
-
- return result;
-}
-
-/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
-static PyObject *
-stringlib_replace_single_character(PyObject *self,
- char from_c,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- const char *self_s, *start, *next, *end;
- char *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count;
- PyObject *result;
-
- self_s = STRINGLIB_STR(self);
- self_len = STRINGLIB_LEN(self);
-
- count = countchar(self_s, self_len, from_c, maxcount);
- if (count == 0) {
- /* no matches, return unchanged */
- return return_self(self);
- }
-
- /* use the difference between current and new, hence the "-1" */
- /* result_len = self_len + count * (to_len-1) */
- assert(count > 0);
- if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
- return NULL;
- }
- result_len = self_len + count * (to_len - 1);
-
- result = STRINGLIB_NEW(NULL, result_len);
- if (result == NULL) {
- return NULL;
- }
- result_s = STRINGLIB_STR(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- next = findchar(start, end - start, from_c);
- if (next == NULL)
- break;
-
- if (next == start) {
- /* replace with the 'to' */
- memcpy(result_s, to_s, to_len);
- result_s += to_len;
- start += 1;
- } else {
- /* copy the unchanged old then the 'to' */
- memcpy(result_s, start, next - start);
- result_s += (next - start);
- memcpy(result_s, to_s, to_len);
- result_s += to_len;
- start = next + 1;
- }
- }
- /* Copy the remainder of the remaining bytes */
- memcpy(result_s, start, end - start);
-
- return result;
-}
-
-/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
-static PyObject *
-stringlib_replace_substring(PyObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- const char *self_s, *start, *next, *end;
- char *result_s;
- Py_ssize_t self_len, result_len;
- Py_ssize_t count, offset;
- PyObject *result;
-
- self_s = STRINGLIB_STR(self);
- self_len = STRINGLIB_LEN(self);
-
- count = stringlib_count(self_s, self_len,
- from_s, from_len,
- maxcount);
-
- if (count == 0) {
- /* no matches, return unchanged */
- return return_self(self);
- }
-
- /* Check for overflow */
- /* result_len = self_len + count * (to_len-from_len) */
- assert(count > 0);
- if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
- PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
- return NULL;
- }
- result_len = self_len + count * (to_len - from_len);
-
- result = STRINGLIB_NEW(NULL, result_len);
- if (result == NULL) {
- return NULL;
- }
- result_s = STRINGLIB_STR(result);
-
- start = self_s;
- end = self_s + self_len;
- while (count-- > 0) {
- offset = stringlib_find(start, end - start,
- from_s, from_len,
- 0);
- if (offset == -1)
- break;
- next = start + offset;
- if (next == start) {
- /* replace with the 'to' */
- memcpy(result_s, to_s, to_len);
- result_s += to_len;
- start += from_len;
- } else {
- /* copy the unchanged old then the 'to' */
- memcpy(result_s, start, next - start);
- result_s += (next - start);
- memcpy(result_s, to_s, to_len);
- result_s += to_len;
- start = next + from_len;
- }
- }
- /* Copy the remainder of the remaining bytes */
- memcpy(result_s, start, end - start);
-
- return result;
-}
-
-
-static PyObject *
-stringlib_replace(PyObject *self,
- const char *from_s, Py_ssize_t from_len,
- const char *to_s, Py_ssize_t to_len,
- Py_ssize_t maxcount)
-{
- if (STRINGLIB_LEN(self) < from_len) {
- /* nothing to do; return the original bytes */
- return return_self(self);
- }
- if (maxcount < 0) {
- maxcount = PY_SSIZE_T_MAX;
- } else if (maxcount == 0) {
- /* nothing to do; return the original bytes */
- return return_self(self);
- }
-
- /* Handle zero-length special cases */
- if (from_len == 0) {
- if (to_len == 0) {
- /* nothing to do; return the original bytes */
- return return_self(self);
- }
- /* insert the 'to' bytes everywhere. */
- /* >>> b"Python".replace(b"", b".") */
- /* b'.P.y.t.h.o.n.' */
- return stringlib_replace_interleave(self, to_s, to_len, maxcount);
- }
-
- if (to_len == 0) {
- /* delete all occurrences of 'from' bytes */
- if (from_len == 1) {
- return stringlib_replace_delete_single_character(
- self, from_s[0], maxcount);
- } else {
- return stringlib_replace_delete_substring(
- self, from_s, from_len, maxcount);
- }
- }
-
- /* Handle special case where both bytes have the same length */
-
- if (from_len == to_len) {
- if (from_len == 1) {
- return stringlib_replace_single_character_in_place(
- self, from_s[0], to_s[0], maxcount);
- } else {
- return stringlib_replace_substring_in_place(
- self, from_s, from_len, to_s, to_len, maxcount);
- }
- }
-
- /* Otherwise use the more generic algorithms */
- if (from_len == 1) {
- return stringlib_replace_single_character(
- self, from_s[0], to_s, to_len, maxcount);
- } else {
- /* len('from')>=2, len('to')>=1 */
- return stringlib_replace_substring(
- self, from_s, from_len, to_s, to_len, maxcount);
- }
+ return (PyObject*) s;
}
-
-#undef findchar
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
deleted file mode 100644
index ce1eb57..0000000
--- a/Objects/stringlib/ucs1lib.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* this is sort of a hack. there's at least one place (formatting
- floats) where some stringlib code takes a different path if it's
- compiled as unicode. */
-#define STRINGLIB_IS_UNICODE 1
-
-#define FASTSEARCH ucs1lib_fastsearch
-#define STRINGLIB(F) ucs1lib_##F
-#define STRINGLIB_OBJECT PyUnicodeObject
-#define STRINGLIB_SIZEOF_CHAR 1
-#define STRINGLIB_MAX_CHAR 0xFFu
-#define STRINGLIB_CHAR Py_UCS1
-#define STRINGLIB_TYPE_NAME "unicode"
-#define STRINGLIB_PARSE_CODE "U"
-#define STRINGLIB_EMPTY unicode_empty
-#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
-#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
-#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
-#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
-#define STRINGLIB_STR PyUnicode_1BYTE_DATA
-#define STRINGLIB_LEN PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW _PyUnicode_FromUCS1
-#define STRINGLIB_CHECK PyUnicode_Check
-#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
-
-#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_ASCII
-
-#define _Py_InsertThousandsGrouping _PyUnicode_ucs1_InsertThousandsGrouping
-
-
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
deleted file mode 100644
index f900cb6..0000000
--- a/Objects/stringlib/ucs2lib.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* this is sort of a hack. there's at least one place (formatting
- floats) where some stringlib code takes a different path if it's
- compiled as unicode. */
-#define STRINGLIB_IS_UNICODE 1
-
-#define FASTSEARCH ucs2lib_fastsearch
-#define STRINGLIB(F) ucs2lib_##F
-#define STRINGLIB_OBJECT PyUnicodeObject
-#define STRINGLIB_SIZEOF_CHAR 2
-#define STRINGLIB_MAX_CHAR 0xFFFFu
-#define STRINGLIB_CHAR Py_UCS2
-#define STRINGLIB_TYPE_NAME "unicode"
-#define STRINGLIB_PARSE_CODE "U"
-#define STRINGLIB_EMPTY unicode_empty
-#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
-#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
-#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
-#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
-#define STRINGLIB_STR PyUnicode_2BYTE_DATA
-#define STRINGLIB_LEN PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW _PyUnicode_FromUCS2
-#define STRINGLIB_CHECK PyUnicode_Check
-#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
-
-#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_ASCII
-
-#define _Py_InsertThousandsGrouping _PyUnicode_ucs2_InsertThousandsGrouping
-
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
deleted file mode 100644
index 86a480f..0000000
--- a/Objects/stringlib/ucs4lib.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* this is sort of a hack. there's at least one place (formatting
- floats) where some stringlib code takes a different path if it's
- compiled as unicode. */
-#define STRINGLIB_IS_UNICODE 1
-
-#define FASTSEARCH ucs4lib_fastsearch
-#define STRINGLIB(F) ucs4lib_##F
-#define STRINGLIB_OBJECT PyUnicodeObject
-#define STRINGLIB_SIZEOF_CHAR 4
-#define STRINGLIB_MAX_CHAR 0x10FFFFu
-#define STRINGLIB_CHAR Py_UCS4
-#define STRINGLIB_TYPE_NAME "unicode"
-#define STRINGLIB_PARSE_CODE "U"
-#define STRINGLIB_EMPTY unicode_empty
-#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
-#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
-#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
-#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
-#define STRINGLIB_STR PyUnicode_4BYTE_DATA
-#define STRINGLIB_LEN PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW _PyUnicode_FromUCS4
-#define STRINGLIB_CHECK PyUnicode_Check
-#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
-
-#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_ASCII
-
-#define _Py_InsertThousandsGrouping _PyUnicode_ucs4_InsertThousandsGrouping
-
diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h
deleted file mode 100644
index f9d3f1d..0000000
--- a/Objects/stringlib/undef.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#undef FASTSEARCH
-#undef STRINGLIB
-#undef STRINGLIB_SIZEOF_CHAR
-#undef STRINGLIB_MAX_CHAR
-#undef STRINGLIB_CHAR
-#undef STRINGLIB_STR
-#undef STRINGLIB_LEN
-#undef STRINGLIB_NEW
-#undef _Py_InsertThousandsGrouping
-#undef STRINGLIB_IS_UNICODE
-
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index 3db5629..dd814f6 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -6,10 +6,7 @@
compiled as unicode. */
#define STRINGLIB_IS_UNICODE 1
-#define FASTSEARCH fastsearch
-#define STRINGLIB(F) stringlib_##F
#define STRINGLIB_OBJECT PyUnicodeObject
-#define STRINGLIB_SIZEOF_CHAR Py_UNICODE_SIZE
#define STRINGLIB_CHAR Py_UNICODE
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
@@ -18,14 +15,22 @@
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
+#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
+#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER
+#define STRINGLIB_FILL Py_UNICODE_FILL
#define STRINGLIB_STR PyUnicode_AS_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
+#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
+#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
+#if PY_VERSION_HEX < 0x03000000
+#define STRINGLIB_TOSTR PyObject_Unicode
+#else
#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_ASCII
+#endif
#define STRINGLIB_WANT_CONTAINS_OBJ 1