summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/asciilib.h1
-rw-r--r--Objects/stringlib/codecs.h6
-rw-r--r--Objects/stringlib/fastsearch.h8
-rw-r--r--Objects/stringlib/join.h133
-rw-r--r--Objects/stringlib/replace.h53
-rw-r--r--Objects/stringlib/stringdefs.h1
-rw-r--r--Objects/stringlib/ucs1lib.h1
-rw-r--r--Objects/stringlib/ucs2lib.h1
-rw-r--r--Objects/stringlib/ucs4lib.h1
-rw-r--r--Objects/stringlib/undef.h1
-rw-r--r--Objects/stringlib/unicode_format.h24
-rw-r--r--Objects/stringlib/unicodedefs.h1
12 files changed, 203 insertions, 28 deletions
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index f62813d..d0fc18d 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index f353367..f855003 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -47,7 +47,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
unsigned long value = *(unsigned long *) _s;
if (value & ASCII_CHAR_MASK)
break;
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
_p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
@@ -486,7 +486,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
const unsigned char *q = *inptr;
STRINGLIB_CHAR *p = dest + *outpos;
/* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
int ihi = !!native_ordering, ilo = !native_ordering;
#else
int ihi = !native_ordering, ilo = !!native_ordering;
@@ -517,7 +517,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
block = SWAB(block);
#endif
}
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
# if SIZEOF_LONG == 4
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
p[1] = (STRINGLIB_CHAR)(block >> 16);
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 55ac77d..cd7cac4 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -142,6 +142,8 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
mask = 0;
if (mode != FAST_RSEARCH) {
+ const STRINGLIB_CHAR *ss = s + m - 1;
+ const STRINGLIB_CHAR *pp = p + m - 1;
/* create compressed boyer-moore delta 1 table */
@@ -156,7 +158,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
- if (s[i+m-1] == p[m-1]) {
+ if (ss[i] == pp[0]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
@@ -172,13 +174,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, s[i+m]))
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, s[i+m]))
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
}
}
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
new file mode 100644
index 0000000..5568b31
--- /dev/null
+++ b/Objects/stringlib/join.h
@@ -0,0 +1,133 @@
+/* stringlib: bytes joining implementation */
+
+#if STRINGLIB_SIZEOF_CHAR != 1
+#error join.h only compatible with byte-wise strings
+#endif
+
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
+{
+ char *sepstr = STRINGLIB_STR(sep);
+ const Py_ssize_t seplen = STRINGLIB_LEN(sep);
+ PyObject *res = NULL;
+ char *p;
+ Py_ssize_t seqlen = 0;
+ Py_ssize_t sz = 0;
+ Py_ssize_t i, nbufs;
+ PyObject *seq, *item;
+ Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+ Py_buffer static_buffers[NB_STATIC_BUFFERS];
+
+ seq = PySequence_Fast(iterable, "can only join an iterable");
+ if (seq == NULL) {
+ return NULL;
+ }
+
+ seqlen = PySequence_Fast_GET_SIZE(seq);
+ if (seqlen == 0) {
+ Py_DECREF(seq);
+ return STRINGLIB_NEW(NULL, 0);
+ }
+#ifndef STRINGLIB_MUTABLE
+ if (seqlen == 1) {
+ item = PySequence_Fast_GET_ITEM(seq, 0);
+ if (STRINGLIB_CHECK_EXACT(item)) {
+ Py_INCREF(item);
+ Py_DECREF(seq);
+ return item;
+ }
+ }
+#endif
+ if (seqlen > NB_STATIC_BUFFERS) {
+ buffers = PyMem_NEW(Py_buffer, seqlen);
+ if (buffers == NULL) {
+ Py_DECREF(seq);
+ PyErr_NoMemory();
+ return NULL;
+ }
+ }
+ else {
+ buffers = static_buffers;
+ }
+
+ /* Here is the general case. Do a pre-pass to figure out the total
+ * amount of space we'll need (sz), and see whether all arguments are
+ * buffer-compatible.
+ */
+ for (i = 0, nbufs = 0; i < seqlen; i++) {
+ Py_ssize_t itemlen;
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ if (_getbuffer(item, &buffers[i]) < 0) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected bytes, bytearray, "
+ "or an object with the buffer interface, %.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ nbufs = i + 1; /* for error cleanup */
+ itemlen = buffers[i].len;
+ if (itemlen > PY_SSIZE_T_MAX - sz) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += itemlen;
+ if (i != 0) {
+ if (seplen > PY_SSIZE_T_MAX - sz) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += seplen;
+ }
+ if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "sequence changed size during iteration");
+ goto error;
+ }
+ }
+
+ /* Allocate result space. */
+ res = STRINGLIB_NEW(NULL, sz);
+ if (res == NULL)
+ goto error;
+
+ /* Catenate everything. */
+ p = STRINGLIB_STR(res);
+ if (!seplen) {
+ /* fast path */
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n = buffers[i].len;
+ char *q = buffers[i].buf;
+ Py_MEMCPY(p, q, n);
+ p += n;
+ }
+ goto done;
+ }
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n;
+ char *q;
+ if (i) {
+ Py_MEMCPY(p, sepstr, seplen);
+ p += seplen;
+ }
+ n = buffers[i].len;
+ q = buffers[i].buf;
+ Py_MEMCPY(p, q, n);
+ p += n;
+ }
+ goto done;
+
+error:
+ res = NULL;
+done:
+ Py_DECREF(seq);
+ for (i = 0; i < nbufs; i++)
+ PyBuffer_Release(&buffers[i]);
+ if (buffers != static_buffers)
+ PyMem_FREE(buffers);
+ return res;
+}
+
+#undef NB_STATIC_BUFFERS
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h
new file mode 100644
index 0000000..ef318ed
--- /dev/null
+++ b/Objects/stringlib/replace.h
@@ -0,0 +1,53 @@
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+ Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+ *s = u2;
+ while (--maxcount && ++s != end) {
+ /* Find the next character to be replaced.
+
+ If it occurs often, it is faster to scan for it using an inline
+ loop. If it occurs seldom, it is faster to scan for it using a
+ function call; the overhead of the function call is amortized
+ across the many characters that call covers. We start with an
+ inline loop and use a heuristic to determine whether to fall back
+ to a function call. */
+ if (*s != u1) {
+ int attempts = 10;
+ /* search u1 in a dummy loop */
+ while (1) {
+ if (++s == end)
+ return;
+ if (*s == u1)
+ break;
+ if (!--attempts) {
+ /* if u1 was not found for attempts iterations,
+ use FASTSEARCH() or memchr() */
+#if STRINGLIB_SIZEOF_CHAR == 1
+ s++;
+ s = memchr(s, u1, end - s);
+ if (s == NULL)
+ return;
+#else
+ Py_ssize_t i;
+ STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+ s++;
+ i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+ if (i < 0)
+ return;
+ s += i;
+#endif
+ /* restart the dummy loop */
+ break;
+ }
+ }
+ }
+ *s = u2;
+ }
+}
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index 7bb91a7..ce27f3e 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -21,7 +21,6 @@
#define STRINGLIB_STR PyBytes_AS_STRING
#define STRINGLIB_LEN PyBytes_GET_SIZE
#define STRINGLIB_NEW PyBytes_FromStringAndSize
-#define STRINGLIB_RESIZE _PyBytes_Resize
#define STRINGLIB_CHECK PyBytes_Check
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
index e8c6fcb..ce1eb57 100644
--- a/Objects/stringlib/ucs1lib.h
+++ b/Objects/stringlib/ucs1lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS1
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
index 45e5729..f900cb6 100644
--- a/Objects/stringlib/ucs2lib.h
+++ b/Objects/stringlib/ucs2lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_2BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS2
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
index 647a27e..86a480f 100644
--- a/Objects/stringlib/ucs4lib.h
+++ b/Objects/stringlib/ucs4lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_4BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS4
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h
index 03117ec..f9d3f1d 100644
--- a/Objects/stringlib/undef.h
+++ b/Objects/stringlib/undef.h
@@ -6,7 +6,6 @@
#undef STRINGLIB_STR
#undef STRINGLIB_LEN
#undef STRINGLIB_NEW
-#undef STRINGLIB_RESIZE
#undef _Py_InsertThousandsGrouping
#undef STRINGLIB_IS_UNICODE
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index c1c2cf3..b01d756 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -875,25 +875,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
SubString literal;
SubString field_name;
SubString format_spec;
- Py_UCS4 conversion, maxchar;
- Py_ssize_t sublen;
- int err;
+ Py_UCS4 conversion;
MarkupIterator_init(&iter, input->str, input->start, input->end);
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
&field_name, &format_spec,
&conversion,
&format_spec_needs_expanding)) == 2) {
- sublen = literal.end - literal.start;
- if (sublen) {
- maxchar = _PyUnicode_FindMaxChar(literal.str,
- literal.start, literal.end);
- err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
- if (err == -1)
+ if (literal.end != literal.start) {
+ if (!field_present && iter.str.start == iter.str.end)
+ writer->overallocate = 0;
+ if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+ literal.start, literal.end) < 0)
return 0;
- _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
- literal.str, literal.start, sublen);
- writer->pos += sublen;
}
if (field_present) {
@@ -918,7 +912,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
_PyUnicodeWriter writer;
- Py_ssize_t minlen;
/* check the recursion level */
if (recursion_depth <= 0) {
@@ -927,8 +920,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL;
}
- minlen = PyUnicode_GET_LENGTH(input->str) + 100;
- _PyUnicodeWriter_Init(&writer, minlen);
+ _PyUnicodeWriter_Init(&writer);
+ writer.overallocate = 1;
+ writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index f16f21e..48d00ec 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -21,7 +21,6 @@
#define STRINGLIB_STR PyUnicode_AS_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
-#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact