summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Objects/stringlib/README.txt 29
-rw-r--r-- Objects/stringlib/find.h 33
-rw-r--r-- Objects/stringobject.c 94
-rw-r--r-- Objects/unicodeobject.c 152
4 files changed, 161 insertions, 147 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index 051197c..82a8774 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -3,3 +3,32 @@ possibly other modules, in a not too distant future).
the stuff in here is included into relevant places; see the individual
source files for details.
+
+--------------------------------------------------------------------
+the following defines are used by the different modules:
+
+STRINGLIB_CHAR
+
+ the type used to hold a character (char or Py_UNICODE)
+
+STRINGLIB_EMPTY
+
+ a PyObject representing the empty string
+
+int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
+
+ compares two strings. returns 0 if they match, and non-zero if not.
+
+Py_ssize_t STRINGLIB_LEN(PyObject*)
+
+ returns the length of the given string object (which must be of the
+ right type)
+
+PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
+
+ creates a new string object
+
+STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
+
+ returns the pointer to the character data for the given string
+ object (which must be of the right type)
diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h
index d5394b4..9db633d 100644
--- a/Objects/stringlib/find.h
+++ b/Objects/stringlib/find.h
@@ -48,6 +48,39 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
+#ifdef STRINGLIB_STR
+
+Py_LOCAL(Py_ssize_t)
+stringlib_find_obj(PyObject* str, PyObject* sub,
+ Py_ssize_t start, Py_ssize_t end)
+{
+ return stringlib_find(
+ STRINGLIB_STR(str) + start, end - start,
+ STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start
+ );
+}
+
+Py_LOCAL(int)
+stringlib_contains_obj(PyObject* str, PyObject* sub)
+{
+ return stringlib_find(
+ STRINGLIB_STR(str), STRINGLIB_LEN(str),
+ STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
+ ) != -1;
+}
+
+Py_LOCAL(Py_ssize_t)
+stringlib_rfind_obj(PyObject* str, PyObject* sub,
+ Py_ssize_t start, Py_ssize_t end)
+{
+ return stringlib_rfind(
+ STRINGLIB_STR(str) + start, end - start,
+ STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start
+ );
+}
+
+#endif
+
#endif
/*
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 7e93783..c881927 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -690,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s,
return NULL;
}
+/* -------------------------------------------------------------------- */
+/* object api */
+
static Py_ssize_t
string_getsize(register PyObject *op)
{
@@ -765,22 +768,23 @@ PyString_AsStringAndSize(register PyObject *obj,
}
/* -------------------------------------------------------------------- */
-/* stringlib components */
+/* Methods */
#define STRINGLIB_CHAR char
-#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_CMP memcmp
+#define STRINGLIB_LEN PyString_GET_SIZE
+#define STRINGLIB_NEW PyString_FromStringAndSize
+#define STRINGLIB_STR PyString_AS_STRING
#define STRINGLIB_EMPTY nullstring
#include "stringlib/fastsearch.h"
+#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
-/* -------------------------------------------------------------------- */
-/* Methods */
static int
string_print(PyStringObject *op, FILE *fp, int flags)
@@ -1048,49 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i,
}
static int
-string_contains(PyObject *a, PyObject *el)
+string_contains(PyObject *str_obj, PyObject *sub_obj)
{
- char *s = PyString_AS_STRING(a);
- const char *sub = PyString_AS_STRING(el);
- Py_ssize_t len_sub = PyString_GET_SIZE(el);
- Py_ssize_t pos;
-
- if (!PyString_CheckExact(el)) {
+ if (!PyString_CheckExact(sub_obj)) {
#ifdef Py_USING_UNICODE
- if (PyUnicode_Check(el))
- return PyUnicode_Contains(a, el);
+ if (PyUnicode_Check(sub_obj))
+ return PyUnicode_Contains(str_obj, sub_obj);
#endif
- if (!PyString_Check(el)) {
+ if (!PyString_Check(sub_obj)) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand");
return -1;
}
}
- if (len_sub == 0)
- return 1;
-
- pos = fastsearch(
- s, PyString_GET_SIZE(a),
- sub, len_sub, FAST_SEARCH
- );
-
- return (pos != -1);
+ return stringlib_contains_obj(str_obj, sub_obj);
}
static PyObject *
string_item(PyStringObject *a, register Py_ssize_t i)
{
+ char pchar;
PyObject *v;
- char *pchar;
if (i < 0 || i >= a->ob_size) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL;
}
- pchar = a->ob_sval + i;
- v = (PyObject *)characters[*pchar & UCHAR_MAX];
+ pchar = a->ob_sval[i];
+ v = (PyObject *)characters[pchar & UCHAR_MAX];
if (v == NULL)
- v = PyString_FromStringAndSize(pchar, 1);
+ v = PyString_FromStringAndSize(&pchar, 1);
else {
#ifdef COUNT_ALLOCS
one_strings++;
@@ -1166,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
int
_PyString_Eq(PyObject *o1, PyObject *o2)
{
- PyStringObject *a, *b;
- a = (PyStringObject*)o1;
- b = (PyStringObject*)o2;
+ PyStringObject *a = (PyStringObject*) o1;
+ PyStringObject *b = (PyStringObject*) o2;
return a->ob_size == b->ob_size
&& *a->ob_sval == *b->ob_sval
&& memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
@@ -2264,43 +2254,37 @@ as in slice notation.");
static PyObject *
string_count(PyStringObject *self, PyObject *args)
{
- const char *s = PyString_AS_STRING(self), *sub;
- Py_ssize_t len = PyString_GET_SIZE(self), n;
- Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
- Py_ssize_t m, r;
- PyObject *subobj;
+ PyObject *sub_obj;
+ const char *str = PyString_AS_STRING(self), *sub;
+ Py_ssize_t sub_len;
+ Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
- if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
- _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
+ if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
+ _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- if (PyString_Check(subobj)) {
- sub = PyString_AS_STRING(subobj);
- n = PyString_GET_SIZE(subobj);
+ if (PyString_Check(sub_obj)) {
+ sub = PyString_AS_STRING(sub_obj);
+ sub_len = PyString_GET_SIZE(sub_obj);
}
#ifdef Py_USING_UNICODE
- else if (PyUnicode_Check(subobj)) {
+ else if (PyUnicode_Check(sub_obj)) {
Py_ssize_t count;
- count = PyUnicode_Count((PyObject *)self, subobj, i, last);
+ count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
if (count == -1)
return NULL;
else
- return PyInt_FromLong((long) count);
+ return PyInt_FromSsize_t(count);
}
#endif
- else if (PyObject_AsCharBuffer(subobj, &sub, &n))
+ else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL;
- string_adjust_indices(&i, &last, len);
+ string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
- m = last + 1 - n;
- if (n == 0)
- return PyInt_FromSsize_t(m-i);
-
- r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
- if (r < 0)
- r = 0; /* no match */
- return PyInt_FromSsize_t(r);
+ return PyInt_FromSsize_t(
+ stringlib_count(str + start, end - start, sub, sub_len)
+ );
}
PyDoc_STRVAR(swapcase__doc__,
@@ -2477,7 +2461,7 @@ return_self(PyStringObject *self)
}
Py_LOCAL(Py_ssize_t)
- countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
+countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
{
Py_ssize_t count=0;
char *start=target;
@@ -2580,7 +2564,7 @@ countstring(char *target, Py_ssize_t target_len,
}
-/* Algorithms for difference cases of string replacement */
+/* Algorithms for different cases of string replacement */
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Py_LOCAL(PyStringObject *)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 26cf521..8cb32e1 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3857,7 +3857,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
#define STRINGLIB_CHAR Py_UNICODE
+#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
+#define STRINGLIB_STR PyUnicode_AS_UNICODE
Py_LOCAL(int)
STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
@@ -3918,67 +3920,33 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
return result;
}
-static Py_ssize_t findstring(PyUnicodeObject *self,
- PyUnicodeObject *substring,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction)
-{
- FIX_START_END(self);
-
- if (substring->length == 0)
- return (direction > 0) ? start : end;
-
- if (direction > 0) {
- Py_ssize_t pos = fastsearch(
- PyUnicode_AS_UNICODE(self) + start, end - start,
- substring->str, substring->length, FAST_SEARCH
- );
- if (pos >= 0)
- return pos + start;
- } else {
- end -= substring->length;
- for (; end >= start; end--)
- if (Py_UNICODE_MATCH(self, end, substring))
- return end;
- }
- return -1;
-}
-
Py_ssize_t PyUnicode_Find(PyObject *str,
- PyObject *substr,
+ PyObject *sub,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{
Py_ssize_t result;
- PyUnicodeObject* str_obj;
- PyUnicodeObject* sub_obj;
- str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
- if (!str_obj)
+ str = PyUnicode_FromObject(str);
+ if (!str)
return -2;
- sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr);
- if (!sub_obj) {
- Py_DECREF(str_obj);
+ sub = PyUnicode_FromObject(sub);
+ if (!sub) {
+ Py_DECREF(str);
return -2;
}
- FIX_START_END(str_obj);
+ FIX_START_END((PyUnicodeObject*) str);
if (direction > 0)
- result = stringlib_find(
- str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
- start
- );
+ result = stringlib_find_obj(str, sub, start, end);
else
- result = stringlib_rfind(
- str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
- start
- );
+ result = stringlib_rfind_obj(str, sub, start, end);
+
+ Py_DECREF(str);
+ Py_DECREF(sub);
- Py_DECREF(str_obj);
- Py_DECREF(sub_obj);
return result;
}
@@ -5046,39 +5014,29 @@ onError:
int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
- PyUnicodeObject *u, *v;
- Py_ssize_t size;
- Py_ssize_t pos;
+ PyObject *str, *sub;
+ int result;
/* Coerce the two arguments */
- v = (PyUnicodeObject *) PyUnicode_FromObject(element);
- if (!v) {
+ sub = PyUnicode_FromObject(element);
+ if (!sub) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand");
return -1;
}
- u = (PyUnicodeObject *) PyUnicode_FromObject(container);
- if (!u) {
- Py_DECREF(v);
+ str = PyUnicode_FromObject(container);
+ if (!str) {
+ Py_DECREF(sub);
return -1;
}
- size = PyUnicode_GET_SIZE(v);
- if (!size) {
- pos = 0;
- goto done;
- }
+ result = stringlib_contains_obj(str, sub);
- pos = fastsearch(
- PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u),
- PyUnicode_AS_UNICODE(v), size, FAST_SEARCH
- );
+ Py_DECREF(str);
+ Py_DECREF(sub);
-done:
- Py_DECREF(u);
- Py_DECREF(v);
- return (pos != -1);
+ return result;
}
/* Concat to string or Unicode object giving a new Unicode object. */
@@ -5305,23 +5263,26 @@ Return -1 on failure.");
static PyObject *
unicode_find(PyUnicodeObject *self, PyObject *args)
{
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *result;
+ Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1));
+ FIX_START_END(self);
+
+ result = stringlib_find_obj((PyObject*) self, substring, start, end);
Py_DECREF(substring);
- return result;
+
+ return PyInt_FromSsize_t(result);
}
static PyObject *
@@ -5371,7 +5332,7 @@ static PyObject *
unicode_index(PyUnicodeObject *self, PyObject *args)
{
Py_ssize_t result;
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -5379,18 +5340,21 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = findstring(self, substring, start, end, 1);
+ FIX_START_END(self);
+
+ result = stringlib_find_obj((PyObject*) self, substring, start, end);
Py_DECREF(substring);
+
if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL;
}
+
return PyInt_FromSsize_t(result);
}
@@ -6038,23 +6002,25 @@ Return -1 on failure.");
static PyObject *
unicode_rfind(PyUnicodeObject *self, PyObject *args)
{
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
- PyObject *result;
+ Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1));
+ FIX_START_END(self);
+
+ result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
Py_DECREF(substring);
- return result;
+
+ return PyInt_FromSsize_t(result);
}
PyDoc_STRVAR(rindex__doc__,
@@ -6065,22 +6031,24 @@ Like S.rfind() but raise ValueError when the substring is not found.");
static PyObject *
unicode_rindex(PyUnicodeObject *self, PyObject *args)
{
- Py_ssize_t result;
- PyUnicodeObject *substring;
+ PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
+ Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
- substring = (PyUnicodeObject *)PyUnicode_FromObject(
- (PyObject *)substring);
- if (substring == NULL)
+ substring = PyUnicode_FromObject(substring);
+ if (!substring)
return NULL;
- result = findstring(self, substring, start, end, -1);
+ FIX_START_END(self);
+
+ result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
Py_DECREF(substring);
+
if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL;