diff options
Diffstat (limited to 'Objects/stringlib')
| -rw-r--r-- | Objects/stringlib/README.txt | 9 | ||||
| -rw-r--r-- | Objects/stringlib/count.h | 16 | ||||
| -rw-r--r-- | Objects/stringlib/ctype.h | 1 | ||||
| -rw-r--r-- | Objects/stringlib/fastsearch.h | 40 | ||||
| -rw-r--r-- | Objects/stringlib/find.h | 58 | ||||
| -rw-r--r-- | Objects/stringlib/partition.h | 83 | ||||
| -rw-r--r-- | Objects/stringlib/split.h | 394 | ||||
| -rw-r--r-- | Objects/stringlib/stringdefs.h | 5 | ||||
| -rw-r--r-- | Objects/stringlib/transmogrify.h | 91 | ||||
| -rw-r--r-- | Objects/stringlib/unicodedefs.h | 3 | 
10 files changed, 504 insertions, 196 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt index d948386..c884ec3 100644 --- a/Objects/stringlib/README.txt +++ b/Objects/stringlib/README.txt @@ -28,3 +28,12 @@ STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)      returns the pointer to the character data for the given string      object (which must be of the right type) + +int STRINGLIB_CHECK_EXACT(PyObject *) + +    returns true if the object is an instance of our type, not a subclass. + +STRINGLIB_MUTABLE + +    Must be 0 or 1 to tell the cpp macros in stringlib code if the object +    being operated on is mutable or not. diff --git a/Objects/stringlib/count.h b/Objects/stringlib/count.h index eba37e9..de34f96 100644 --- a/Objects/stringlib/count.h +++ b/Objects/stringlib/count.h @@ -9,28 +9,22 @@  Py_LOCAL_INLINE(Py_ssize_t)  stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len) +                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, +                Py_ssize_t maxcount)  {      Py_ssize_t count;      if (str_len < 0)          return 0; /* start > len(str) */      if (sub_len == 0) -        return str_len + 1; +        return (str_len < maxcount) ? str_len + 1 : maxcount; -    count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT); +    count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);      if (count < 0) -        count = 0; /* no match */ +        return 0; /* no match */      return count;  }  #endif - -/* -Local variables: -c-basic-offset: 4 -indent-tabs-mode: nil -End: -*/ diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h index 8951276..739cf3d 100644 --- a/Objects/stringlib/ctype.h +++ b/Objects/stringlib/ctype.h @@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self)                      STRINGLIB_LEN(self));      return newobj;  } - diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 7b9dd47..21cf3a2 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -18,10 +18,13 @@  #define FAST_SEARCH 1  #define FAST_RSEARCH 2 +#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1))))) +#define BLOOM(mask, ch)     ((mask &  (1 << ((ch) & (LONG_BIT - 1))))) +  Py_LOCAL_INLINE(Py_ssize_t)  fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,             const STRINGLIB_CHAR* p, Py_ssize_t m, -           int mode) +           Py_ssize_t maxcount, int mode)  {      long mask;      Py_ssize_t skip, count = 0; @@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,      w = n - m; -    if (w < 0) +    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))          return -1;      /* look for special cases */ @@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,          /* use special case for 1-character strings */          if (mode == FAST_COUNT) {              for (i = 0; i < n; i++) -                if (s[i] == p[0]) +                if (s[i] == p[0]) {                      count++; +                    if (count == maxcount) +                        return maxcount; +                }              return count;          } else if (mode == FAST_SEARCH) {              for (i = 0; i < n; i++) @@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,      mlast = m - 1;      skip = mlast - 1; +    mask = 0;      if (mode != FAST_RSEARCH) {          /* create compressed boyer-moore delta 1 table */          /* process pattern[:-1] */ -        for (mask = i = 0; i < mlast; i++) { -            mask |= (1 << (p[i] & 0x1F)); +        for (i = 0; i < mlast; i++) { +            BLOOM_ADD(mask, p[i]);              if (p[i] == p[mlast])                  skip = mlast - i - 1;          }          /* process pattern[-1] outside the loop */ -        mask |= (1 << (p[mlast] & 0x1F)); +        BLOOM_ADD(mask, p[mlast]);          for (i = 0; i <= w; i++) {              /* note: using mlast in the skip path slows things down on x86 */ @@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,                      if (mode != FAST_COUNT)                          return i;                      count++; +                    if (count == maxcount) +                        return maxcount;                      i = i + mlast;                      continue;                  }                  /* miss: check if next character is part of pattern */ -                if (!(mask & (1 << (s[i+m] & 0x1F)))) +                if (!BLOOM(mask, s[i+m]))                      i = i + m;                  else                      i = i + skip;              } else {                  /* skip: check if next character is part of pattern */ -                if (!(mask & (1 << (s[i+m] & 0x1F)))) +                if (!BLOOM(mask, s[i+m]))                      i = i + m;              }          } @@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,          /* create compressed boyer-moore delta 1 table */          /* process pattern[0] outside the loop */ -        mask = (1 << (p[0] & 0x1F)); +        BLOOM_ADD(mask, p[0]);          /* process pattern[:0:-1] */          for (i = mlast; i > 0; i--) { -            mask |= (1 << (p[i] & 0x1F)); +            BLOOM_ADD(mask, p[i]);              if (p[i] == p[0])                  skip = i - 1;          } @@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,                      /* got a match! */                      return i;                  /* miss: check if previous character is part of pattern */ -                if (!(mask & (1 << (s[i-1] & 0x1F)))) +                if (!BLOOM(mask, s[i-1]))                      i = i - m;                  else                      i = i - skip;              } else {                  /* skip: check if previous character is part of pattern */ -                if (!(mask & (1 << (s[i-1] & 0x1F)))) +                if (!BLOOM(mask, s[i-1]))                      i = i - m;              }          } @@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,  }  #endif - -/* -Local variables: -c-basic-offset: 4 -indent-tabs-mode: nil -End: -*/ diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h index b5bace7..f915296c 100644 --- a/Objects/stringlib/find.h +++ b/Objects/stringlib/find.h @@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,      if (sub_len == 0)          return offset; -    pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH); +    pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);      if (pos >= 0)          pos += offset; @@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,      if (sub_len == 0)          return str_len + offset; -    pos = fastsearch(str, str_len, sub, sub_len, FAST_RSEARCH); +    pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);      if (pos >= 0)          pos += offset; @@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,      return pos;  } +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len)         \ +    if (end > len)                              \ +        end = len;                              \ +    else if (end < 0) {                         \ +        end += len;                             \ +        if (end < 0)                            \ +            end = 0;                            \ +    }                                           \ +    if (start < 0) {                            \ +        start += len;                           \ +        if (start < 0)                          \ +            start = 0;                          \ +    } +  Py_LOCAL_INLINE(Py_ssize_t)  stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,                       const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,                       Py_ssize_t start, Py_ssize_t end)  { -    if (start < 0) -        start += str_len; -    if (start < 0) -        start = 0; -    if (end > str_len) -        end = str_len; -    if (end < 0) -        end += str_len; -    if (end < 0) -        end = 0; - +    ADJUST_INDICES(start, end, str_len);      return stringlib_find(str + start, end - start, sub, sub_len, start);  } @@ -71,21 +76,11 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,                        const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,                        Py_ssize_t start, Py_ssize_t end)  { -    if (start < 0) -        start += str_len; -    if (start < 0) -        start = 0; -    if (end > str_len) -        end = str_len; -    if (end < 0) -        end += str_len; -    if (end < 0) -        end = 0; - +    ADJUST_INDICES(start, end, str_len);      return stringlib_rfind(str + start, end - start, sub, sub_len, start);  } -#if defined(STRINGLIB_STR) && !defined(FROM_BYTEARRAY) +#ifdef STRINGLIB_WANT_CONTAINS_OBJ  Py_LOCAL_INLINE(int)  stringlib_contains_obj(PyObject* str, PyObject* sub) @@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub)          ) != -1;  } -#endif /* STRINGLIB_STR */ +#endif /* STRINGLIB_WANT_CONTAINS_OBJ */ -#ifdef FROM_UNICODE +#if STRINGLIB_IS_UNICODE  /*  This function is a helper for the "find" family (find, rfind, index, @@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring,      return 1;  } -#endif /* FROM_UNICODE */ +#endif /* STRINGLIB_IS_UNICODE */  #endif /* STRINGLIB_FIND_H */ - -/* -Local variables: -c-basic-offset: 4 -indent-tabs-mode: nil -End: -*/ diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h index 2f26212..0170bdd 100644 --- a/Objects/stringlib/partition.h +++ b/Objects/stringlib/partition.h @@ -8,33 +8,39 @@  #endif  Py_LOCAL_INLINE(PyObject*) -stringlib_partition( -    PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, -    PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len -    ) +stringlib_partition(PyObject* str_obj, +                    const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                    PyObject* sep_obj, +                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)  {      PyObject* out;      Py_ssize_t pos;      if (sep_len == 0) {          PyErr_SetString(PyExc_ValueError, "empty separator"); -	return NULL; +        return NULL;      }      out = PyTuple_New(3);      if (!out) -	return NULL; +        return NULL; -    pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH); +    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);      if (pos < 0) { -	Py_INCREF(str_obj); -	PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); -	Py_INCREF(STRINGLIB_EMPTY); -	PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); -	Py_INCREF(STRINGLIB_EMPTY); -	PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); -	return out; +#if STRINGLIB_MUTABLE +        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len)); +        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); +        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0)); +#else +        Py_INCREF(str_obj); +        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); +        Py_INCREF(STRINGLIB_EMPTY); +        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); +        Py_INCREF(STRINGLIB_EMPTY); +        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); +#endif +        return out;      }      PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); @@ -44,41 +50,47 @@ stringlib_partition(      PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));      if (PyErr_Occurred()) { -	Py_DECREF(out); -	return NULL; +        Py_DECREF(out); +        return NULL;      }      return out;  }  Py_LOCAL_INLINE(PyObject*) -stringlib_rpartition( -    PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, -    PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len -    ) +stringlib_rpartition(PyObject* str_obj, +                     const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                     PyObject* sep_obj, +                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)  {      PyObject* out;      Py_ssize_t pos;      if (sep_len == 0) {          PyErr_SetString(PyExc_ValueError, "empty separator"); -	return NULL; +        return NULL;      }      out = PyTuple_New(3);      if (!out) -	return NULL; +        return NULL; -    pos = fastsearch(str, str_len, sep, sep_len, FAST_RSEARCH); +    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);      if (pos < 0) { -	Py_INCREF(STRINGLIB_EMPTY); -	PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); -	Py_INCREF(STRINGLIB_EMPTY); -	PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); -	Py_INCREF(str_obj); -	PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); -	return out; +#if STRINGLIB_MUTABLE +        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0)); +        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); +        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len)); +#else +        Py_INCREF(STRINGLIB_EMPTY); +        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); +        Py_INCREF(STRINGLIB_EMPTY); +        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); +        Py_INCREF(str_obj); +        PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); +#endif +        return out;      }      PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); @@ -88,18 +100,11 @@ stringlib_rpartition(      PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));      if (PyErr_Occurred()) { -	Py_DECREF(out); -	return NULL; +        Py_DECREF(out); +        return NULL;      }      return out;  }  #endif - -/* -Local variables: -c-basic-offset: 4 -indent-tabs-mode: nil -End: -*/ diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h new file mode 100644 index 0000000..60e7767 --- /dev/null +++ b/Objects/stringlib/split.h @@ -0,0 +1,394 @@ +/* stringlib: split implementation */ + +#ifndef STRINGLIB_SPLIT_H +#define STRINGLIB_SPLIT_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +/* Overallocate the initial list to reduce the number of reallocs for small +   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three +   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human +   text (roughly 11 words per line) and field delimited data (usually 1-10 +   fields).  For large strings the split algorithms are bandwidth limited +   so increasing the preallocation likely will not improve things.*/ + +#define MAX_PREALLOC 12 + +/* 5 splits gives 6 elements */ +#define PREALLOC_SIZE(maxsplit) \ +    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) + +#define SPLIT_APPEND(data, left, right)         \ +    sub = STRINGLIB_NEW((data) + (left),        \ +                        (right) - (left));      \ +    if (sub == NULL)                            \ +        goto onError;                           \ +    if (PyList_Append(list, sub)) {             \ +        Py_DECREF(sub);                         \ +        goto onError;                           \ +    }                                           \ +    else                                        \ +        Py_DECREF(sub); + +#define SPLIT_ADD(data, left, right) {          \ +    sub = STRINGLIB_NEW((data) + (left),        \ +                        (right) - (left));      \ +    if (sub == NULL)                            \ +        goto onError;                           \ +    if (count < MAX_PREALLOC) {                 \ +        PyList_SET_ITEM(list, count, sub);      \ +    } else {                                    \ +        if (PyList_Append(list, sub)) {         \ +            Py_DECREF(sub);                     \ +            goto onError;                       \ +        }                                       \ +        else                                    \ +            Py_DECREF(sub);                     \ +    }                                           \ +    count++; } + + +/* Always force the list to the expected size. */ +#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count + +Py_LOCAL_INLINE(PyObject *) +stringlib_split_whitespace(PyObject* str_obj, +                           const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                           Py_ssize_t maxcount) +{ +    Py_ssize_t i, j, count=0; +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); +    PyObject *sub; + +    if (list == NULL) +        return NULL; + +    i = j = 0; +    while (maxcount-- > 0) { +        while (i < str_len && STRINGLIB_ISSPACE(str[i])) +            i++; +        if (i == str_len) break; +        j = i; i++; +        while (i < str_len && !STRINGLIB_ISSPACE(str[i])) +            i++; +#ifndef STRINGLIB_MUTABLE +        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { +            /* No whitespace in str_obj, so just use it as list[0] */ +            Py_INCREF(str_obj); +            PyList_SET_ITEM(list, 0, (PyObject *)str_obj); +            count++; +            break; +        } +#endif +        SPLIT_ADD(str, j, i); +    } + +    if (i < str_len) { +        /* Only occurs when maxcount was reached */ +        /* Skip any remaining whitespace and copy to end of string */ +        while (i < str_len && STRINGLIB_ISSPACE(str[i])) +            i++; +        if (i != str_len) +            SPLIT_ADD(str, i, str_len); +    } +    FIX_PREALLOC_SIZE(list); +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_split_char(PyObject* str_obj, +                     const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                     const STRINGLIB_CHAR ch, +                     Py_ssize_t maxcount) +{ +    Py_ssize_t i, j, count=0; +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); +    PyObject *sub; + +    if (list == NULL) +        return NULL; + +    i = j = 0; +    while ((j < str_len) && (maxcount-- > 0)) { +        for(; j < str_len; j++) { +            /* I found that using memchr makes no difference */ +            if (str[j] == ch) { +                SPLIT_ADD(str, i, j); +                i = j = j + 1; +                break; +            } +        } +    } +#ifndef STRINGLIB_MUTABLE +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { +        /* ch not in str_obj, so just use str_obj as list[0] */ +        Py_INCREF(str_obj); +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); +        count++; +    } else +#endif +    if (i <= str_len) { +        SPLIT_ADD(str, i, str_len); +    } +    FIX_PREALLOC_SIZE(list); +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_split(PyObject* str_obj, +                const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, +                Py_ssize_t maxcount) +{ +    Py_ssize_t i, j, pos, count=0; +    PyObject *list, *sub; + +    if (sep_len == 0) { +        PyErr_SetString(PyExc_ValueError, "empty separator"); +        return NULL; +    } +    else if (sep_len == 1) +        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount); + +    list = PyList_New(PREALLOC_SIZE(maxcount)); +    if (list == NULL) +        return NULL; + +    i = j = 0; +    while (maxcount-- > 0) { +        pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH); +        if (pos < 0) +            break; +        j = i + pos; +        SPLIT_ADD(str, i, j); +        i = j + sep_len; +    } +#ifndef STRINGLIB_MUTABLE +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { +        /* No match in str_obj, so just use it as list[0] */ +        Py_INCREF(str_obj); +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); +        count++; +    } else +#endif +    { +        SPLIT_ADD(str, i, str_len); +    } +    FIX_PREALLOC_SIZE(list); +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_rsplit_whitespace(PyObject* str_obj, +                            const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                            Py_ssize_t maxcount) +{ +    Py_ssize_t i, j, count=0; +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); +    PyObject *sub; + +    if (list == NULL) +        return NULL; + +    i = j = str_len - 1; +    while (maxcount-- > 0) { +        while (i >= 0 && STRINGLIB_ISSPACE(str[i])) +            i--; +        if (i < 0) break; +        j = i; i--; +        while (i >= 0 && !STRINGLIB_ISSPACE(str[i])) +            i--; +#ifndef STRINGLIB_MUTABLE +        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) { +            /* No whitespace in str_obj, so just use it as list[0] */ +            Py_INCREF(str_obj); +            PyList_SET_ITEM(list, 0, (PyObject *)str_obj); +            count++; +            break; +        } +#endif +        SPLIT_ADD(str, i + 1, j + 1); +    } + +    if (i >= 0) { +        /* Only occurs when maxcount was reached */ +        /* Skip any remaining whitespace and copy to beginning of string */ +        while (i >= 0 && STRINGLIB_ISSPACE(str[i])) +            i--; +        if (i >= 0) +            SPLIT_ADD(str, 0, i + 1); +    } +    FIX_PREALLOC_SIZE(list); +    if (PyList_Reverse(list) < 0) +        goto onError; +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_rsplit_char(PyObject* str_obj, +                      const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                      const STRINGLIB_CHAR ch, +                      Py_ssize_t maxcount) +{ +    Py_ssize_t i, j, count=0; +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); +    PyObject *sub; + +    if (list == NULL) +        return NULL; + +    i = j = str_len - 1; +    while ((i >= 0) && (maxcount-- > 0)) { +        for(; i >= 0; i--) { +            if (str[i] == ch) { +                SPLIT_ADD(str, i + 1, j + 1); +                j = i = i - 1; +                break; +            } +        } +    } +#ifndef STRINGLIB_MUTABLE +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { +        /* ch not in str_obj, so just use str_obj as list[0] */ +        Py_INCREF(str_obj); +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); +        count++; +    } else +#endif +    if (j >= -1) { +        SPLIT_ADD(str, 0, j + 1); +    } +    FIX_PREALLOC_SIZE(list); +    if (PyList_Reverse(list) < 0) +        goto onError; +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_rsplit(PyObject* str_obj, +                 const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, +                 Py_ssize_t maxcount) +{ +    Py_ssize_t j, pos, count=0; +    PyObject *list, *sub; + +    if (sep_len == 0) { +        PyErr_SetString(PyExc_ValueError, "empty separator"); +        return NULL; +    } +    else if (sep_len == 1) +        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount); + +    list = PyList_New(PREALLOC_SIZE(maxcount)); +    if (list == NULL) +        return NULL; + +    j = str_len; +    while (maxcount-- > 0) { +        pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH); +        if (pos < 0) +            break; +        SPLIT_ADD(str, pos + sep_len, j); +        j = pos; +    } +#ifndef STRINGLIB_MUTABLE +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { +        /* No match in str_obj, so just use it as list[0] */ +        Py_INCREF(str_obj); +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); +        count++; +    } else +#endif +    { +        SPLIT_ADD(str, 0, j); +    } +    FIX_PREALLOC_SIZE(list); +    if (PyList_Reverse(list) < 0) +        goto onError; +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_splitlines(PyObject* str_obj, +                     const STRINGLIB_CHAR* str, Py_ssize_t str_len, +                     int keepends) +{ +    /* This does not use the preallocated list because splitlines is +       usually run with hundreds of newlines.  The overhead of +       switching between PyList_SET_ITEM and append causes about a +       2-3% slowdown for that common case.  A smarter implementation +       could move the if check out, so the SET_ITEMs are done first +       and the appends only done when the prealloc buffer is full. +       That's too much work for little gain.*/ + +    register Py_ssize_t i; +    register Py_ssize_t j; +    PyObject *list = PyList_New(0); +    PyObject *sub; + +    if (list == NULL) +        return NULL; + +    for (i = j = 0; i < str_len; ) { +        Py_ssize_t eol; + +        /* Find a line and append it */ +        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i])) +            i++; + +        /* Skip the line break reading CRLF as one line break */ +        eol = i; +        if (i < str_len) { +            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n') +                i += 2; +            else +                i++; +            if (keepends) +                eol = i; +        } +#ifndef STRINGLIB_MUTABLE +        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { +            /* No linebreak in str_obj, so just use it as list[0] */ +            if (PyList_Append(list, str_obj)) +                goto onError; +            break; +        } +#endif +        SPLIT_APPEND(str, j, eol); +        j = i; +    } +    return list; + +  onError: +    Py_DECREF(list); +    return NULL; +} + +#endif diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h index 4a95258..84e4616 100644 --- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -11,6 +11,8 @@  #define STRINGLIB_TYPE_NAME      "string"  #define STRINGLIB_PARSE_CODE     "S"  #define STRINGLIB_EMPTY          nullstring +#define STRINGLIB_ISSPACE        Py_ISSPACE +#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))  #define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))  #define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)  #define STRINGLIB_TOUPPER        Py_TOUPPER @@ -21,8 +23,11 @@  #define STRINGLIB_NEW            PyString_FromStringAndSize  #define STRINGLIB_RESIZE         _PyString_Resize  #define STRINGLIB_CHECK          PyString_Check +#define STRINGLIB_CHECK_EXACT    PyString_CheckExact  #define STRINGLIB_TOSTR          PyObject_Str  #define STRINGLIB_GROUPING       _PyString_InsertThousandsGrouping  #define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale +#define STRINGLIB_WANT_CONTAINS_OBJ 1 +  #endif /* !STRINGLIB_STRINGDEFS_H */ diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h index 7dc8177..1e132e5 100644 --- a/Objects/stringlib/transmogrify.h +++ b/Objects/stringlib/transmogrify.h @@ -1,13 +1,6 @@  /* NOTE: this API is -ONLY- for use with single byte character strings. */  /* Do not use it with Unicode. */ -#include "bytes_methods.h" - -#ifndef STRINGLIB_MUTABLE -#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0" -#define STRINGLIB_MUTABLE 0 -#endif -  /* the more complicated methods.  parts of these should be pulled out into the     shared code in bytes_methods.c to cut down on duplicate code bloat.  */ @@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)      return (PyObject*) s;  } - - -#define _STRINGLIB_SPLIT_APPEND(data, left, right)		\ -	str = STRINGLIB_NEW((data) + (left),	                \ -					 (right) - (left));	\ -	if (str == NULL)					\ -		goto onError;					\ -	if (PyList_Append(list, str)) {				\ -		Py_DECREF(str);					\ -		goto onError;					\ -	}							\ -	else							\ -		Py_DECREF(str); - -PyDoc_STRVAR(splitlines__doc__, -"B.splitlines([keepends]) -> list of lines\n\ -\n\ -Return a list of the lines in B, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); - -static PyObject* -stringlib_splitlines(PyObject *self, PyObject *args) -{ -    register Py_ssize_t i; -    register Py_ssize_t j; -    Py_ssize_t len; -    int keepends = 0; -    PyObject *list; -    PyObject *str; -    char *data; - -    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) -        return NULL; - -    data = STRINGLIB_STR(self); -    len = STRINGLIB_LEN(self); - -    /* This does not use the preallocated list because splitlines is -       usually run with hundreds of newlines.  The overhead of -       switching between PyList_SET_ITEM and append causes about a -       2-3% slowdown for that common case.  A smarter implementation -       could move the if check out, so the SET_ITEMs are done first -       and the appends only done when the prealloc buffer is full. -       That's too much work for little gain.*/ - -    list = PyList_New(0); -    if (!list) -        goto onError; - -    for (i = j = 0; i < len; ) { -	Py_ssize_t eol; - -	/* Find a line and append it */ -	while (i < len && data[i] != '\n' && data[i] != '\r') -	    i++; - -	/* Skip the line break reading CRLF as one line break */ -	eol = i; -	if (i < len) { -	    if (data[i] == '\r' && i + 1 < len && -		data[i+1] == '\n') -		i += 2; -	    else -		i++; -	    if (keepends) -		eol = i; -	} -	_STRINGLIB_SPLIT_APPEND(data, j, eol); -	j = i; -    } -    if (j < len) { -	_STRINGLIB_SPLIT_APPEND(data, j, len); -    } - -    return list; - - onError: -    Py_XDECREF(list); -    return NULL; -} - -#undef _STRINGLIB_SPLIT_APPEND - diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index 524781f..dd814f6 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -11,6 +11,8 @@  #define STRINGLIB_TYPE_NAME      "unicode"  #define STRINGLIB_PARSE_CODE     "U"  #define STRINGLIB_EMPTY          unicode_empty +#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE +#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK  #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL  #define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL  #define STRINGLIB_TOUPPER        Py_UNICODE_TOUPPER @@ -21,6 +23,7 @@  #define STRINGLIB_NEW            PyUnicode_FromUnicode  #define STRINGLIB_RESIZE         PyUnicode_Resize  #define STRINGLIB_CHECK          PyUnicode_Check +#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact  #define STRINGLIB_GROUPING       _PyUnicode_InsertThousandsGrouping  #if PY_VERSION_HEX < 0x03000000  | 
