diff options
Diffstat (limited to 'Objects/stringobject.c')
| -rw-r--r-- | Objects/stringobject.c | 123 | 
1 files changed, 122 insertions, 1 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c index b0c4640..95ade51 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -5,6 +5,18 @@  #include <ctype.h> +#undef USE_INLINE /* XXX - set via configure? */ + +#if defined(_MSC_VER) /* this is taken from _sre.c */ +#pragma warning(disable: 4710) +/* fastest possible local call under MSVC */ +#define LOCAL(type) static __inline type __fastcall +#elif defined(USE_INLINE) +#define LOCAL(type) static inline type +#else +#define LOCAL(type) static type +#endif +  #ifdef COUNT_ALLOCS  int null_strings, one_strings;  #endif @@ -763,6 +775,108 @@ PyString_AsStringAndSize(register PyObject *obj,  	return 0;  } +/* -------------------------------------------------------------------- */ +/* Helpers */ + +#define USE_FAST /* experimental fast search implementation */ + +/* XXX - this code is copied from unicodeobject.c.  we really should +   refactor the core implementations (see _sre.c for how this can be +   done), but that'll have to wait -- fredrik */ + +/* fast search/count implementation, based on a mix between boyer- +   moore and horspool, with a few more bells and whistles on the top. +   for some more background, see: http://effbot.org/stringlib */ + +/* note: fastsearch may access s[n], which isn't a problem when using +   Python's ordinary string types, but may cause problems if you're +   using this code in other contexts.  also, the count mode returns -1 +   if there cannot possible be a match in the target string, and 0 if +   it has actually checked for matches, but didn't find any.  callers +   beware! */ + +#define FAST_COUNT 0 +#define FAST_SEARCH 1 + +LOCAL(Py_ssize_t) +	fastsearch(const unsigned char* s, Py_ssize_t n, const unsigned char* p, +		   Py_ssize_t m, int mode) +{ +	long mask; +	int skip, count = 0; +	Py_ssize_t i, j, mlast, w; + +	w = n - m; + +	if (w < 0) +		return -1; + +	/* look for special cases */ +	if (m <= 1) { +		if (m <= 0) +			return -1; +		/* use special case for 1-character strings */ +		if (mode == FAST_COUNT) { +			for (i = 0; i < n; i++) +				if (s[i] == p[0]) +					count++; +			return count; +		} else { +			for (i = 0; i < n; i++) +				if (s[i] == p[0]) +					return i; +		} +		return -1; +	} + +	mlast = m - 1; + +	/* create compressed boyer-moore delta 1 table */ +	skip = mlast - 1; +	/* process pattern[:-1] */ +	for (mask = i = 0; i < mlast; i++) { +		mask |= (1 << (p[i] & 0x1F)); +		if (p[i] == p[mlast]) +			skip = mlast - i - 1; +	} +	/* process pattern[-1] outside the loop */ +	mask |= (1 << (p[mlast] & 0x1F)); + +	for (i = 0; i <= w; i++) { +		/* note: using mlast in the skip path slows things down on x86 */ +		if (s[i+m-1] == p[m-1]) { +			/* candidate match */ +			for (j = 0; j < mlast; j++) +				if (s[i+j] != p[j]) +					break; +			if (j == mlast) { +				/* got a match! */ +				if (mode != FAST_COUNT) +					return i; +				count++; +				i = i + mlast; +				continue; +			} +			/* miss: check if next character is part of pattern */ +			if (!(mask & (1 << (s[i+m] & 0x1F)))) +				i = i + m; +			else { +				i = i + skip; +				continue; +			} +		} else { +			/* skip: check if next character is part of pattern */ +			if (!(mask & (1 << (s[i+m] & 0x1F)))) +				i = i + m; +		} +	} + +	if (mode != FAST_COUNT) +		return -1; +	return count; +} + +/* -------------------------------------------------------------------- */  /* Methods */  static int @@ -2177,7 +2291,7 @@ as in slice notation.");  static PyObject *  string_count(PyStringObject *self, PyObject *args)  { -	const char *s = PyString_AS_STRING(self), *sub, *t; +	const char *s = PyString_AS_STRING(self), *sub;  	Py_ssize_t len = PyString_GET_SIZE(self), n;  	Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;  	Py_ssize_t m, r; @@ -2210,8 +2324,14 @@ string_count(PyStringObject *self, PyObject *args)  	if (n == 0)  		return PyInt_FromSsize_t(m-i); +#ifdef USE_FAST +	r = fastsearch(s + i, last - i, sub, n, FAST_COUNT); +	if (r < 0) +		r = 0; /* no match */ +#else  	r = 0;  	while (i < m) { +		const char *t  		if (!memcmp(s+i, sub, n)) {  			r++;  			i += n; @@ -2225,6 +2345,7 @@ string_count(PyStringObject *self, PyObject *args)  			break;  		i = t - s;  	} +#endif  	return PyInt_FromSsize_t(r);  }  | 
