diff options
Diffstat (limited to 'src/corelib/tools/qbytearraymatcher.cpp')
-rw-r--r-- | src/corelib/tools/qbytearraymatcher.cpp | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/src/corelib/tools/qbytearraymatcher.cpp b/src/corelib/tools/qbytearraymatcher.cpp new file mode 100644 index 0000000..cd4cf90 --- /dev/null +++ b/src/corelib/tools/qbytearraymatcher.cpp @@ -0,0 +1,323 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qbytearraymatcher.h" + +#include <limits.h> + +QT_BEGIN_NAMESPACE + +static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable) +{ + int l = qMin(len, 255); + memset(skiptable, l, 256*sizeof(uchar)); + cc += len - l; + while (l--) + skiptable[*cc++] = l; +} + +static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl, + const uchar *skiptable) +{ + if (pl == 0) + return index > l ? -1 : index; + const uint pl_minus_one = pl - 1; + + register const uchar *current = cc + index + pl_minus_one; + const uchar *end = cc + l; + while (current < end) { + uint skip = skiptable[*current]; + if (!skip) { + // possible match + while (skip < pl) { + if (*(current - skip) != puc[pl_minus_one - skip]) + break; + skip++; + } + if (skip > pl_minus_one) // we have a match + return (current - cc) - skip + 1; + + // in case we don't have a match we are a bit inefficient as we only skip by one + // when we have the non matching char in the string. + if (skiptable[*(current - skip)] == pl) + skip = pl - skip; + else + skip = 1; + } + if (current > end - skip) + break; + current += skip; + } + return -1; // not found +} + +/*! \class QByteArrayMatcher + \brief The QByteArrayMatcher class holds a sequence of bytes that + can be quickly matched in a byte array. + + \ingroup tools + \ingroup text + + This class is useful when you have a sequence of bytes that you + want to repeatedly match against some byte arrays (perhaps in a + loop), or when you want to search for the same sequence of bytes + multiple times in the same byte array. Using a matcher object and + indexIn() is faster than matching a plain QByteArray with + QByteArray::indexOf() if repeated matching takes place. This + class offers no benefit if you are doing one-off byte array + matches. + + Create the QByteArrayMatcher with the QByteArray you want to + search for. Then call indexIn() on the QByteArray that you want to + search. + + \sa QByteArray, QStringMatcher +*/ + +/*! + Constructs an empty byte array matcher that won't match anything. + Call setPattern() to give it a pattern to match. +*/ +QByteArrayMatcher::QByteArrayMatcher() + : d(0) +{ + p.p = 0; + qMemSet(p.q_skiptable, 0, sizeof(p.q_skiptable)); +} + +/*! + Constructs a byte array matcher from \a pattern. \a pattern + has the given \a length. \a pattern must remain in scope, but + the destructor does not delete \a pattern. + */ +QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length) + : d(0) +{ + p.p = reinterpret_cast<const uchar *>(pattern); + p.l = length; + bm_init_skiptable(p.p, p.l, p.q_skiptable); +} + +/*! + Constructs a byte array matcher that will search for \a pattern. + Call indexIn() to perform a search. +*/ +QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern) + : d(0), q_pattern(pattern) +{ + p.p = reinterpret_cast<const uchar *>(pattern.constData()); + p.l = pattern.size(); + bm_init_skiptable(p.p, p.l, p.q_skiptable); +} + +/*! + Copies the \a other byte array matcher to this byte array matcher. +*/ +QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other) + : d(0) +{ + operator=(other); +} + +/*! + Destroys the byte array matcher. +*/ +QByteArrayMatcher::~QByteArrayMatcher() +{ +} + +/*! + Assigns the \a other byte array matcher to this byte array matcher. +*/ +QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other) +{ + q_pattern = other.q_pattern; + qMemCopy(p.q_skiptable, other.p.q_skiptable, sizeof(p.q_skiptable)); + return *this; +} + +/*! + Sets the byte array that this byte array matcher will search for + to \a pattern. + + \sa pattern(), indexIn() +*/ +void QByteArrayMatcher::setPattern(const QByteArray &pattern) +{ + q_pattern = pattern; + p.p = reinterpret_cast<const uchar *>(pattern.constData()); + p.l = pattern.size(); + bm_init_skiptable(p.p, p.l, p.q_skiptable); +} + +/*! + Searches the byte array \a ba, from byte position \a from (default + 0, i.e. from the first byte), for the byte array pattern() that + was set in the constructor or in the most recent call to + setPattern(). Returns the position where the pattern() matched in + \a ba, or -1 if no match was found. +*/ +int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const +{ + if (from < 0) + from = 0; + return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from, + p.p, p.l, p.q_skiptable); +} + +/*! + Searches the char string \a str, which has length \a len, from + byte position \a from (default 0, i.e. from the first byte), for + the byte array pattern() that was set in the constructor or in the + most recent call to setPattern(). Returns the position where the + pattern() matched in \a str, or -1 if no match was found. +*/ +int QByteArrayMatcher::indexIn(const char *str, int len, int from) const +{ + if (from < 0) + from = 0; + return bm_find(reinterpret_cast<const uchar *>(str), len, from, + p.p, p.l, p.q_skiptable); +} + +/*! + \fn QByteArray QByteArrayMatcher::pattern() const + + Returns the byte array pattern that this byte array matcher will + search for. + + \sa setPattern() +*/ + + +static int findChar(const char *str, int len, char ch, int from) +{ + const uchar *s = (const uchar *)str; + uchar c = (uchar)ch; + if (from < 0) + from = qMax(from + len, 0); + if (from < len) { + const uchar *n = s + from - 1; + const uchar *e = s + len; + while (++n != e) + if (*n == c) + return n - s; + } + return -1; +} + +/*! \internal + */ +static int qFindByteArrayBoyerMoore( + const char *haystack, int haystackLen, int haystackOffset, + const char *needle, int needleLen) +{ + uchar skiptable[256]; + bm_init_skiptable((const uchar *)needle, needleLen, skiptable); + if (haystackOffset < 0) + haystackOffset = 0; + return bm_find((const uchar *)haystack, haystackLen, haystackOffset, + (const uchar *)needle, needleLen, skiptable); +} + +#define REHASH(a) \ + if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \ + hashHaystack -= (a) << sl_minus_1; \ + hashHaystack <<= 1 + +/*! \internal + */ +int qFindByteArray( + const char *haystack0, int haystackLen, int from, + const char *needle, int needleLen) +{ + const int l = haystackLen; + const int sl = needleLen; + if (from < 0) + from += l; + if (uint(sl + from) > (uint)l) + return -1; + if (!sl) + return from; + if (!l) + return -1; + + if (sl == 1) + return findChar(haystack0, haystackLen, needle[0], from); + + /* + We use the Boyer-Moore algorithm in cases where the overhead + for the skip table should pay off, otherwise we use a simple + hash function. + */ + if (l > 500 && sl > 5) + return qFindByteArrayBoyerMoore(haystack0, haystackLen, from, + needle, needleLen); + + /* + We use some hashing for efficiency's sake. Instead of + comparing strings, we compare the hash value of str with that + of a part of this QString. Only if that matches, we call memcmp(). + */ + const char *haystack = haystack0 + from; + const char *end = haystack0 + (l - sl); + const uint sl_minus_1 = sl - 1; + uint hashNeedle = 0, hashHaystack = 0; + int idx; + for (idx = 0; idx < sl; ++idx) { + hashNeedle = ((hashNeedle<<1) + needle[idx]); + hashHaystack = ((hashHaystack<<1) + haystack[idx]); + } + hashHaystack -= *(haystack + sl_minus_1); + + while (haystack <= end) { + hashHaystack += *(haystack + sl_minus_1); + if (hashHaystack == hashNeedle && *needle == *haystack + && memcmp(needle, haystack, sl) == 0) + return haystack - haystack0; + + REHASH(*haystack); + ++haystack; + } + return -1; +} + +QT_END_NAMESPACE |