summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/webkit/WebCore/platform/text/TextDecoder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/webkit/WebCore/platform/text/TextDecoder.cpp')
-rw-r--r--src/3rdparty/webkit/WebCore/platform/text/TextDecoder.cpp129
1 files changed, 129 insertions, 0 deletions
diff --git a/src/3rdparty/webkit/WebCore/platform/text/TextDecoder.cpp b/src/3rdparty/webkit/WebCore/platform/text/TextDecoder.cpp
new file mode 100644
index 0000000..e39a6b7
--- /dev/null
+++ b/src/3rdparty/webkit/WebCore/platform/text/TextDecoder.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextDecoder.h"
+
+#include "TextEncodingRegistry.h"
+
+// FIXME: Would be nice to also handle BOM for UTF-7 and UTF-32.
+
+namespace WebCore {
+
+// Builds a decoder for |encoding|. The codec member is not initialized here;
+// the decoder starts with its BOM check still pending and no bytes buffered.
+TextDecoder::TextDecoder(const TextEncoding& encoding)
+    : m_encoding(encoding),
+      m_checkedForBOM(false),
+      m_numBufferedBytes(0)
+{
+}
+
+// Points the decoder at |encoding| and puts every other member back to the
+// values the constructor gives it, so BOM detection runs again on the next
+// input.
+void TextDecoder::reset(const TextEncoding& encoding)
+{
+    // Forget any partially sniffed input and re-arm the BOM check.
+    m_numBufferedBytes = 0;
+    m_checkedForBOM = false;
+
+    // Drop the codec that was held for the previous encoding, then switch.
+    m_codec.clear();
+    m_encoding = encoding;
+}
+
+// Inspects the start of the stream for a Unicode byte order mark (BOM), creates
+// the codec for the resulting encoding, and decodes the input that is available.
+//
+// The bytes examined are the ones saved in m_bufferedBytes by earlier calls,
+// followed by data[0..length). Recognized marks:
+//   FF FE (followed by a non-zero byte)  -> UTF-16 little endian
+//   FF FE 00 00                          -> UTF-32 little endian
+//   EF BB BF                             -> UTF-8
+//   FE FF                                -> UTF-16 big endian
+//   00 00 FE FF                          -> UTF-32 big endian
+// When no mark is found, m_encoding is used unchanged.
+//
+// data/length   New input bytes.
+// flush         When true, the function never defers the decision by buffering.
+// stopOnError   Passed through to the codec; also stops decoding of |data| when
+//               the buffered bytes already produced an error.
+// sawError      Passed through to the codec.
+//
+// Returns "" while the decision is deferred (input saved in m_bufferedBytes),
+// a null String when no codec could be created, and the decoded text otherwise.
+String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    ASSERT(!m_checkedForBOM);
+
+    // Check to see if we found a BOM.
+    size_t numBufferedBytes = m_numBufferedBytes;
+    size_t buf1Len = numBufferedBytes;
+    size_t buf2Len = length;
+    const unsigned char* buf1 = m_bufferedBytes;
+    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
+    // Peek at the first four bytes of the logical stream (buffered bytes first,
+    // then new data), substituting 0 for bytes that are not available yet.
+    // The fourth byte is only ever taken from |data|; presumably m_bufferedBytes
+    // holds at most three bytes -- confirm against TextDecoder.h.
+    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;
+
+    const TextEncoding* encodingConsideringBOM = &m_encoding;
+    bool foundBOM = true;
+    size_t lengthOfBOM = 0;
+    if (c1 == 0xFF && c2 == 0xFE) {
+        if (c3 != 0 || c4 != 0) {
+            encodingConsideringBOM = &UTF16LittleEndianEncoding();
+            lengthOfBOM = 2;
+        } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) {
+            // More bytes than the buffer can hold are available, so the two
+            // zero bytes are real input rather than placeholders.
+            encodingConsideringBOM = &UTF32LittleEndianEncoding();
+            lengthOfBOM = 4;
+        } else {
+            // Too few bytes so far to tell UTF-16LE from UTF-32LE.
+            foundBOM = false;
+        }
+    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
+        encodingConsideringBOM = &UTF8Encoding();
+        lengthOfBOM = 3;
+    } else if (c1 == 0xFE && c2 == 0xFF) {
+        encodingConsideringBOM = &UTF16BigEndianEncoding();
+        lengthOfBOM = 2;
+    } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
+        encodingConsideringBOM = &UTF32BigEndianEncoding();
+        lengthOfBOM = 4;
+    } else
+        foundBOM = false;
+
+    if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
+        // Continue to look for the BOM.
+        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
+        m_numBufferedBytes += length;
+        return "";
+    }
+
+    // Done checking for BOM.
+    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
+    if (!m_codec)
+        return String();
+    m_checkedForBOM = true;
+
+    // Skip the BOM.
+    size_t bufferedOffset = 0;
+    if (foundBOM) {
+        if (numBufferedBytes > lengthOfBOM) {
+            // The whole BOM sits in the buffer together with bytes that follow
+            // it. This happens for UTF-16LE: "FF FE 00" is buffered because it
+            // could still turn into a UTF-32LE mark. Skip only the BOM and keep
+            // the trailing buffered bytes so they are decoded ahead of |data|.
+            // Subtracting the other way round would wrap around in size_t and
+            // move |data| backwards.
+            bufferedOffset = lengthOfBOM;
+            numBufferedBytes -= lengthOfBOM;
+        } else {
+            // The BOM ends at or after the end of the buffer, so every buffered
+            // byte belongs to it; the remainder is skipped in |data|.
+            size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
+            ASSERT(numUnbufferedBOMBytes <= length);
+
+            data += numUnbufferedBOMBytes;
+            length -= numUnbufferedBOMBytes;
+            numBufferedBytes = 0;
+        }
+        m_numBufferedBytes = 0;
+    }
+
+    // Handle case where we have some buffered bytes to deal with.
+    if (numBufferedBytes) {
+        // Decode from a local copy; the member buffer is marked empty before
+        // the codec runs.
+        char bufferedBytes[sizeof(m_bufferedBytes)];
+        memcpy(bufferedBytes, m_bufferedBytes + bufferedOffset, numBufferedBytes);
+        m_numBufferedBytes = 0;
+
+        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
+        if (stopOnError && sawError)
+            return bufferedResult;
+        return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
+    }
+
+    return m_codec->decode(data, length, flush, stopOnError, sawError);
+}
+
+} // namespace WebCore