diff options
Diffstat (limited to 'src/3rdparty/clucene/src/CLucene/util/Reader.cpp')
-rw-r--r-- | src/3rdparty/clucene/src/CLucene/util/Reader.cpp | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/src/3rdparty/clucene/src/CLucene/util/Reader.cpp b/src/3rdparty/clucene/src/CLucene/util/Reader.cpp new file mode 100644 index 0000000..1ce9710 --- /dev/null +++ b/src/3rdparty/clucene/src/CLucene/util/Reader.cpp @@ -0,0 +1,186 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/StdHeader.h" +#include "Reader.h" + +CL_NS_DEF(util) + +StringReader::StringReader ( const TCHAR* value ): + Reader(NULL,true){ + reader = new jstreams::StringReader<TCHAR>(value); +} +StringReader::StringReader ( const TCHAR* value, const int32_t length ): + Reader(NULL,true){ + reader = new jstreams::StringReader<TCHAR>(value,length); +} +StringReader::StringReader ( const TCHAR* value, const int32_t length, bool copyData ): + Reader(NULL,true){ + reader = new jstreams::StringReader<TCHAR>(value,length, copyData); +} +StringReader::~StringReader(){ +} + + +FileReader::FileReader ( const char* path, const char* enc, + const int32_t cachelen, const int32_t /*cachebuff*/ ): + Reader(NULL, true) +{ + this->input = new jstreams::FileInputStream(path, cachelen); + this->reader = new SimpleInputStreamReader(this->input,enc); //(this is a jstream object) +} + +FileReader::~FileReader (){ + if (input) + delete input; +} +int32_t FileReader::read(const TCHAR*& start, int32_t _min, int32_t _max) { + return reader->read(start, _min, _max); +} +int64_t FileReader::mark(int32_t readlimit) { + return reader->mark(readlimit); +} +int64_t FileReader::reset(int64_t newpos) { + return reader->reset(newpos); +} + + + +SimpleInputStreamReader::SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* enc) +{ + finishedDecoding = false; + input = i; + charbuf.setSize(262); + + if ( strcmp(enc,"ASCII")==0 ) + encoding = ASCII; +#ifdef _UCS2 + else if ( strcmp(enc,"UTF-8")==0 ) + encoding = UTF8; + else if ( strcmp(enc,"UCS-2LE")==0 ) + encoding = UCS2_LE; +#endif + else + _CLTHROWA(CL_ERR_IllegalArgument,"Unsupported encoding, use jstreams iconv based instead"); + + mark(262); + charsLeft = 0; +} +SimpleInputStreamReader::~SimpleInputStreamReader(){ + input = NULL; +} +int32_t SimpleInputStreamReader::decode(TCHAR* start, int32_t space){ + // decode from charbuf + const char *inbuf = charbuf.readPos; + const char *inbufend = charbuf.readPos + charbuf.avail; + TCHAR *outbuf = start; + const TCHAR *outbufend = outbuf + space; + + if ( encoding == ASCII ){ + while ( outbuf<outbufend && inbuf<inbufend ){ + *outbuf = *inbuf; + outbuf++; + inbuf++; + } + +#ifdef _UCS2 + } + else if ( encoding == UCS2_LE ){ + while ( outbuf<outbufend && (inbuf+1)<inbufend ){ + uint8_t c1 = *inbuf; + uint8_t c2 = *(inbuf+1); + unsigned short c = c1 | (c2<<8); + + #ifdef _UCS2 + *outbuf = c; + #else + *outbuf = LUCENE_OOR_CHAR(c); + #endif + outbuf++; + inbuf+=2; + } + + }else if ( encoding == UTF8 ){ + while ( outbuf<outbufend && inbuf<inbufend ){ + size_t utflen = lucene_utf8charlen(inbuf); + if ( utflen==0 ){ + error = "Invalid multibyte sequence."; + status = jstreams::Error; + return -1; + }else if ( inbuf+utflen > inbufend ){ + break; //character incomplete + }else{ + size_t rd = lucene_utf8towc(outbuf,inbuf,inbufend-inbuf); + if ( rd == 0 ){ + error = "Invalid multibyte sequence."; + status = jstreams::Error; + return -1; + }else{ + inbuf+=rd; + outbuf++; + } + } + } +#endif //_UCS2 + }else + _CLTHROWA(CL_ERR_Runtime,"Unexpected encoding"); + + if ( outbuf < outbufend ) { + //we had enough room to convert the entire input + if ( inbuf < inbufend ) { + // last character is incomplete + // move from inbuf to the end to the start of + // the buffer + memmove(charbuf.start, inbuf, inbufend-inbuf); + charbuf.readPos = charbuf.start; + charbuf.avail = inbufend-inbuf; + } else if ( outbuf < outbufend ) { //input sequence was completely converted + charbuf.readPos = charbuf.start; + charbuf.avail = 0; + if (input == NULL) { + finishedDecoding = true; + } + } + } else { + charbuf.readPos += charbuf.avail - (inbufend-inbuf); + charbuf.avail = inbufend-inbuf; + } + return outbuf-start; +} + +int32_t SimpleInputStreamReader::fillBuffer(TCHAR* start, int32_t space) { + // fill up charbuf + if (input && charbuf.readPos == charbuf.start) { + const char *begin; + int32_t numRead; + numRead = input->read(begin, 1, charbuf.size - charbuf.avail); + //printf("filled up charbuf\n"); + if (numRead < -1) { + error = input->getError(); + status = jstreams::Error; + input = 0; + return numRead; + } + if (numRead < 1) { + // signal end of input buffer + input = 0; + if (charbuf.avail) { + error = "stream ends on incomplete character"; + status = jstreams::Error; + } + return -1; + } + // copy data into other buffer + memmove( charbuf.start + charbuf.avail, begin, numRead * sizeof(char)); + charbuf.avail = numRead + charbuf.avail; + } + // decode + int32_t n = decode(start, space); + //printf("decoded %i\n", n); + return n; +} + +CL_NS_END |