From 42355ace21f6fb72fba49316c73c025a12482b09 Mon Sep 17 00:00:00 2001 From: albert-github Date: Fri, 22 Nov 2019 16:50:03 +0100 Subject: Problem converting UCS big endian file When having a file with a Big Endian BOM this is not always handled in a correct way when using the generic UCS-2 approach, using the explicit version works well. (problems see with a gcc (SUSE Linux) 7.4.1 20190905 [gcc-7-branch revision 275407] and glibc-2.26-lp151.18.7.x86_64). --- src/util.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/util.cpp b/src/util.cpp index f58a630..5868b62 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -8033,13 +8033,18 @@ bool readInputFile(const char *fileName,BufStr &inBuf,bool filter,bool isSourceC int start=0; if (size>=2 && - (((uchar)inBuf.at(0)==0xFF && (uchar)inBuf.at(1)==0xFE) || // Little endian BOM - ((uchar)inBuf.at(0)==0xFE && (uchar)inBuf.at(1)==0xFF) // big endian BOM - ) - ) // UCS-2 encoded file + ((uchar)inBuf.at(0)==0xFF && (uchar)inBuf.at(1)==0xFE) // Little endian BOM + ) // UCS-2LE encoded file + { + transcodeCharacterBuffer(fileName,inBuf,inBuf.curPos(), + "UCS-2LE","UTF-8"); + } + else if (size>=2 && + ((uchar)inBuf.at(0)==0xFE && (uchar)inBuf.at(1)==0xFF) // big endian BOM + ) // UCS-2BE encoded file { transcodeCharacterBuffer(fileName,inBuf,inBuf.curPos(), - "UCS-2","UTF-8"); + "UCS-2BE","UTF-8"); } else if (size>=3 && (uchar)inBuf.at(0)==0xEF && -- cgit v0.12