summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoralbert-github <albert.tests@gmail.com>2019-11-22 15:50:03 (GMT)
committeralbert-github <albert.tests@gmail.com>2019-11-22 15:50:03 (GMT)
commit42355ace21f6fb72fba49316c73c025a12482b09 (patch)
tree07d6f2c7e943368832be59b50c263a595b078044
parente7d47ec23ba1b109754d6e47b1eb4d341efc3b36 (diff)
downloadDoxygen-42355ace21f6fb72fba49316c73c025a12482b09.zip
Doxygen-42355ace21f6fb72fba49316c73c025a12482b09.tar.gz
Doxygen-42355ace21f6fb72fba49316c73c025a12482b09.tar.bz2
Problem converting UCS big endian file
When a file has a big-endian BOM, it is not always handled correctly when using the generic UCS-2 approach; using the explicit version works well. (Problems seen with gcc (SUSE Linux) 7.4.1 20190905 [gcc-7-branch revision 275407] and glibc-2.26-lp151.18.7.x86_64.)
-rw-r--r--src/util.cpp15
1 files changed, 10 insertions, 5 deletions
diff --git a/src/util.cpp b/src/util.cpp
index f58a630..5868b62 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -8033,13 +8033,18 @@ bool readInputFile(const char *fileName,BufStr &inBuf,bool filter,bool isSourceC
int start=0;
if (size>=2 &&
- (((uchar)inBuf.at(0)==0xFF && (uchar)inBuf.at(1)==0xFE) || // Little endian BOM
- ((uchar)inBuf.at(0)==0xFE && (uchar)inBuf.at(1)==0xFF) // big endian BOM
- )
- ) // UCS-2 encoded file
+ ((uchar)inBuf.at(0)==0xFF && (uchar)inBuf.at(1)==0xFE) // Little endian BOM
+ ) // UCS-2LE encoded file
+ {
+ transcodeCharacterBuffer(fileName,inBuf,inBuf.curPos(),
+ "UCS-2LE","UTF-8");
+ }
+ else if (size>=2 &&
+ ((uchar)inBuf.at(0)==0xFE && (uchar)inBuf.at(1)==0xFF) // big endian BOM
+ ) // UCS-2BE encoded file
{
transcodeCharacterBuffer(fileName,inBuf,inBuf.curPos(),
- "UCS-2","UTF-8");
+ "UCS-2BE","UTF-8");
}
else if (size>=3 &&
(uchar)inBuf.at(0)==0xEF &&