Bug 796426 - Invalid 3-byte UTF8 found in input of graph

- Corrected counting of UTF-8 characters.
- Corrected truncation of text for, e.g., tooltips.
(Note: dot 2.38.0 silently ignored the error; dot 2.40.1 reports it.)
author: albert-github <albert.tests@gmail.com> 2018-05-28 12:51:55 (GMT)
committer: albert-github <albert.tests@gmail.com> 2018-05-28 12:51:55 (GMT)
commit: 21ce6ed9d2a37df2de846d48398261bb087c0a09 (patch)
tree: 190b626dd9fd978fbb28df5ae2ddd795cde2a6bd /src/util.cpp
parent: 63696c08425fc1662c5e76280e3cc74fb3769d80 (diff)
download: Doxygen-21ce6ed9d2a37df2de846d48398261bb087c0a09.zip
Doxygen-21ce6ed9d2a37df2de846d48398261bb087c0a09.tar.gz
Doxygen-21ce6ed9d2a37df2de846d48398261bb087c0a09.tar.bz2
1 files changed, 12 insertions, 15 deletions
diff --git a/src/util.cpp b/src/util.cpp
index ff0d0c6..4d1ed30 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -7394,23 +7394,23 @@ int nextUtf8CharPosition(const QCString &utf8Str,int len,int startPos)
   {
     if (((uchar)c&0xE0)==0xC0)
     {
-      bytes++; // 11xx.xxxx: >=2 byte character
+      bytes+=1; // 11xx.xxxx: >=2 byte character
     }
     if (((uchar)c&0xF0)==0xE0)
     {
-      bytes++; // 111x.xxxx: >=3 byte character
+      bytes+=2; // 111x.xxxx: >=3 byte character
     }
     if (((uchar)c&0xF8)==0xF0)
     {
-      bytes++; // 1111.xxxx: >=4 byte character
+      bytes+=3; // 1111.xxxx: >=4 byte character
     }
     if (((uchar)c&0xFC)==0xF8)
     {
-      bytes++; // 1111.1xxx: >=5 byte character
+      bytes+=4; // 1111.1xxx: >=5 byte character
     }
     if (((uchar)c&0xFE)==0xFC)
     {
-      bytes++; // 1111.1xxx: 6 byte character
+      bytes+=5; // 1111.1xxx: 6 byte character
     }
   }
   else if (c=='&') // skip over character entities
@@ -7444,11 +7444,10 @@ QCString parseCommentAsText(const Definition *scope,const MemberDef *md,
   root->accept(visitor);
   delete visitor;
   delete root;
-  QCString result = convertCharEntitiesToUTF8(s.data());
+  QCString result = convertCharEntitiesToUTF8(s.data()).stripWhiteSpace();
   int i=0;
   int charCnt=0;
   int l=result.length();
-  bool addEllipsis=FALSE;
   while ((i=nextUtf8CharPosition(result,l,i))<l)
   {
     charCnt++;
@@ -7459,19 +7458,17 @@ QCString parseCommentAsText(const Definition *scope,const MemberDef *md,
     while ((i=nextUtf8CharPosition(result,l,i))<l && charCnt<100)
     {
       charCnt++;
-      if (result.at(i)>=0 && isspace(result.at(i)))
-      {
-        addEllipsis=TRUE;
-      }
-      else if (result.at(i)==',' || 
-               result.at(i)=='.' || 
-               result.at(i)=='?')
+      if (result.at(i)==',' ||
+          result.at(i)=='.' ||
+          result.at(i)=='!' ||
+          result.at(i)=='?')
       {
+        i++; // we want to be "behind" last inspected character
         break;
       }
     }
   }
-  if (addEllipsis || charCnt==100) result=result.left(i)+"...";
+  if ( i < l) result=result.left(i)+"...";
   return result.data();
 }
author	albert-github <albert.tests@gmail.com>	2018-05-28 12:51:55 (GMT)
committer	albert-github <albert.tests@gmail.com>	2018-05-28 12:51:55 (GMT)
commit	21ce6ed9d2a37df2de846d48398261bb087c0a09 (patch)
tree	190b626dd9fd978fbb28df5ae2ddd795cde2a6bd /src/util.cpp
parent	63696c08425fc1662c5e76280e3cc74fb3769d80 (diff)
download	Doxygen-21ce6ed9d2a37df2de846d48398261bb087c0a09.zip Doxygen-21ce6ed9d2a37df2de846d48398261bb087c0a09.tar.gz Doxygen-21ce6ed9d2a37df2de846d48398261bb087c0a09.tar.bz2