From 90ae39c5d66718ecc0068af1c265f526642f66e6 Mon Sep 17 00:00:00 2001
From: Dimitri van Heesch <doxygen@gmail.com>
Date: Mon, 29 Mar 2021 20:55:03 +0200
Subject: Fixed potential crash in handling empty list item.

---
 src/doctokenizer.l | 18 ++++++++++++++----
 src/utf8.cpp       |  9 +++++----
 src/utf8.h         |  2 +-
 3 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/src/doctokenizer.l b/src/doctokenizer.l
index e2aac01..c380cd2 100644
--- a/src/doctokenizer.l
+++ b/src/doctokenizer.l
@@ -27,6 +27,7 @@
 #include <ctype.h>
 #include <stack>
 #include <string>
+#include <cassert>
 
 #include "doctokenizer.h"
 #include "cmdmapper.h"
@@ -115,12 +116,12 @@ bool doctokenizerYYpopContext()
 
 QCString extractPartAfterNewLine(const QCString &text)
 {
-  int nl1 = text.findRev('\n');
+  int nl1 = text.find('\n');
   if (nl1!=-1)
   {
     return text.mid(nl1+1);
   }
-  int nl2 = text.findRev("\\ilinebr");
+  int nl2 = text.find("\\ilinebr");
   if (nl2!=-1)
   {
     if (text.at(nl2+8)==' ') nl2++; // skip space after \\ilinebr
@@ -499,6 +500,7 @@ RCSID "$"("Author"|"Date"|"Header"|"Id"|"Locker"|"Log"|"Name"|"RCSfile"|"Revisio
                          lineCount(yytext,yyleng);
                          QCString text=yytext;
                          size_t dashPos = static_cast<size_t>(text.findRev('-'));
+                         assert(dashPos!=std::string::npos);
                          g_token->isEnumList = text.at(dashPos+1)=='#';
                          g_token->id         = -1;
                          g_token->indent     = computeIndent(yytext,dashPos);
@@ -517,6 +519,7 @@ RCSID "$"("Author"|"Date"|"Header"|"Id"|"Locker"|"Log"|"Name"|"RCSfile"|"Revisio
                            reg::Match match;
                            reg::search(text,match,re);
                            size_t listPos = match.position();
+                           assert(listPos!=std::string::npos);
                            g_token->isEnumList = FALSE;
                            g_token->id         = -1;
                            g_token->indent     = computeIndent(yytext,listPos);
@@ -534,9 +537,11 @@ RCSID "$"("Author"|"Date"|"Header"|"Id"|"Locker"|"Log"|"Name"|"RCSfile"|"Revisio
                            static const reg::Ex re(R"(\d+)");
                            reg::Match match;
                            reg::search(text,match,re);
+                           size_t markPos = match.position();
+                           assert(markPos!=std::string::npos);
                            g_token->isEnumList = true;
                            g_token->id         = std::stoul(match.str());
-                           g_token->indent     = computeIndent(yytext,match.position());
+                           g_token->indent     = computeIndent(yytext,markPos);
                            return TK_LISTITEM;
                          }
                        }
@@ -544,6 +549,7 @@ RCSID "$"("Author"|"Date"|"Header"|"Id"|"Locker"|"Log"|"Name"|"RCSfile"|"Revisio
                          lineCount(yytext,yyleng);
                          QCString text=extractPartAfterNewLine(yytext);
                          size_t dashPos = static_cast<size_t>(text.findRev('-'));
+                         assert(dashPos!=std::string::npos);
                          g_token->isEnumList = text.at(dashPos+1)=='#';
                          g_token->id         = -1;
                          g_token->indent     = computeIndent(text,dashPos);
@@ -562,6 +568,7 @@ RCSID "$"("Author"|"Date"|"Header"|"Id"|"Locker"|"Log"|"Name"|"RCSfile"|"Revisio
                            reg::Match match;
                            reg::search(text,match,re);
                            size_t markPos = match.position();
+                           assert(markPos!=std::string::npos);
                            g_token->isEnumList = FALSE;
                            g_token->id         = -1;
                            g_token->indent     = computeIndent(text.c_str(),markPos);
@@ -580,15 +587,18 @@ RCSID "$"("Author"|"Date"|"Header"|"Id"|"Locker"|"Log"|"Name"|"RCSfile"|"Revisio
                            static const reg::Ex re(R"(\d+)");
                            reg::Match match;
                            reg::search(text,match,re);
+                           size_t markPos = match.position();
+                           assert(markPos!=std::string::npos);
                            g_token->isEnumList = true;
                            g_token->id         = std::stoul(match.str());
-                           g_token->indent     = computeIndent(text.c_str(),match.position());
+                           g_token->indent     = computeIndent(text.c_str(),markPos);
                            return TK_LISTITEM;
                          }
                        }
 <St_Para>^{ENDLIST}       { /* end list */
                          lineCount(yytext,yyleng);
                          int dotPos = QCString(yytext).findRev('.');
+                         assert(dotPos!=-1);
                          g_token->indent     = computeIndent(yytext,dotPos);
                          return TK_ENDLIST;
                        }
diff --git a/src/utf8.cpp b/src/utf8.cpp
index 9c9034d..9ffc168 100644
--- a/src/utf8.cpp
+++ b/src/utf8.cpp
@@ -13,15 +13,16 @@
  *
  */
 
+#include <cstdint>
 #include <sstream>
 
 #include "utf8.h"
 #include "caseconvert.h"
 #include "textstream.h"
 
-int getUTF8CharNumBytes(char c)
+uint8_t getUTF8CharNumBytes(char c)
 {
-  int num=1;
+  uint8_t num=1;
   unsigned char uc = static_cast<unsigned char>(c);
   if (uc>=0x80u) // multibyte character
   {
@@ -173,8 +174,8 @@ std::string convertUTF8ToUpper(const std::string &input)
 const char *writeUTF8Char(TextStream &t,const char *s)
 {
   if (s==0) return 0;
-  int len = getUTF8CharNumBytes(*s);
-  for (int i=0;i<len;i++)
+  uint8_t len = getUTF8CharNumBytes(*s);
+  for (uint8_t i=0;i<len;i++)
   {
     if (s[i]==0) // detect premature end of string (due to invalid UTF8 char)
     {
diff --git a/src/utf8.h b/src/utf8.h
index 538230d..08979f3 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -51,7 +51,7 @@ uint32_t getUnicodeForUTF8CharAt(const std::string &input,size_t pos);
 /** Returns the number of bytes making up a single UTF8 character given the first byte
  *  in the sequence.
  */
-int getUTF8CharNumBytes(char firstByte);
+uint8_t getUTF8CharNumBytes(char firstByte);
 
 /** Writes the UTF8 character pointed to by s to stream t and returns a pointer
  *  to the next character.
-- 
cgit v0.12