Bug 705220 - Enables using unicode (non ASCII) to name output files

author: Dimitri van Heesch <dimitri@stack.nl> 2013-12-30 20:25:08 (GMT)
committer: Dimitri van Heesch <dimitri@stack.nl> 2013-12-30 20:25:08 (GMT)
commit: d5fb69739a1b2facf32a63ca94c12d097f8278cc (patch)
tree: da261caaf05e308cc5c01d12e9de437c9e1e015b /src/util.cpp
parent: 43461e44d49200fd1564c4e416db7384e7e5eab8 (diff)
download: Doxygen-d5fb69739a1b2facf32a63ca94c12d097f8278cc.zip
Doxygen-d5fb69739a1b2facf32a63ca94c12d097f8278cc.tar.gz
Doxygen-d5fb69739a1b2facf32a63ca94c12d097f8278cc.tar.bz2
1 files changed, 81 insertions, 16 deletions
diff --git a/src/util.cpp b/src/util.cpp
index 72696d3..6a405ca 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -5144,6 +5144,7 @@ bool hasVisibleRoot(BaseClassList *bcl)
 QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscore)
 {
   static bool caseSenseNames = Config_getBool("CASE_SENSE_NAMES");
+  static bool allowUnicodeNames = Config_getBool("ALLOW_UNICODE_NAMES");
   static GrowBuf growBuf;
   growBuf.clear();
   char c;
@@ -5179,15 +5180,57 @@ QCString escapeCharsInString(const char *name,bool allowDots,bool allowUnderscor
       default: 
                 if (c<0)
                 {
-                  static char map[] = "0123456789ABCDEF";
                   char ids[5];
-                  unsigned char id = (unsigned char)c;
-                  ids[0]='_';
-                  ids[1]='x';
-                  ids[2]=map[id>>4];
-                  ids[3]=map[id&0xF];
-                  ids[4]=0;
-                  growBuf.addStr(ids);
+                  const unsigned char uc = (unsigned char)c;
+                  bool doEscape = TRUE;
+                  if (allowUnicodeNames && uc <= 0xf7)
+                  {
+                    const char* pt = p;
+                    ids[ 0 ] = c;
+                    int l = 0;
+                    if ((uc&0xE0)==0xC0)
+                    {
+                      l=2; // 11xx.xxxx: >=2 byte character
+                    }
+                    if ((uc&0xF0)==0xE0)
+                    {
+                      l=3; // 111x.xxxx: >=3 byte character
+                    }
+                    if ((uc&0xF8)==0xF0)
+                    {
+                      l=4; // 1111.xxxx: >=4 byte character
+                    }
+                    doEscape = l==0;
+                    for (int m=1; m<l && !doEscape; ++m)
+                    {
+                      unsigned char ct = (unsigned char)*pt;
+                      if (ct==0 || (ct&0xC0)!=0x80) // invalid unicode character
+                      {
+                        doEscape=TRUE;
+                      }
+                      else
+                      {
+                        ids[ m ] = *pt++;
+                      }
+                    }
+                    if ( !doEscape ) // got a valid unicode character
+                    {
+                      ids[ l ] = 0;
+                      growBuf.addStr( ids );
+                      p += l - 1;
+                    }
+                  }
+                  if (doEscape) // not a valid unicode char or escaping needed
+                  {
+                    static char map[] = "0123456789ABCDEF";
+                    unsigned char id = (unsigned char)c;
+                    ids[0]='_';
+                    ids[1]='x';
+                    ids[2]=map[id>>4];
+                    ids[3]=map[id&0xF];
+                    ids[4]=0;
+                    growBuf.addStr(ids);
+                  }
                 }
                 else if (caseSenseNames || !isupper(c))
                 {
@@ -6922,14 +6965,25 @@ const char *writeUtf8Char(FTextStream &t,const char *s)
   t << c;
   if (c<0) // multibyte character
   {
-    t << *s++;
-    if (((uchar)c&0xE0)==0xE0)
+    if (((uchar)c&0xE0)==0xC0)
+    {
+      t << *s++; // 11xx.xxxx: >=2 byte character
+    }
+    if (((uchar)c&0xF0)==0xE0)
     {
       t << *s++; // 111x.xxxx: >=3 byte character
     }
-    if (((uchar)c&0xF0)==0xF0)
+    if (((uchar)c&0xF8)==0xF0)
+    {
+      t << *s++; // 1111.xxxx: >=4 byte character
+    }
+    if (((uchar)c&0xFC)==0xF8)
+    {
+      t << *s++; // 1111.1xxx: >=5 byte character
+    }
+    if (((uchar)c&0xFE)==0xFC)
     {
-      t << *s++; // 1111.xxxx: 4 byte character
+      t << *s++; // 1111.1xxx: 6 byte character
     }
   }
   return s;
@@ -6942,14 +6996,25 @@ int nextUtf8CharPosition(const QCString &utf8Str,int len,int startPos)
   char c = utf8Str[startPos];
   if (c<0) // multibyte utf-8 character
   {
-    bytes++;   // 1xxx.xxxx: >=2 byte character
-    if (((uchar)c&0xE0)==0xE0)
+    if (((uchar)c&0xE0)==0xC0)
+    {
+      bytes++; // 11xx.xxxx: >=2 byte character
+    }
+    if (((uchar)c&0xF0)==0xE0)
     {
       bytes++; // 111x.xxxx: >=3 byte character
     }
-    if (((uchar)c&0xF0)==0xF0)
+    if (((uchar)c&0xF8)==0xF0)
+    {
+      bytes++; // 1111.xxxx: >=4 byte character
+    }
+    if (((uchar)c&0xFC)==0xF8)
+    {
+      bytes++; // 1111.1xxx: >=5 byte character
+    }
+    if (((uchar)c&0xFE)==0xFC)
     {
-      bytes++; // 1111.xxxx: 4 byte character
+      bytes++; // 1111.1xxx: 6 byte character
     }
   }
   else if (c=='&') // skip over character entities
author	Dimitri van Heesch <dimitri@stack.nl>	2013-12-30 20:25:08 (GMT)
committer	Dimitri van Heesch <dimitri@stack.nl>	2013-12-30 20:25:08 (GMT)
commit	d5fb69739a1b2facf32a63ca94c12d097f8278cc (patch)
tree	da261caaf05e308cc5c01d12e9de437c9e1e015b /src/util.cpp
parent	43461e44d49200fd1564c4e416db7384e7e5eab8 (diff)
download	Doxygen-d5fb69739a1b2facf32a63ca94c12d097f8278cc.zip Doxygen-d5fb69739a1b2facf32a63ca94c12d097f8278cc.tar.gz Doxygen-d5fb69739a1b2facf32a63ca94c12d097f8278cc.tar.bz2