From c9ae8635949850562efe5ffc1524c84cb2b705cf Mon Sep 17 00:00:00 2001
From: albert-github
Date: Fri, 11 Dec 2020 15:37:04 +0100
Subject: issue #8142 UTF-8 in URL in source generates truncated URL in HTML

Specifying the URL in a similar way as other IDs
---
 src/doctokenizer.l | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/doctokenizer.l b/src/doctokenizer.l
index f756186..69a511a 100644
--- a/src/doctokenizer.l
+++ b/src/doctokenizer.l
@@ -384,7 +384,7 @@ OLISTITEM {BLANK}*[1-9][0-9]*"."{BLANK}
 ENDLIST {BLANK}*"."{BLANK}*\n
 ATTRNAME [a-z_A-Z\x80-\xFF][:a-z_A-Z0-9\x80-\xFF\-]*
 ATTRIB {ATTRNAME}{WS}*("="{WS}*(("\""[^\"]*"\"")|("'"[^\']*"'")|[^ \t\r\n'"><]+))?
-URLCHAR [a-z_A-Z0-9\!\~\,\:\;\'\$\?\@\&\%\#\.\-\+\/\=]
+URLCHAR [a-z_A-Z0-9\!\~\,\:\;\'\$\?\@\&\%\#\.\-\+\/\=\x80-\xFF]
 URLMASK ({URLCHAR}+([({]{URLCHAR}*[)}])?)+
 URLPROTOCOL ("http:"|"https:"|"ftp:"|"file:"|"news:"|"irc")
 FILESCHAR [a-z_A-Z0-9\\:\\\/\-\+&#@]
-- 
cgit v0.12