summaryrefslogtreecommitdiffstats
path: root/tclxml/tclxml-tcl
diff options
context:
space:
mode:
authorWilliam Joye <wjoye@cfa.harvard.edu>2016-10-27 20:17:32 (GMT)
committerWilliam Joye <wjoye@cfa.harvard.edu>2016-10-27 20:17:32 (GMT)
commitf7560d0a451a793441216d76eb4d9475aab61740 (patch)
tree1d7f6b96f50e96acb91525d1d933184eec54bb85 /tclxml/tclxml-tcl
parentea28451286d3ea4a772fa174483f9a7a66bb1ab3 (diff)
parent55c7ed6e4d159cebe06999bf61e668284a89cd69 (diff)
downloadblt-f7560d0a451a793441216d76eb4d9475aab61740.zip
blt-f7560d0a451a793441216d76eb4d9475aab61740.tar.gz
blt-f7560d0a451a793441216d76eb4d9475aab61740.tar.bz2
Merge commit '55c7ed6e4d159cebe06999bf61e668284a89cd69' as 'tclxml'
Diffstat (limited to 'tclxml/tclxml-tcl')
-rwxr-xr-xtclxml/tclxml-tcl/sgml-8.0.tcl143
-rwxr-xr-xtclxml/tclxml-tcl/sgml-8.1.tcl143
-rwxr-xr-xtclxml/tclxml-tcl/sgmlparser.tcl2814
-rwxr-xr-xtclxml/tclxml-tcl/tclparser-8.0.tcl359
-rwxr-xr-xtclxml/tclxml-tcl/tclparser-8.1.tcl614
-rwxr-xr-xtclxml/tclxml-tcl/xml-8.0.tcl92
-rwxr-xr-xtclxml/tclxml-tcl/xml-8.1.tcl135
-rw-r--r--tclxml/tclxml-tcl/xml__tcl.tcl272
-rw-r--r--tclxml/tclxml-tcl/xmldep.tcl179
-rw-r--r--tclxml/tclxml-tcl/xpath.tcl362
10 files changed, 5113 insertions, 0 deletions
diff --git a/tclxml/tclxml-tcl/sgml-8.0.tcl b/tclxml/tclxml-tcl/sgml-8.0.tcl
new file mode 100755
index 0000000..f1179cf
--- /dev/null
+++ b/tclxml/tclxml-tcl/sgml-8.0.tcl
@@ -0,0 +1,143 @@
+# sgml-8.0.tcl --
+#
+# This file provides generic parsing services for SGML-based
+# languages, namely HTML and XML.
+# This file supports Tcl 8.0 characters and regular expressions.
+#
+# NB. It is a misnomer. There is no support for parsing
+# arbitrary SGML as such.
+#
+# Copyright (c) 1998,1999 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: sgml-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require -exact Tcl 8.0
+
+package provide sgml 1.9
+
+namespace eval sgml {
+
+ # Convenience routine
+ proc cl x {
+ return "\[$x\]"
+ }
+
+ # Define various regular expressions
+
+ # Character classes
+ variable Char \t\n\r\ -\xFF
+ variable BaseChar A-Za-z
+ variable Letter $BaseChar
+ variable Digit 0-9
+ variable CombiningChar {}
+ variable Extender {}
+ variable Ideographic {}
+
+ # white space
+ variable Wsp " \t\r\n"
+ variable noWsp [cl ^$Wsp]
+
+ # Various XML names
+ variable NameChar \[-$Letter$Digit._:$CombiningChar$Extender\]
+ variable Name \[_:$BaseChar$Ideographic\]$NameChar*
+ variable Names ${Name}(?:$Wsp$Name)*
+ variable Nmtoken $NameChar+
+ variable Nmtokens ${Nmtoken}(?:$Wsp$Nmtoken)*
+
+ # table of predefined entities for XML
+
+ variable EntityPredef
+ array set EntityPredef {
+ lt < gt > amp & quot \" apos '
+ }
+
+}
+
+# These regular expressions are defined here once for better performance
+
+namespace eval sgml {
+ variable Wsp
+
+ # Watch out for case-sensitivity
+
+ set attlist_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#REQUIRED|#IMPLIED)
+ set attlist_enum_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*\\(([cl ^)]*)\\)[cl $Wsp]*("([cl ^")])")? ;# "
+ set attlist_fixed_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#FIXED)[cl $Wsp]*([cl ^$Wsp]+)
+
+ set param_entity_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^"$Wsp]*)[cl $Wsp]*"([cl ^"]*)"
+
+ set notation_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(.*)
+
+}
+
+### Utility procedures
+
+# sgml::noop --
+#
+# A do-nothing proc
+#
+# Arguments:
+# args arguments
+#
+# Results:
+# Nothing.
+
+proc sgml::noop args {
+ return 0
+}
+
+# sgml::identity --
+#
+# Identity function.
+#
+# Arguments:
+# a arbitrary argument
+#
+# Results:
+# $a
+
+proc sgml::identity a {
+ return $a
+}
+
+# sgml::Error --
+#
+# Throw an error
+#
+# Arguments:
+# args arguments
+#
+# Results:
+# Error return condition.
+
+proc sgml::Error args {
+ uplevel return -code error [list $args]
+}
+
+### Following procedures are based on html_library
+
+# sgml::zapWhite --
+#
+# Convert multiple white space into a single space.
+#
+# Arguments:
+# data plain text
+#
+# Results:
+# As above
+
+proc sgml::zapWhite data {
+ regsub -all "\[ \t\r\n\]+" $data { } data
+ return $data
+}
+
+proc sgml::Boolean value {
+ regsub {1|true|yes|on} $value 1 value
+ regsub {0|false|no|off} $value 0 value
+ return $value
+}
+
diff --git a/tclxml/tclxml-tcl/sgml-8.1.tcl b/tclxml/tclxml-tcl/sgml-8.1.tcl
new file mode 100755
index 0000000..60748bb
--- /dev/null
+++ b/tclxml/tclxml-tcl/sgml-8.1.tcl
@@ -0,0 +1,143 @@
+# sgml-8.1.tcl --
+#
+# This file provides generic parsing services for SGML-based
+# languages, namely HTML and XML.
+# This file supports Tcl 8.1 characters and regular expressions.
+#
+# NB. It is a misnomer. There is no support for parsing
+# arbitrary SGML as such.
+#
+# Copyright (c) 1998-2003 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: sgml-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require Tcl 8.1
+
+package provide sgml 1.9
+
+namespace eval sgml {
+
+ # Convenience routine
+ proc cl x {
+ return "\[$x\]"
+ }
+
+ # Define various regular expressions
+
+ # Character classes
+ variable Char \t\n\r\ -\uD7FF\uE000-\uFFFD\u10000-\u10FFFF
+ variable BaseChar \u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3
+ variable Ideographic \u4E00-\u9FA5\u3007\u3021-\u3029
+ variable CombiningChar \u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A
+ variable Digit \u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29
+ variable Extender \u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D-\u309E\u30FC-\u30FE
+ variable Letter $BaseChar|$Ideographic
+
+ # white space
+ variable Wsp " \t\r\n"
+ variable noWsp [cl ^$Wsp]
+
+ # Various XML names
+ variable NameChar \[-$Letter$Digit._:$CombiningChar$Extender\]
+ variable Name \[_:$BaseChar$Ideographic\]$NameChar*
+ variable Names ${Name}(?:$Wsp$Name)*
+ variable Nmtoken $NameChar+
+ variable Nmtokens ${Nmtoken}(?:$Wsp$Nmtoken)*
+
+ # table of predefined entities for XML
+
+ variable EntityPredef
+ array set EntityPredef {
+ lt < gt > amp & quot \" apos '
+ }
+
+}
+
+# These regular expressions are defined here once for better performance
+
+namespace eval sgml {
+ variable Wsp
+
+ # Watch out for case-sensitivity
+
+ set attlist_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#REQUIRED|#IMPLIED)
+ set attlist_enum_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*\\(([cl ^)]*)\\)[cl $Wsp]*("([cl ^")]*)")? ;# "
+ set attlist_fixed_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#FIXED)[cl $Wsp]*([cl ^$Wsp]+)
+
+ set param_entity_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^"$Wsp]*)[cl $Wsp]*"([cl ^"]*)"
+
+ set notation_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(.*)
+
+}
+
+### Utility procedures
+
+# sgml::noop --
+#
+# A do-nothing proc
+#
+# Arguments:
+# args arguments
+#
+# Results:
+# Nothing.
+
+proc sgml::noop args {
+ return 0
+}
+
+# sgml::identity --
+#
+# Identity function.
+#
+# Arguments:
+# a arbitrary argument
+#
+# Results:
+# $a
+
+proc sgml::identity a {
+ return $a
+}
+
+# sgml::Error --
+#
+# Throw an error
+#
+# Arguments:
+# args arguments
+#
+# Results:
+# Error return condition.
+
+proc sgml::Error args {
+ uplevel return -code error [list $args]
+}
+
+### Following procedures are based on html_library
+
+# sgml::zapWhite --
+#
+# Convert multiple white space into a single space.
+#
+# Arguments:
+# data plain text
+#
+# Results:
+# As above
+
+proc sgml::zapWhite data {
+ regsub -all "\[ \t\r\n\]+" $data { } data
+ return $data
+}
+
+proc sgml::Boolean value {
+ regsub {1|true|yes|on} $value 1 value
+ regsub {0|false|no|off} $value 0 value
+ return $value
+}
+
diff --git a/tclxml/tclxml-tcl/sgmlparser.tcl b/tclxml/tclxml-tcl/sgmlparser.tcl
new file mode 100755
index 0000000..2677a44
--- /dev/null
+++ b/tclxml/tclxml-tcl/sgmlparser.tcl
@@ -0,0 +1,2814 @@
+# sgmlparser.tcl --
+#
+# This file provides the generic part of a parser for SGML-based
+# languages, namely HTML and XML.
+#
+# NB. It is a misnomer. There is no support for parsing
+# arbitrary SGML as such.
+#
+# See sgml.tcl for variable definitions.
+#
+# Copyright (c) 2008 Explain
+# http://www.explain.com.au/
+# Copyright (c) 1998-2003 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: sgmlparser.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require sgml 1.9
+
+package require uri 1.1
+
+package provide sgmlparser 1.1
+
+namespace eval sgml {
+ namespace export tokenise parseEvent
+
+ namespace export parseDTD
+
+ # NB. Most namespace variables are defined in sgml-8.[01].tcl
+ # to account for differences between versions of Tcl.
+ # This especially includes the regular expressions used.
+
+ variable ParseEventNum
+ if {![info exists ParseEventNum]} {
+ set ParseEventNum 0
+ }
+ variable ParseDTDnum
+ if {![info exists ParseDTDNum]} {
+ set ParseDTDNum 0
+ }
+
+ variable declExpr [cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)
+ variable EntityExpr [cl $::sgml::Wsp]*(%[cl $::sgml::Wsp])?[cl $::sgml::Wsp]*($::sgml::Name)[cl $::sgml::Wsp]+(.*)
+
+ #variable MarkupDeclExpr <([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)>
+ #variable MarkupDeclSub "} {\\1} {\\2} {\\3} {"
+ variable MarkupDeclExpr <[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^>]*)>
+ variable MarkupDeclSub "\} {\\1} {\\2} \{"
+
+ variable ExternalEntityExpr ^(PUBLIC|SYSTEM)[cl $::sgml::Wsp]+("|')(.*?)\\2([cl $::sgml::Wsp]+("|')(.*?)\\2)?([cl $::sgml::Wsp]+NDATA[cl $::sgml::Wsp]+($::xml::Name))?\$
+
+ variable StdOptions
+ array set StdOptions [list \
+ -elementstartcommand [namespace current]::noop \
+ -elementendcommand [namespace current]::noop \
+ -characterdatacommand [namespace current]::noop \
+ -processinginstructioncommand [namespace current]::noop \
+ -externalentitycommand {} \
+ -xmldeclcommand [namespace current]::noop \
+ -doctypecommand [namespace current]::noop \
+ -commentcommand [namespace current]::noop \
+ -entitydeclcommand [namespace current]::noop \
+ -unparsedentitydeclcommand [namespace current]::noop \
+ -parameterentitydeclcommand [namespace current]::noop \
+ -notationdeclcommand [namespace current]::noop \
+ -elementdeclcommand [namespace current]::noop \
+ -attlistdeclcommand [namespace current]::noop \
+ -paramentityparsing 1 \
+ -defaultexpandinternalentities 1 \
+ -startdoctypedeclcommand [namespace current]::noop \
+ -enddoctypedeclcommand [namespace current]::noop \
+ -entityreferencecommand {} \
+ -warningcommand [namespace current]::noop \
+ -errorcommand [namespace current]::Error \
+ -final 1 \
+ -validate 0 \
+ -baseuri {} \
+ -name {} \
+ -cmd {} \
+ -emptyelement [namespace current]::EmptyElement \
+ -parseattributelistcommand [namespace current]::noop \
+ -parseentitydeclcommand [namespace current]::noop \
+ -normalize 1 \
+ -internaldtd {} \
+ -reportempty 0 \
+ -ignorewhitespace 0 \
+ ]
+}
+
+# sgml::tokenise --
+#
+# Transform the given HTML/XML text into a Tcl list.
+#
+# Arguments:
+# sgml text to tokenize
+# elemExpr RE to recognise tags
+# elemSub transform for matched tags
+# args options
+#
+# Valid Options:
+# -internaldtdvariable
+# -final boolean True if no more data is to be supplied
+# -statevariable varName Name of a variable used to store info
+#
+# Results:
+# Returns a Tcl list representing the document.
+
+proc sgml::tokenise {sgml elemExpr elemSub args} {
+ array set options {-final 1}
+ array set options $args
+ set options(-final) [Boolean $options(-final)]
+
+ # If the data is not final then there must be a variable to store
+ # unused data.
+ if {!$options(-final) && ![info exists options(-statevariable)]} {
+ return -code error {option "-statevariable" required if not final}
+ }
+
+ # Pre-process stage
+ #
+ # Extract the internal DTD subset, if any
+
+ catch {upvar #0 $options(-internaldtdvariable) dtd}
+ if {[regexp {<!DOCTYPE[^[<]+\[([^]]+)\]} $sgml discard dtd]} {
+ regsub {(<!DOCTYPE[^[<]+)(\[[^]]+\])} $sgml {\1\&xml:intdtd;} sgml
+ }
+
+ # Protect Tcl special characters
+ regsub -all {([{}\\])} $sgml {\\\1} sgml
+
+ # Do the translation
+
+ if {[info exists options(-statevariable)]} {
+ # Mats: Several rewrites here to handle -final 0 option.
+ # If any cached unparsed xml (state(leftover)), prepend it.
+ upvar #0 $options(-statevariable) state
+ if {[string length $state(leftover)]} {
+ regsub -all $elemExpr $state(leftover)$sgml $elemSub sgml
+ set state(leftover) {}
+ } else {
+ regsub -all $elemExpr $sgml $elemSub sgml
+ }
+ set sgml "{} {} {} \{$sgml\}"
+
+ # Performance note (Tcl 8.0):
+ # Use of lindex, lreplace will cause parsing to list object
+
+ # This RE only fixes chopped inside tags, not chopped text.
+ if {[regexp {^([^<]*)(<[^>]*$)} [lindex $sgml end] x text rest]} {
+ set sgml [lreplace $sgml end end $text]
+ # Mats: unmatched stuff means that it is chopped off. Cache it for next round.
+ set state(leftover) $rest
+ }
+
+ # Patch from bug report #596959, Marshall Rose
+ if {[string compare [lindex $sgml 4] ""]} {
+ set sgml [linsert $sgml 0 {} {} {} {} {}]
+ }
+
+ } else {
+
+ # Performance note (Tcl 8.0):
+ # In this case, no conversion to list object is performed
+
+ # Mats: This fails if not -final and $sgml is chopped off right in a tag.
+ regsub -all $elemExpr $sgml $elemSub sgml
+ set sgml "{} {} {} \{$sgml\}"
+ }
+
+ return $sgml
+
+}
+
+# sgml::parseEvent --
+#
+# Produces an event stream for a XML/HTML document,
+# given the Tcl list format returned by tokenise.
+#
+# This procedure checks that the document is well-formed,
+# and throws an error if the document is found to be not
+# well formed. Warnings are passed via the -warningcommand script.
+#
+# The procedure only check for well-formedness,
+# no DTD is required. However, facilities are provided for entity expansion.
+#
+# Arguments:
+# sgml Instance data, as a Tcl list.
+# args option/value pairs
+#
+# Valid Options:
+# -final Indicates end of document data
+# -validate Boolean to enable validation
+# -baseuri URL for resolving relative URLs
+# -elementstartcommand Called when an element starts
+# -elementendcommand Called when an element ends
+# -characterdatacommand Called when character data occurs
+# -entityreferencecommand Called when an entity reference occurs
+# -processinginstructioncommand Called when a PI occurs
+# -externalentitycommand Called for an external entity reference
+#
+# -xmldeclcommand Called when the XML declaration occurs
+# -doctypecommand Called when the document type declaration occurs
+# -commentcommand Called when a comment occurs
+# -entitydeclcommand Called when a parsed entity is declared
+# -unparsedentitydeclcommand Called when an unparsed external entity is declared
+# -parameterentitydeclcommand Called when a parameter entity is declared
+# -notationdeclcommand Called when a notation is declared
+# -elementdeclcommand Called when an element is declared
+# -attlistdeclcommand Called when an attribute list is declared
+# -paramentityparsing Boolean to enable/disable parameter entity substitution
+# -defaultexpandinternalentities Boolean to enable/disable expansion of entities declared in internal DTD subset
+#
+# -startdoctypedeclcommand Called when the Doc Type declaration starts (see also -doctypecommand)
+# -enddoctypedeclcommand Called when the Doc Type declaration ends (see also -doctypecommand)
+#
+# -errorcommand Script to evaluate for a fatal error
+# -warningcommand Script to evaluate for a reportable warning
+# -statevariable global state variable
+# -normalize whether to normalize names
+# -reportempty whether to include an indication of empty elements
+# -ignorewhitespace whether to automatically strip whitespace
+#
+# Results:
+# The various callback scripts are invoked.
+# Returns empty string.
+#
+# BUGS:
+# If command options are set to empty string then they should not be invoked.
+
+proc sgml::parseEvent {sgml args} {
+ variable Wsp
+ variable noWsp
+ variable Nmtoken
+ variable Name
+ variable ParseEventNum
+ variable StdOptions
+
+ array set options [array get StdOptions]
+ catch {array set options $args}
+
+ # Mats:
+ # If the data is not final then there must be a variable to persistently store the parse state.
+ if {!$options(-final) && ![info exists options(-statevariable)]} {
+ return -code error {option "-statevariable" required if not final}
+ }
+
+ foreach {opt value} [array get options *command] {
+ if {[string compare $opt "-externalentitycommand"] && ![string length $value]} {
+ set options($opt) [namespace current]::noop
+ }
+ }
+
+ if {![info exists options(-statevariable)]} {
+ set options(-statevariable) [namespace current]::ParseEvent[incr ParseEventNum]
+ }
+ if {![info exists options(entities)]} {
+ set options(entities) [namespace current]::Entities$ParseEventNum
+ array set $options(entities) [array get [namespace current]::EntityPredef]
+ }
+ if {![info exists options(extentities)]} {
+ set options(extentities) [namespace current]::ExtEntities$ParseEventNum
+ }
+ if {![info exists options(parameterentities)]} {
+ set options(parameterentities) [namespace current]::ParamEntities$ParseEventNum
+ }
+ if {![info exists options(externalparameterentities)]} {
+ set options(externalparameterentities) [namespace current]::ExtParamEntities$ParseEventNum
+ }
+ if {![info exists options(elementdecls)]} {
+ set options(elementdecls) [namespace current]::ElementDecls$ParseEventNum
+ }
+ if {![info exists options(attlistdecls)]} {
+ set options(attlistdecls) [namespace current]::AttListDecls$ParseEventNum
+ }
+ if {![info exists options(notationdecls)]} {
+ set options(notationdecls) [namespace current]::NotationDecls$ParseEventNum
+ }
+ if {![info exists options(namespaces)]} {
+ set options(namespaces) [namespace current]::Namespaces$ParseEventNum
+ }
+
+ # For backward-compatibility
+ catch {set options(-baseuri) $options(-baseurl)}
+
+ # Choose an external entity resolver
+
+ if {![string length $options(-externalentitycommand)]} {
+ if {$options(-validate)} {
+ set options(-externalentitycommand) [namespace code ResolveEntity]
+ } else {
+ set options(-externalentitycommand) [namespace code noop]
+ }
+ }
+
+ upvar #0 $options(-statevariable) state
+ upvar #0 $options(entities) entities
+
+ # Mats:
+ # The problem is that the state is not maintained when -final 0 !
+ # I've switched back to an older version here.
+
+ if {![info exists state(line)]} {
+ # Initialise the state variable
+ array set state {
+ mode normal
+ haveXMLDecl 0
+ haveDocElement 0
+ inDTD 0
+ context {}
+ stack {}
+ line 0
+ defaultNS {}
+ defaultNSURI {}
+ }
+ }
+
+ foreach {tag close param text} $sgml {
+
+ # Keep track of lines in the input
+ incr state(line) [regsub -all \n $param {} discard]
+ incr state(line) [regsub -all \n $text {} discard]
+
+ # If the current mode is cdata or comment then we must undo what the
+ # regsub has done to reconstitute the data
+
+ set empty {}
+ switch $state(mode) {
+ comment {
+ # This had "[string length $param] && " as a guard -
+ # can't remember why :-(
+ if {[regexp ([cl ^-]*)--\$ $tag discard comm1]} {
+ # end of comment (in tag)
+ set tag {}
+ set close {}
+ set state(mode) normal
+ DeProtect1 $options(-commentcommand) $state(commentdata)<$comm1
+ unset state(commentdata)
+ } elseif {[regexp ([cl ^-]*)--\$ $param discard comm1]} {
+ # end of comment (in attributes)
+ DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag>$comm1
+ unset state(commentdata)
+ set tag {}
+ set param {}
+ set close {}
+ set state(mode) normal
+ } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm1 text]} {
+ # end of comment (in text)
+ DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag$param>$comm1
+ unset state(commentdata)
+ set tag {}
+ set param {}
+ set close {}
+ set state(mode) normal
+ } else {
+ # comment continues
+ append state(commentdata) <$close$tag$param>$text
+ continue
+ }
+ }
+ cdata {
+ if {[string length $param] && [regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $tag discard cdata1]} {
+ # end of CDATA (in tag)
+ PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$cdata1]
+ set text [subst -novariable -nocommand $text]
+ set tag {}
+ unset state(cdata)
+ set state(mode) normal
+ } elseif {[regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $param discard cdata1]} {
+ # end of CDATA (in attributes)
+ PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$cdata1]
+ set text [subst -novariable -nocommand $text]
+ set tag {}
+ set param {}
+ unset state(cdata)
+ set state(mode) normal
+ } elseif {[regexp (.*)\]\][cl $Wsp]*>(.*) $text discard cdata1 text]} {
+ # end of CDATA (in text)
+ PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$param>$cdata1]
+ set text [subst -novariable -nocommand $text]
+ set tag {}
+ set param {}
+ set close {}
+ unset state(cdata)
+ set state(mode) normal
+ } else {
+ # CDATA continues
+ append state(cdata) [subst -nocommand -novariable <$close$tag$param>$text]
+ continue
+ }
+ }
+ continue {
+ # We're skipping elements looking for the close tag
+ switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close {
+ 0,* {
+ continue
+ }
+ *,0, {
+ if {![string compare $tag $state(continue:tag)]} {
+ set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]
+ if {![string length $empty]} {
+ incr state(continue:level)
+ }
+ }
+ continue
+ }
+ *,0,/ {
+ if {![string compare $tag $state(continue:tag)]} {
+ incr state(continue:level) -1
+ }
+ if {!$state(continue:level)} {
+ unset state(continue:tag)
+ unset state(continue:level)
+ set state(mode) {}
+ }
+ }
+ default {
+ continue
+ }
+ }
+ }
+ default {
+ # The trailing slash on empty elements can't be automatically separated out
+ # in the RE, so we must do it here.
+ regexp (.*)(/)[cl $Wsp]*$ $param discard param empty
+ }
+ }
+
+ # default: normal mode
+
+ # Bug: if the attribute list has a right angle bracket then the empty
+ # element marker will not be seen
+
+ set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]]
+
+ switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty {
+
+ 0,0,, {
+ # Ignore empty tag - dealt with non-normal mode above
+ }
+ *,0,, {
+
+ # Start tag for an element.
+
+ # Check if the internal DTD entity is in an attribute value
+ regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param
+
+ set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg]
+ set state(haveDocElement) 1
+ switch $code {
+ 0 {# OK}
+ 3 {
+ # break
+ return {}
+ }
+ 4 {
+ # continue
+ # Remember this tag and look for its close
+ set state(continue:tag) $tag
+ set state(continue:level) 1
+ set state(mode) continue
+ continue
+ }
+ default {
+ return -code $code -errorinfo $::errorInfo $msg
+ }
+ }
+
+ }
+
+ *,0,/, {
+
+ # End tag for an element.
+
+ set code [catch {ParseEvent:ElementClose $tag [array get options]} msg]
+ switch $code {
+ 0 {# OK}
+ 3 {
+ # break
+ return {}
+ }
+ 4 {
+ # continue
+ # skip sibling nodes
+ set state(continue:tag) [lindex $state(stack) end]
+ set state(continue:level) 1
+ set state(mode) continue
+ continue
+ }
+ default {
+ return -code $code -errorinfo $::errorInfo $msg
+ }
+ }
+
+ }
+
+ *,0,,/ {
+
+ # Empty element
+
+ # The trailing slash sneaks through into the param variable
+ regsub -all /[cl $::sgml::Wsp]*\$ $param {} param
+
+ set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg]
+ set state(haveDocElement) 1
+ switch $code {
+ 0 {# OK}
+ 3 {
+ # break
+ return {}
+ }
+ 4 {
+ # continue
+ # Pretty useless since it closes straightaway
+ }
+ default {
+ return -code $code -errorinfo $::errorInfo $msg
+ }
+ }
+ set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg]
+ switch $code {
+ 0 {# OK}
+ 3 {
+ # break
+ return {}
+ }
+ 4 {
+ # continue
+ # skip sibling nodes
+ set state(continue:tag) [lindex $state(stack) end]
+ set state(continue:level) 1
+ set state(mode) continue
+ continue
+ }
+ default {
+ return -code $code -errorinfo $::errorInfo $msg
+ }
+ }
+
+ }
+
+ *,1,* {
+ # Processing instructions or XML declaration
+ switch -glob -- $tag {
+
+ {\?xml} {
+ # XML Declaration
+ if {$state(haveXMLDecl)} {
+ uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"]
+ } elseif {![regexp {\?$} $param]} {
+ uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"]
+ } else {
+
+ # We can do the parsing in one step with Tcl 8.1 RE's
+ # This has the benefit of performing better WF checking
+
+ set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp]
+
+ if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} {
+ # Otherwise we must fallback to 8.0.
+ # This won't detect certain well-formedness errors
+
+ # Get the version number
+ if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} {
+ if {[string compare $version "1.0"]} {
+ # Should we support future versions?
+ # At least 1.X?
+ uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"]
+ }
+ } else {
+ uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"]
+ }
+
+ # Get the encoding declaration
+ set encoding {}
+ regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding
+ regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding
+
+ # Get the standalone declaration
+ set standalone {}
+ regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone
+ regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone
+
+ # Invoke the callback
+ uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]
+
+ } elseif {$matches == 0} {
+ uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"]
+ } else {
+
+ # Invoke the callback
+ uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone]
+
+ }
+
+ }
+
+ }
+
+ {\?*} {
+ # Processing instruction
+ set tag [string range $tag 1 end]
+ if {[regsub {\?$} $tag {} tag]} {
+ if {[string length [string trim $param]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"]
+ }
+ } elseif {![regexp ^$Name\$ $tag]} {
+ uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""]
+ } elseif {[regexp {[xX][mM][lL]} $tag]} {
+ uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""]
+ } elseif {![regsub {\?$} $param {} param]} {
+ uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"]
+ }
+ set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg]
+ switch $code {
+ 0 {# OK}
+ 3 {
+ # break
+ return {}
+ }
+ 4 {
+ # continue
+ # skip sibling nodes
+ set state(continue:tag) [lindex $state(stack) end]
+ set state(continue:level) 1
+ set state(mode) continue
+ continue
+ }
+ default {
+ return -code $code -errorinfo $::errorInfo $msg
+ }
+ }
+ }
+
+ !DOCTYPE {
+ # External entity reference
+ # This should move into xml.tcl
+ # Parse the params supplied. Looking for Name, ExternalID and MarkupDecl
+ set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param]
+ set state(doc_name) [Normalize $state(doc_name) $options(-normalize)]
+ set externalID {}
+ set pubidlit {}
+ set systemlit {}
+ set externalID {}
+ if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} {
+ switch [string toupper $id] {
+ SYSTEM {
+ if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {
+ set externalID [list SYSTEM $systemlit] ;# "
+ } else {
+ uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}}
+ }
+ }
+ PUBLIC {
+ if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} {
+ if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} {
+ set externalID [list PUBLIC $pubidlit $systemlit]
+ } else {
+ uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"]
+ }
+ } else {
+ uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"]
+ }
+ }
+ }
+ if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} {
+ lappend externalID $notation
+ }
+ }
+
+ set state(inDTD) 1
+
+ ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd)
+
+ set state(inDTD) 0
+
+ }
+
+ !--* {
+
+ # Start of a comment
+ # See if it ends in the same tag, otherwise change the
+ # parsing mode
+
+ regexp {!--(.*)} $tag discard comm1
+ if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} {
+ # processed comment (end in tag)
+ uplevel #0 $options(-commentcommand) [list $comm1_1]
+ } elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} {
+ # processed comment (end in attributes)
+ uplevel #0 $options(-commentcommand) [list $comm1$comm2]
+ } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} {
+ # processed comment (end in text)
+ uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2]
+ } else {
+ # start of comment
+ set state(mode) comment
+ set state(commentdata) "$comm1$param$empty>$text"
+ continue
+ }
+ }
+
+ {!\[CDATA\[*} {
+
+ regexp {!\[CDATA\[(.*)} $tag discard cdata1
+ if {[regexp {(.*)]]$} $cdata1 discard cdata2]} {
+ # processed CDATA (end in tag)
+ PCDATA [array get options] [subst -novariable -nocommand $cdata2]
+ set text [subst -novariable -nocommand $text]
+ } elseif {[regexp {(.*)]]$} $param discard cdata2]} {
+ # processed CDATA (end in attribute)
+ # Backslashes in param are quoted at this stage
+ PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2]
+ set text [subst -novariable -nocommand $text]
+ } elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} {
+ # processed CDATA (end in text)
+ # Backslashes in param and text are quoted at this stage
+ PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2]
+ set text [subst -novariable -nocommand $text]
+ } else {
+ # start CDATA
+ set state(cdata) "$cdata1$param>$text"
+ set state(mode) cdata
+ continue
+ }
+
+ }
+
+ !ELEMENT -
+ !ATTLIST -
+ !ENTITY -
+ !NOTATION {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"]
+ }
+
+ default {
+ uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"]
+ }
+ }
+ }
+ *,1,* -
+ *,0,/,/ {
+ # Syntax error
+ uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"]
+ }
+ }
+
+ # Process character data
+
+ if {$state(haveDocElement) && [llength $state(stack)]} {
+
+ # Check if the internal DTD entity is in the text
+ regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text
+
+ # Look for entity references
+ if {([array size entities] || \
+ [string length $options(-entityreferencecommand)]) && \
+ $options(-defaultexpandinternalentities) && \
+ [regexp {&[^;]+;} $text]} {
+
+ # protect Tcl specials
+ # NB. braces and backslashes may already be protected
+ regsub -all {\\({|}|\\)} $text {\1} text
+ regsub -all {([][$\\{}])} $text {\\\1} text
+
+ # Mark entity references
+ regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text
+ set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}"
+ eval $text
+ } else {
+
+ # Restore protected special characters
+ regsub -all {\\([][{}\\])} $text {\1} text
+ PCDATA [array get options] $text
+ }
+ } elseif {[string length [string trim $text]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"]
+ }
+
+ }
+
+ # If this is the end of the document, close all open containers
+ if {$options(-final) && [llength $state(stack)]} {
+ eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"]
+ }
+
+ return {}
+}
+
+# sgml::DeProtect --
+#
+# Invoke given command after removing protecting backslashes
+# from given text.
+#
+# Arguments:
+# cmd Command to invoke
+# text Text to deprotect
+#
+# Results:
+# Depends on command
+
+proc sgml::DeProtect1 {cmd text} {
+ if {[string compare {} $text]} {
+ regsub -all {\\([]$[{}\\])} $text {\1} text
+ uplevel #0 $cmd [list $text]
+ }
+}
+proc sgml::DeProtect {cmd text} {
+ set text [lindex $text 0]
+ if {[string compare {} $text]} {
+ regsub -all {\\([]$[{}\\])} $text {\1} text
+ uplevel #0 $cmd [list $text]
+ }
+}
+
+# sgml::ParserDelete --
+#
+# Free all memory associated with parser
+#
+# Arguments:
+# var global state array
+#
+# Results:
+# Variables unset
+
+proc sgml::ParserDelete var {
+ upvar #0 $var state
+
+ if {![info exists state]} {
+ return -code error "unknown parser"
+ }
+
+ catch {unset $state(entities)}
+ catch {unset $state(parameterentities)}
+ catch {unset $state(elementdecls)}
+ catch {unset $state(attlistdecls)}
+ catch {unset $state(notationdecls)}
+ catch {unset $state(namespaces)}
+
+ unset state
+
+ return {}
+}
+
+# sgml::ParseEvent:ElementOpen --
+#
+# Start of an element.
+#
+# Arguments:
+# tag Element name
+# attr Attribute list
+# opts Options
+# args further configuration options
+#
+# Options:
+# -empty boolean
+# indicates whether the element was an empty element
+#
+# Results:
+# Modify state and invoke callback
+
+proc sgml::ParseEvent:ElementOpen {tag attr opts args} {
+ variable Name
+ variable Wsp
+
+ array set options $opts
+ upvar #0 $options(-statevariable) state
+ array set cfg {-empty 0}
+ array set cfg $args
+ set handleEmpty 0
+
+ if {$options(-normalize)} {
+ set tag [string toupper $tag]
+ }
+
+ # Update state
+ lappend state(stack) $tag
+
+ # Parse attribute list into a key-value representation
+ if {[string compare $options(-parseattributelistcommand) {}]} {
+ if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} {
+ if {[string compare [lindex $attr 0] "unterminated attribute value"]} {
+ uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]
+ set attr {}
+ } else {
+
+ # It is most likely that a ">" character was in an attribute value.
+ # This manifests itself by ">" appearing in the element's text.
+ # In this case the callback should return a three element list;
+ # the message "unterminated attribute value", the attribute list it
+ # did manage to parse and the remainder of the attribute list.
+
+ foreach {msg attlist brokenattr} $attr break
+
+ upvar text elemText
+ if {[string first > $elemText] >= 0} {
+
+ # Now piece the attribute list back together
+ regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue
+ regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText
+ regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist
+
+ # Gotcha: watch out for empty element syntax
+ if {[string match */ [string trimright $remattlist]]} {
+ set remattlist [string range $remattlist 0 end-1]
+ set handleEmpty 1
+ set cfg(-empty) 1
+ }
+
+ append attvalue >$remattvalue
+ lappend attlist $attname $attvalue
+
+ # Complete parsing the attribute list
+ if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} {
+ uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]
+ set attr {}
+ set attlist {}
+ } else {
+ eval lappend attlist $attr
+ }
+
+ set attr $attlist
+
+ } else {
+ uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"]
+ set attr {}
+ }
+ }
+ }
+ }
+
+ set empty {}
+ if {$cfg(-empty) && $options(-reportempty)} {
+ set empty {-empty 1}
+ }
+
+ # Check for namespace declarations
+ upvar #0 $options(namespaces) namespaces
+ set nsdecls {}
+ if {[llength $attr]} {
+ array set attrlist $attr
+ foreach {attrName attrValue} [array get attrlist xmlns*] {
+ unset attrlist($attrName)
+ set colon [set prefix {}]
+ if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} {
+ switch -glob [string length $colon],[string length $prefix] {
+ 0,0 {
+ # default NS declaration
+ lappend state(defaultNSURI) $attrValue
+ lappend state(defaultNS) [llength $state(stack)]
+ lappend nsdecls $attrValue {}
+ }
+ 0,* {
+ # Huh?
+ }
+ *,0 {
+ # Error
+ uplevel #0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\""
+ }
+ default {
+ set namespaces($prefix,[llength $state(stack)]) $attrValue
+ lappend nsdecls $attrValue $prefix
+ }
+ }
+ }
+ }
+ if {[llength $nsdecls]} {
+ set nsdecls [list -namespacedecls $nsdecls]
+ }
+ set attr [array get attrlist]
+ }
+
+ # Check whether this element has an expanded name
+ set ns {}
+ if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {
+ set nsspec [lsort -dictionary -decreasing [array names namespaces $prefix,*]]
+ if {[llength $nsspec]} {
+ set nsuri $namespaces([lindex $nsspec 0])
+ set ns [list -namespace $nsuri]
+ } else {
+ uplevel #0 $options(-errorcommand) [list namespaceundeclared "no namespace declared for prefix \"$prefix\" in element $tag"]
+ }
+ } elseif {[llength $state(defaultNSURI)]} {
+ set ns [list -namespace [lindex $state(defaultNSURI) end]]
+ }
+
+ # Invoke callback
+ set code [catch {uplevel #0 $options(-elementstartcommand) [list $tag $attr] $empty $ns $nsdecls} msg]
+
+ # Sometimes empty elements must be handled here (see above)
+ if {$code == 0 && $handleEmpty} {
+ ParseEvent:ElementClose $tag $opts -empty 1
+ }
+
+ return -code $code -errorinfo $::errorInfo $msg
+}
+
+# sgml::ParseEvent:ElementClose --
+#
+# End of an element.
+#
+# Arguments:
+# tag Element name
+# opts Options
+# args further configuration options
+#
+# Options:
+# -empty boolean
+# indicates whether the element as an empty element
+#
+# Results:
+# Modify state and invoke callback
+
+proc sgml::ParseEvent:ElementClose {tag opts args} {
+ array set options $opts
+ upvar #0 $options(-statevariable) state
+ array set cfg {-empty 0}
+ array set cfg $args
+
+ # WF check
+ if {[string compare $tag [lindex $state(stack) end]]} {
+ uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"]
+ return
+ }
+
+ # Check whether this element has an expanded name
+ upvar #0 $options(namespaces) namespaces
+ set ns {}
+ if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} {
+ set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0])
+ set ns [list -namespace $nsuri]
+ } elseif {[llength $state(defaultNSURI)]} {
+ set ns [list -namespace [lindex $state(defaultNSURI) end]]
+ }
+
+ # Pop namespace stacks, if any
+ if {[llength $state(defaultNS)]} {
+ if {[llength $state(stack)] == [lindex $state(defaultNS) end]} {
+ set state(defaultNS) [lreplace $state(defaultNS) end end]
+ }
+ }
+ foreach nsspec [array names namespaces *,[llength $state(stack)]] {
+ unset namespaces($nsspec)
+ }
+
+ # Update state
+ set state(stack) [lreplace $state(stack) end end]
+
+ set empty {}
+ if {$cfg(-empty) && $options(-reportempty)} {
+ set empty {-empty 1}
+ }
+
+ # Invoke callback
+ # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback.
+ set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg]
+ return -code $code -errorinfo $::errorInfo $msg
+}
+
+# sgml::PCDATA --
+#
+# Process PCDATA before passing to application
+#
+# Arguments:
+# opts options
+# pcdata Character data to be processed
+#
+# Results:
+# Checks that characters are legal,
+# checks -ignorewhitespace setting.
+
+proc sgml::PCDATA {opts pcdata} {
+ array set options $opts
+
+ if {$options(-ignorewhitespace) && \
+ ![string length [string trim $pcdata]]} {
+ return {}
+ }
+
+ if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} {
+ upvar \#0 $options(-statevariable) state
+ uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"]
+ }
+
+ uplevel \#0 $options(-characterdatacommand) [list $pcdata]
+}
+
+# sgml::Normalize --
+#
+# Perform name normalization if required
+#
+# Arguments:
+# name name to normalize
+# req normalization required
+#
+# Results:
+# Name returned as upper-case if normalization required
+
+proc sgml::Normalize {name req} {
+ if {$req} {
+ return [string toupper $name]
+ } else {
+ return $name
+ }
+}
+
+# sgml::Entity --
+#
+# Resolve XML entity references (syntax: &xxx;).
+#
+# Arguments:
+# opts options
+# entityrefcmd application callback for entity references
+# pcdatacmd application callback for character data
+# entities name of array containing entity definitions.
+# ref entity reference (the "xxx" bit)
+#
+# Results:
+# Returns substitution text for given entity.
+
+proc sgml::Entity {opts entityrefcmd pcdatacmd entities ref} {
+ array set options $opts
+ upvar #0 $options(-statevariable) state
+
+ if {![string length $entities]} {
+ set entities [namespace current]::EntityPredef
+ }
+
+ # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
+ switch -glob -- $ref {
+ {%*} {
+ # Parameter entity - not recognised outside of a DTD
+ }
+ {#x*} {
+ # Character entity - hex
+ if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} {
+ return -code error "malformed character entity \"$ref\""
+ }
+ uplevel #0 $pcdatacmd [list $char]
+
+ return {}
+
+ }
+ {#*} {
+ # Character entity - decimal
+ if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} {
+ return -code error "malformed character entity \"$ref\""
+ }
+ uplevel #0 $pcdatacmd [list $char]
+
+ return {}
+
+ }
+ default {
+ # General entity
+ upvar #0 $entities map
+ if {[info exists map($ref)]} {
+
+ if {![regexp {<|&} $map($ref)]} {
+
+ # Simple text replacement - optimise
+ uplevel #0 $pcdatacmd [list $map($ref)]
+
+ return {}
+
+ }
+
+ # Otherwise an additional round of parsing is required.
+ # This only applies to XML, since HTML doesn't have general entities
+
+ # Must parse the replacement text for start & end tags, etc
+ # This text must be self-contained: balanced closing tags, and so on
+
+ set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr]
+ set options(-final) 0
+ eval parseEvent [list $tokenised] [array get options]
+
+ return {}
+
+ } elseif {[string compare $entityrefcmd "::sgml::noop"]} {
+
+ set result [uplevel #0 $entityrefcmd [list $ref]]
+
+ if {[string length $result]} {
+ uplevel #0 $pcdatacmd [list $result]
+ }
+
+ return {}
+
+ } else {
+
+ # Reconstitute entity reference
+
+ uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""]
+
+ return {}
+
+ }
+ }
+ }
+
+ # If all else fails leave the entity reference untouched
+ uplevel #0 $pcdatacmd [list &$ref\;]
+
+ return {}
+}
+
+####################################
+#
+# DTD parser for SGML (XML).
+#
+# This DTD actually only handles XML DTDs. Other language's
+# DTD's, such as HTML, must be written in terms of a XML DTD.
+#
+####################################
+
+# sgml::ParseEvent:DocTypeDecl --
+#
+# Entry point for DTD parsing
+#
+# Arguments:
+# opts configuration options
+# docEl document element name
+# pubId public identifier
+# sysId system identifier (a URI)
+# intSSet internal DTD subset
+
+proc sgml::ParseEvent:DocTypeDecl {opts docEl pubId sysId intSSet} {
+ array set options {}
+ array set options $opts
+
+ set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err]
+ switch $code {
+ 3 {
+ # break
+ return {}
+ }
+ 0 -
+ 4 {
+ # continue
+ }
+ default {
+ return -code $code $err
+ }
+ }
+
+ # Otherwise we'll parse the DTD and report it piecemeal
+
+ # The internal DTD subset is processed first (XML 2.8)
+ # During this stage, parameter entities are only allowed
+ # between markup declarations
+
+ ParseDTD:Internal [array get options] $intSSet
+
+ # The external DTD subset is processed last (XML 2.8)
+ # During this stage, parameter entities may occur anywhere
+
+ # We must resolve the external identifier to obtain the
+ # DTD data. The application may supply its own resolver.
+
+ if {[string length $pubId] || [string length $sysId]} {
+ uplevel #0 $options(-externalentitycommand) [list $options(-cmd) $options(-baseuri) $sysId $pubId]
+ }
+
+ return {}
+}
+
+# sgml::ParseDTD:Internal --
+#
+# Parse the internal DTD subset.
+#
+# Parameter entities are only allowed between markup declarations.
+#
+# Arguments:
+# opts configuration options
+# dtd DTD data
+#
+# Results:
+# Markup declarations parsed may cause callback invocation
+
+proc sgml::ParseDTD:Internal {opts dtd} {
+ variable MarkupDeclExpr
+ variable MarkupDeclSub
+
+ array set options {}
+ array set options $opts
+
+ upvar #0 $options(-statevariable) state
+ upvar #0 $options(parameterentities) PEnts
+ upvar #0 $options(externalparameterentities) ExtPEnts
+
+ # Bug 583947: remove comments before further processing
+ regsub -all {<!--.*?-->} $dtd {} dtd
+
+ # Tokenize the DTD
+
+ # Protect Tcl special characters
+ regsub -all {([{}\\])} $dtd {\\\1} dtd
+
+ regsub -all $MarkupDeclExpr $dtd $MarkupDeclSub dtd
+
+ # Entities may have angle brackets in their replacement
+ # text, which breaks the RE processing. So, we must
+ # use a similar technique to processing doc instances
+ # to rebuild the declarations from the pieces
+
+ set mode {} ;# normal
+ set delimiter {}
+ set name {}
+ set param {}
+
+ set state(inInternalDTD) 1
+
+ # Process the tokens
+ foreach {decl value text} [lrange "{} {} \{$dtd\}" 3 end] {
+
+ # Keep track of line numbers
+ incr state(line) [regsub -all \n $text {} discard]
+
+ ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
+
+ ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode replText text param
+
+ # There may be parameter entity references between markup decls
+
+ if {[regexp {%.*;} $text]} {
+
+ # Protect Tcl special characters
+ regsub -all {([{}\\])} $text {\\\1} text
+
+ regsub -all %($::sgml::Name)\; $text "\} {\\1} \{" text
+
+ set PElist "\{$text\}"
+ set PElist [lreplace $PElist end end]
+ foreach {text entref} $PElist {
+ if {[string length [string trim $text]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text in internal DTD subset around line $state(line)"]
+ }
+
+ # Expand parameter entity and recursively parse
+ # BUG: no checks yet for recursive entity references
+
+ if {[info exists PEnts($entref)]} {
+ set externalParser [$options(-cmd) entityparser]
+ $externalParser parse $PEnts($entref) -dtdsubset internal
+ } elseif {[info exists ExtPEnts($entref)]} {
+ set externalParser [$options(-cmd) entityparser]
+ $externalParser parse $ExtPEnts($entref) -dtdsubset external
+ #$externalParser free
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegalreference "reference to undeclared parameter entity \"$entref\""]
+ }
+ }
+
+ }
+
+ }
+
+ return {}
+}
+
+# sgml::ParseDTD:EntityMode --
+#
+# Perform special processing for various parser modes
+#
+# Arguments:
+# opts configuration options
+# modeVar pass-by-reference mode variable
+# replTextVar pass-by-ref
+# declVar pass-by-ref
+# valueVar pass-by-ref
+# textVar pass-by-ref
+# delimiter delimiter currently in force
+# name
+# param
+#
+# Results:
+# Depends on current mode
+
+proc sgml::ParseDTD:EntityMode {opts modeVar replTextVar declVar valueVar textVar delimiter name param} {
+ upvar 1 $modeVar mode
+ upvar 1 $replTextVar replText
+ upvar 1 $declVar decl
+ upvar 1 $valueVar value
+ upvar 1 $textVar text
+ array set options $opts
+
+ switch $mode {
+ {} {
+ # Pass through to normal processing section
+ }
+ entity {
+ # Look for closing delimiter
+ if {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $decl discard val1 remainder]} {
+ append replText <$val1
+ DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
+ set decl /
+ set text $remainder\ $value>$text
+ set value {}
+ set mode {}
+ } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $value discard val2 remainder]} {
+ append replText <$decl\ $val2
+ DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
+ set decl /
+ set text $remainder>$text
+ set value {}
+ set mode {}
+ } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $text discard val3 remainder]} {
+ append replText <$decl\ $value>$val3
+ DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
+ set decl /
+ set text $remainder
+ set value {}
+ set mode {}
+ } else {
+
+ # Remain in entity mode
+ append replText <$decl\ $value>$text
+ return -code continue
+
+ }
+ }
+
+ ignore {
+ upvar #0 $options(-statevariable) state
+
+ if {[regexp {]](.*)$} $decl discard remainder]} {
+ set state(condSections) [lreplace $state(condSections) end end]
+ set decl $remainder
+ set mode {}
+ } elseif {[regexp {]](.*)$} $value discard remainder]} {
+ set state(condSections) [lreplace $state(condSections) end end]
+ regexp <[cl $::sgml::Wsp]*($::sgml::Name)(.*) $remainder discard decl value
+ set mode {}
+ } elseif {[regexp {]]>(.*)$} $text discard remainder]} {
+ set state(condSections) [lreplace $state(condSections) end end]
+ set decl /
+ set value {}
+ set text $remainder
+ #regexp <[cl $::sgml::Wsp]*($::sgml::Name)([cl ^>]*)>(.*) $remainder discard decl value text
+ set mode {}
+ } else {
+ set decl /
+ }
+
+ }
+
+ comment {
+ # Look for closing comment delimiter
+
+ upvar #0 $options(-statevariable) state
+
+ if {[regexp (.*?)--(.*)\$ $decl discard data1 remainder]} {
+ } elseif {[regexp (.*?)--(.*)\$ $value discard data1 remainder]} {
+ } elseif {[regexp (.*?)--(.*)\$ $text discard data1 remainder]} {
+ } else {
+ # comment continues
+ append state(commentdata) <$decl\ $value>$text
+ set decl /
+ set value {}
+ set text {}
+ }
+ }
+
+ }
+
+ return {}
+}
+
+# sgml::ParseDTD:ProcessMarkupDecl --
+#
+# Process a single markup declaration
+#
+# Arguments:
+# opts configuration options
+# declVar pass-by-ref
+# valueVar pass-by-ref
+# delimiterVar pass-by-ref for current delimiter in force
+# nameVar pass-by-ref
+# modeVar pass-by-ref for current parser mode
+# replTextVar pass-by-ref
+# textVar pass-by-ref
+# paramVar pass-by-ref
+#
+# Results:
+# Depends on markup declaration. May change parser mode
+
+proc sgml::ParseDTD:ProcessMarkupDecl {opts declVar valueVar delimiterVar nameVar modeVar replTextVar textVar paramVar} {
+ upvar 1 $modeVar mode
+ upvar 1 $replTextVar replText
+ upvar 1 $textVar text
+ upvar 1 $declVar decl
+ upvar 1 $valueVar value
+ upvar 1 $nameVar name
+ upvar 1 $delimiterVar delimiter
+ upvar 1 $paramVar param
+
+ variable declExpr
+ variable ExternalEntityExpr
+
+ array set options $opts
+ upvar #0 $options(-statevariable) state
+
+ switch -glob -- $decl {
+
+ / {
+ # continuation from entity processing
+ }
+
+ !ELEMENT {
+ # Element declaration
+ if {[regexp $declExpr $value discard tag cmodel]} {
+ DTD:ELEMENT [array get options] $tag $cmodel
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed element declaration around line $state(line)"]
+ }
+ }
+
+ !ATTLIST {
+ # Attribute list declaration
+ variable declExpr
+ if {[regexp $declExpr $value discard tag attdefns]} {
+ if {[catch {DTD:ATTLIST [array get options] $tag $attdefns} err]} {
+ #puts stderr "Stack trace: $::errorInfo\n***\n"
+ # Atttribute parsing has bugs at the moment
+ #return -code error "$err around line $state(line)"
+ return {}
+ }
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed attribute list declaration around line $state(line)"]
+ }
+ }
+
+ !ENTITY {
+ # Entity declaration
+ variable EntityExpr
+
+ if {[regexp $EntityExpr $value discard param name value]} {
+
+ # Entity replacement text may have a '>' character.
+ # In this case, the real delimiter will be in the following
+ # text. This is complicated by the possibility of there
+ # being several '<','>' pairs in the replacement text.
+ # At this point, we are searching for the matching quote delimiter.
+
+ if {[regexp $ExternalEntityExpr $value]} {
+ DTD:ENTITY [array get options] $name [string trim $param] $value
+ } elseif {[regexp ("|')(.*?)\\1(.*) $value discard delimiter replText value]} {
+
+ if {[string length [string trim $value]]} {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
+ } else {
+ DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter
+ }
+ } elseif {[regexp ("|')(.*) $value discard delimiter replText]} {
+ append replText >$text
+ set text {}
+ set mode entity
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "no delimiter for entity declaration around line $state(line)"]
+ }
+
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
+ }
+ }
+
+ !NOTATION {
+ # Notation declaration
+ if {[regexp $declExpr param discard tag notation]} {
+ DTD:ENTITY [array get options] $tag $notation
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"]
+ }
+ }
+
+ !--* {
+ # Start of a comment
+
+ if {[regexp !--(.*?)--\$ $decl discard data]} {
+ if {[string length [string trim $value]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$value\""]
+ }
+ uplevel #0 $options(-commentcommand) [list $data]
+ set decl /
+ set value {}
+ } elseif {[regexp -- ^(.*?)--\$ $value discard data2]} {
+ regexp !--(.*)\$ $decl discard data1
+ uplevel #0 $options(-commentcommand) [list $data1\ $data2]
+ set decl /
+ set value {}
+ } elseif {[regexp (.*?)-->(.*)\$ $text discard data3 remainder]} {
+ regexp !--(.*)\$ $decl discard data1
+ uplevel #0 $options(-commentcommand) [list $data1\ $value>$data3]
+ set decl /
+ set value {}
+ set text $remainder
+ } else {
+ regexp !--(.*)\$ $decl discard data1
+ set state(commentdata) $data1\ $value>$text
+ set decl /
+ set value {}
+ set text {}
+ set mode comment
+ }
+ }
+
+ !*INCLUDE* -
+ !*IGNORE* {
+ if {$state(inInternalDTD)} {
+ uplevel #0 $options(-errorcommand) [list illegalsection "conditional section not permitted in internal DTD subset around line $state(line)"]
+ }
+
+ if {[regexp {^!\[INCLUDE\[(.*)} $decl discard remainder]} {
+ # Push conditional section stack, popped by ]]> sequence
+
+ if {[regexp {(.*?)]]$} $remainder discard r2]} {
+ # section closed immediately
+ if {[string length [string trim $r2]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
+ }
+ } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} {
+ # section closed immediately
+ if {[string length [string trim $r2]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
+ }
+ if {[string length [string trim $r3]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"]
+ }
+ } else {
+
+ lappend state(condSections) INCLUDE
+
+ set parser [$options(-cmd) entityparser]
+ $parser parse $remainder\ $value> -dtdsubset external
+ #$parser free
+
+ if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} {
+ if {[string length [string trim $t1]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
+ }
+ if {![llength $state(condSections)]} {
+ uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
+ }
+ set state(condSections) [lreplace $state(condSections) end end]
+ set text $t2
+ }
+
+ }
+ } elseif {[regexp {^!\[IGNORE\[(.*)} $decl discard remainder]} {
+ # Set ignore mode. Still need a stack
+ set mode ignore
+
+ if {[regexp {(.*?)]]$} $remainder discard r2]} {
+ # section closed immediately
+ if {[string length [string trim $r2]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
+ }
+ } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} {
+ # section closed immediately
+ if {[string length [string trim $r2]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"]
+ }
+ if {[string length [string trim $r3]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"]
+ }
+ } else {
+
+ lappend state(condSections) IGNORE
+
+ if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} {
+ if {[string length [string trim $t1]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
+ }
+ if {![llength $state(condSections)]} {
+ uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
+ }
+ set state(condSections) [lreplace $state(condSections) end end]
+ set text $t2
+ }
+
+ }
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\" around line $state(line)"]
+ }
+
+ }
+
+ default {
+ if {[regexp {^\?(.*)} $decl discard target]} {
+ # Processing instruction
+ } else {
+ uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\""]
+ }
+ }
+ }
+
+ return {}
+}
+
+# sgml::ParseDTD:External --
+#
+# Parse the external DTD subset.
+#
+# Parameter entities are allowed anywhere.
+#
+# Arguments:
+# opts configuration options
+# dtd DTD data
+#
+# Results:
+# Markup declarations parsed may cause callback invocation
+
+proc sgml::ParseDTD:External {opts dtd} {
+ variable MarkupDeclExpr
+ variable MarkupDeclSub
+ variable declExpr
+
+ array set options $opts
+ upvar #0 $options(parameterentities) PEnts
+ upvar #0 $options(externalparameterentities) ExtPEnts
+ upvar #0 $options(-statevariable) state
+
+ # As with the internal DTD subset, watch out for
+ # entities with angle brackets
+ set mode {} ;# normal
+ set delimiter {}
+ set name {}
+ set param {}
+
+ set oldState 0
+ catch {set oldState $state(inInternalDTD)}
+ set state(inInternalDTD) 0
+
+ # Initialise conditional section stack
+ if {![info exists state(condSections)]} {
+ set state(condSections) {}
+ }
+ set startCondSectionDepth [llength $state(condSections)]
+
+ while {[string length $dtd]} {
+ set progress 0
+ set PEref {}
+ if {![string compare $mode "ignore"]} {
+ set progress 1
+ if {[regexp {]]>(.*)} $dtd discard dtd]} {
+ set remainder {}
+ set mode {} ;# normal
+ set state(condSections) [lreplace $state(condSections) end end]
+ continue
+ } else {
+ uplevel #0 $options(-errorcommand) [list missingdelimiter "IGNORE conditional section closing delimiter not found"]
+ }
+ } elseif {[regexp ^(.*?)%($::sgml::Name)\;(.*)\$ $dtd discard data PEref remainder]} {
+ set progress 1
+ } else {
+ set data $dtd
+ set dtd {}
+ set remainder {}
+ }
+
+ # Tokenize the DTD (so far)
+
+ # Protect Tcl special characters
+ regsub -all {([{}\\])} $data {\\\1} dataP
+
+ set n [regsub -all $MarkupDeclExpr $dataP $MarkupDeclSub dataP]
+
+ if {$n} {
+ set progress 1
+ # All but the last markup declaration should have no text
+ set dataP [lrange "{} {} \{$dataP\}" 3 end]
+ if {[llength $dataP] > 3} {
+ foreach {decl value text} [lrange $dataP 0 [expr [llength $dataP] - 4]] {
+ ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
+ ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param
+
+ if {[string length [string trim $text]]} {
+ # check for conditional section close
+ if {[regexp {]]>(.*)$} $text discard text]} {
+ if {[string length [string trim $text]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""]
+ }
+ if {![llength $state(condSections)]} {
+ uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
+ }
+ set state(condSections) [lreplace $state(condSections) end end]
+ if {![string compare $mode "ignore"]} {
+ set mode {} ;# normal
+ }
+ } else {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""]
+ }
+ }
+ }
+ }
+ # Do the last declaration
+ foreach {decl value text} [lrange $dataP [expr [llength $dataP] - 3] end] {
+ ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param
+ ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param
+ }
+ }
+
+ # Now expand the PE reference, if any
+ switch -glob $mode,[string length $PEref],$n {
+ ignore,0,* {
+ set dtd $text
+ }
+ ignore,*,* {
+ set dtd $text$remainder
+ }
+ *,0,0 {
+ set dtd $data
+ }
+ *,0,* {
+ set dtd $text
+ }
+ *,*,0 {
+ if {[catch {append data $PEnts($PEref)}]} {
+ if {[info exists ExtPEnts($PEref)]} {
+ set externalParser [$options(-cmd) entityparser]
+ $externalParser parse $ExtPEnts($PEref) -dtdsubset external
+ #$externalParser free
+ } else {
+ uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"]
+ }
+ }
+ set dtd $data$remainder
+ }
+ default {
+ if {[catch {append text $PEnts($PEref)}]} {
+ if {[info exists ExtPEnts($PEref)]} {
+ set externalParser [$options(-cmd) entityparser]
+ $externalParser parse $ExtPEnts($PEref) -dtdsubset external
+ #$externalParser free
+ } else {
+ uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"]
+ }
+ }
+ set dtd $text$remainder
+ }
+ }
+
+ # Check whether a conditional section has been terminated
+ if {[regexp {^(.*?)]]>(.*)$} $dtd discard t1 t2]} {
+ if {![regexp <.*> $t1]} {
+ if {[string length [string trim $t1]]} {
+ uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""]
+ }
+ if {![llength $state(condSections)]} {
+ uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"]
+ }
+ set state(condSections) [lreplace $state(condSections) end end]
+ if {![string compare $mode "ignore"]} {
+ set mode {} ;# normal
+ }
+ set dtd $t2
+ set progress 1
+ }
+ }
+
+ if {!$progress} {
+ # No parameter entity references were found and
+ # the text does not contain a well-formed markup declaration
+ # Avoid going into an infinite loop
+ upvar #0 $options(-errorcommand) [list syntaxerror "external entity does not contain well-formed markup declaration"]
+ break
+ }
+ }
+
+ set state(inInternalDTD) $oldState
+
+ # Check that conditional sections have been closed properly
+ if {[llength $state(condSections)] > $startCondSectionDepth} {
+ uplevel #0 $options(-errorcommand) [list syntaxerror "[lindex $state(condSections) end] conditional section not closed"]
+ }
+ if {[llength $state(condSections)] < $startCondSectionDepth} {
+ uplevel #0 $options(-errorcommand) [list syntaxerror "too many conditional section closures"]
+ }
+
+ return {}
+}
+
+# Procedures for handling the various declarative elements in a DTD.
+# New elements may be added by creating a procedure of the form
+# parse:DTD:_element_
+
+# For each of these procedures, the various regular expressions they use
+# are created outside of the proc to avoid overhead at runtime
+
+# sgml::DTD:ELEMENT --
+#
+# <!ELEMENT ...> defines an element.
+#
+# The content model for the element is stored in the contentmodel array,
+# indexed by the element name. The content model is parsed into the
+# following list form:
+#
+# {} Content model is EMPTY.
+# Indicated by an empty list.
+# * Content model is ANY.
+# Indicated by an asterix.
+# {ELEMENT ...}
+# Content model is element-only.
+# {MIXED {element1 element2 ...}}
+# Content model is mixed (PCDATA and elements).
+# The second element of the list contains the
+# elements that may occur. #PCDATA is assumed
+# (ie. the list is normalised).
+#
+# Arguments:
+# opts configuration options
+# name element GI
+# modspec unparsed content model specification
+
+proc sgml::DTD:ELEMENT {opts name modspec} {
+ variable Wsp
+ array set options $opts
+
+ upvar #0 $options(elementdecls) elements
+
+ if {$options(-validate) && [info exists elements($name)]} {
+ eval $options(-errorcommand) [list elementdeclared "element \"$name\" already declared"]
+ } else {
+ switch -- $modspec {
+ EMPTY {
+ set elements($name) {}
+ uplevel #0 $options(-elementdeclcommand) $name {{}}
+ }
+ ANY {
+ set elements($name) *
+ uplevel #0 $options(-elementdeclcommand) $name *
+ }
+ default {
+ # Don't parse the content model for now,
+ # just pass the model to the application
+ if {0 && [regexp [format {^\([%s]*#PCDATA[%s]*(\|([^)]+))?[%s]*\)*[%s]*$} $Wsp $Wsp $Wsp $Wsp] discard discard mtoks]} {
+ set cm($name) [list MIXED [split $mtoks |]]
+ } elseif {0} {
+ if {[catch {CModelParse $state(state) $value} result]} {
+ eval $options(-errorcommand) [list element? $result]
+ } else {
+ set cm($id) [list ELEMENT $result]
+ }
+ } else {
+ set elements($name) $modspec
+ uplevel #0 $options(-elementdeclcommand) $name [list $modspec]
+ }
+ }
+ }
+ }
+}
+
+# sgml::CModelParse --
+#
+# Parse an element content model (non-mixed).
+# A syntax tree is constructed.
+# A transition table is built next.
+#
+# This is going to need alot of work!
+#
+# Arguments:
+# state state array variable
+# value the content model data
+#
+# Results:
+# A Tcl list representing the content model.
+
+proc sgml::CModelParse {state value} {
+ upvar #0 $state var
+
+ # First build syntax tree
+ set syntaxTree [CModelMakeSyntaxTree $state $value]
+
+ # Build transition table
+ set transitionTable [CModelMakeTransitionTable $state $syntaxTree]
+
+ return [list $syntaxTree $transitionTable]
+}
+
+# sgml::CModelMakeSyntaxTree --
+#
+# Construct a syntax tree for the regular expression.
+#
+# Syntax tree is represented as a Tcl list:
+# rep {:choice|:seq {{rep list1} {rep list2} ...}}
+# where: rep is repetition character, *, + or ?. {} for no repetition
+# listN is nested expression or Name
+#
+# Arguments:
+# spec Element specification
+#
+# Results:
+# Syntax tree for element spec as nested Tcl list.
+#
+# Examples:
+# (memo)
+# {} {:seq {{} memo}}
+# (front, body, back?)
+# {} {:seq {{} front} {{} body} {? back}}
+# (head, (p | list | note)*, div2*)
+# {} {:seq {{} head} {* {:choice {{} p} {{} list} {{} note}}} {* div2}}
+# (p | a | ul)+
+# + {:choice {{} p} {{} a} {{} ul}}
+
+proc sgml::CModelMakeSyntaxTree {state spec} {
+ upvar #0 $state var
+ variable Wsp
+ variable name
+
+ # Translate the spec into a Tcl list.
+
+ # None of the Tcl special characters are allowed in a content model spec.
+ if {[regexp {\$|\[|\]|\{|\}} $spec]} {
+ return -code error "illegal characters in specification"
+ }
+
+ regsub -all [format {(%s)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $name $Wsp $Wsp] $spec [format {%sCModelSTname %s {\1} {\2} {\3}} \n $state] spec
+ regsub -all {\(} $spec "\nCModelSTopenParen $state " spec
+ regsub -all [format {\)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $Wsp $Wsp] $spec [format {%sCModelSTcloseParen %s {\1} {\2}} \n $state] spec
+
+ array set var {stack {} state start}
+ eval $spec
+
+ # Peel off the outer seq, its redundant
+ return [lindex [lindex $var(stack) 1] 0]
+}
+
+# sgml::CModelSTname --
+#
+# Processes a name in a content model spec.
+#
+# Arguments:
+# state state array variable
+# name name specified
+# rep repetition operator
+# cs choice or sequence delimiter
+#
+# Results:
+# See CModelSTcp.
+
+proc sgml::CModelSTname {state name rep cs args} {
+ if {[llength $args]} {
+ return -code error "syntax error in specification: \"$args\""
+ }
+
+ CModelSTcp $state $name $rep $cs
+}
+
+# sgml::CModelSTcp --
+#
+# Process a content particle.
+#
+# Arguments:
+# state state array variable
+# name name specified
+# rep repetition operator
+# cs choice or sequence delimiter
+#
+# Results:
+# The content particle is added to the current group.
+
+proc sgml::CModelSTcp {state cp rep cs} {
+ upvar #0 $state var
+
+ switch -glob -- [lindex $var(state) end]=$cs {
+ start= {
+ set var(state) [lreplace $var(state) end end end]
+ # Add (dummy) grouping, either choice or sequence will do
+ CModelSTcsSet $state ,
+ CModelSTcpAdd $state $cp $rep
+ }
+ :choice= -
+ :seq= {
+ set var(state) [lreplace $var(state) end end end]
+ CModelSTcpAdd $state $cp $rep
+ }
+ start=| -
+ start=, {
+ set var(state) [lreplace $var(state) end end [expr {$cs == "," ? ":seq" : ":choice"}]]
+ CModelSTcsSet $state $cs
+ CModelSTcpAdd $state $cp $rep
+ }
+ :choice=| -
+ :seq=, {
+ CModelSTcpAdd $state $cp $rep
+ }
+ :choice=, -
+ :seq=| {
+ return -code error "syntax error in specification: incorrect delimiter after \"$cp\", should be \"[expr {$cs == "," ? "|" : ","}]\""
+ }
+ end=* {
+ return -code error "syntax error in specification: no delimiter before \"$cp\""
+ }
+ default {
+ return -code error "syntax error"
+ }
+ }
+
+}
+
+# sgml::CModelSTcsSet --
+#
+# Start a choice or sequence on the stack.
+#
+# Arguments:
+# state state array
+# cs choice oir sequence
+#
+# Results:
+# state is modified: end element of state is appended.
+
+proc sgml::CModelSTcsSet {state cs} {
+ upvar #0 $state var
+
+ set cs [expr {$cs == "," ? ":seq" : ":choice"}]
+
+ if {[llength $var(stack)]} {
+ set var(stack) [lreplace $var(stack) end end $cs]
+ } else {
+ set var(stack) [list $cs {}]
+ }
+}
+
+# sgml::CModelSTcpAdd --
+#
+# Append a content particle to the top of the stack.
+#
+# Arguments:
+# state state array
+# cp content particle
+# rep repetition
+#
+# Results:
+# state is modified: end element of state is appended.
+
+proc sgml::CModelSTcpAdd {state cp rep} {
+ upvar #0 $state var
+
+ if {[llength $var(stack)]} {
+ set top [lindex $var(stack) end]
+ lappend top [list $rep $cp]
+ set var(stack) [lreplace $var(stack) end end $top]
+ } else {
+ set var(stack) [list $rep $cp]
+ }
+}
+
+# sgml::CModelSTopenParen --
+#
+# Processes a '(' in a content model spec.
+#
+# Arguments:
+# state state array
+#
+# Results:
+# Pushes stack in state array.
+
+proc sgml::CModelSTopenParen {state args} {
+ upvar #0 $state var
+
+ if {[llength $args]} {
+ return -code error "syntax error in specification: \"$args\""
+ }
+
+ lappend var(state) start
+ lappend var(stack) [list {} {}]
+}
+
+# sgml::CModelSTcloseParen --
+#
+# Processes a ')' in a content model spec.
+#
+# Arguments:
+# state state array
+# rep repetition
+# cs choice or sequence delimiter
+#
+# Results:
+# Stack is popped, and former top of stack is appended to previous element.
+
+proc sgml::CModelSTcloseParen {state rep cs args} {
+ upvar #0 $state var
+
+ if {[llength $args]} {
+ return -code error "syntax error in specification: \"$args\""
+ }
+
+ set cp [lindex $var(stack) end]
+ set var(stack) [lreplace $var(stack) end end]
+ set var(state) [lreplace $var(state) end end]
+ CModelSTcp $state $cp $rep $cs
+}
+
+# sgml::CModelMakeTransitionTable --
+#
+# Given a content model's syntax tree, constructs
+# the transition table for the regular expression.
+#
+# See "Compilers, Principles, Techniques, and Tools",
+# Aho, Sethi and Ullman. Section 3.9, algorithm 3.5.
+#
+# Arguments:
+# state state array variable
+# st syntax tree
+#
+# Results:
+# The transition table is returned, as a key/value Tcl list.
+
+proc sgml::CModelMakeTransitionTable {state st} {
+ upvar #0 $state var
+
+ # Construct nullable, firstpos and lastpos functions
+ array set var {number 0}
+ foreach {nullable firstpos lastpos} [ \
+ TraverseDepth1st $state $st {
+ # Evaluated for leaf nodes
+ # Compute nullable(n)
+ # Compute firstpos(n)
+ # Compute lastpos(n)
+ set nullable [nullable leaf $rep $name]
+ set firstpos [list {} $var(number)]
+ set lastpos [list {} $var(number)]
+ set var(pos:$var(number)) $name
+ } {
+ # Evaluated for nonterminal nodes
+ # Compute nullable, firstpos, lastpos
+ set firstpos [firstpos $cs $firstpos $nullable]
+ set lastpos [lastpos $cs $lastpos $nullable]
+ set nullable [nullable nonterm $rep $cs $nullable]
+ } \
+ ] break
+
+ set accepting [incr var(number)]
+ set var(pos:$accepting) #
+
+ # var(pos:N) maps from position to symbol.
+ # Construct reverse map for convenience.
+ # NB. A symbol may appear in more than one position.
+ # var is about to be reset, so use different arrays.
+
+ foreach {pos symbol} [array get var pos:*] {
+ set pos [lindex [split $pos :] 1]
+ set pos2symbol($pos) $symbol
+ lappend sym2pos($symbol) $pos
+ }
+
+ # Construct the followpos functions
+ catch {unset var}
+ followpos $state $st $firstpos $lastpos
+
+ # Construct transition table
+ # Dstates is [union $marked $unmarked]
+ set unmarked [list [lindex $firstpos 1]]
+ while {[llength $unmarked]} {
+ set T [lindex $unmarked 0]
+ lappend marked $T
+ set unmarked [lrange $unmarked 1 end]
+
+ # Find which input symbols occur in T
+ set symbols {}
+ foreach pos $T {
+ if {$pos != $accepting && [lsearch $symbols $pos2symbol($pos)] < 0} {
+ lappend symbols $pos2symbol($pos)
+ }
+ }
+ foreach a $symbols {
+ set U {}
+ foreach pos $sym2pos($a) {
+ if {[lsearch $T $pos] >= 0} {
+ # add followpos($pos)
+ if {$var($pos) == {}} {
+ lappend U $accepting
+ } else {
+ eval lappend U $var($pos)
+ }
+ }
+ }
+ set U [makeSet $U]
+ if {[llength $U] && [lsearch $marked $U] < 0 && [lsearch $unmarked $U] < 0} {
+ lappend unmarked $U
+ }
+ set Dtran($T,$a) $U
+ }
+
+ }
+
+ return [list [array get Dtran] [array get sym2pos] $accepting]
+}
+
+# sgml::followpos --
+#
+# Compute the followpos function, using the already computed
+# firstpos and lastpos.
+#
+# Arguments:
+# state array variable to store followpos functions
+# st syntax tree
+# firstpos firstpos functions for the syntax tree
+# lastpos lastpos functions
+#
+# Results:
+# followpos functions for each leaf node, in name/value format
+
+proc sgml::followpos {state st firstpos lastpos} {
+ upvar #0 $state var
+
+ switch -- [lindex [lindex $st 1] 0] {
+ :seq {
+ for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} {
+ followpos $state [lindex [lindex $st 1] $i] \
+ [lindex [lindex $firstpos 0] [expr $i - 1]] \
+ [lindex [lindex $lastpos 0] [expr $i - 1]]
+ foreach pos [lindex [lindex [lindex $lastpos 0] [expr $i - 1]] 1] {
+ eval lappend var($pos) [lindex [lindex [lindex $firstpos 0] $i] 1]
+ set var($pos) [makeSet $var($pos)]
+ }
+ }
+ }
+ :choice {
+ for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} {
+ followpos $state [lindex [lindex $st 1] $i] \
+ [lindex [lindex $firstpos 0] [expr $i - 1]] \
+ [lindex [lindex $lastpos 0] [expr $i - 1]]
+ }
+ }
+ default {
+ # No action at leaf nodes
+ }
+ }
+
+ switch -- [lindex $st 0] {
+ ? {
+ # We having nothing to do here ! Doing the same as
+ # for * effectively converts this qualifier into the other.
+ }
+ * {
+ foreach pos [lindex $lastpos 1] {
+ eval lappend var($pos) [lindex $firstpos 1]
+ set var($pos) [makeSet $var($pos)]
+ }
+ }
+ }
+
+}
+
+# sgml::TraverseDepth1st --
+#
+# Perform depth-first traversal of a tree.
+# A new tree is constructed, with each node computed by f.
+#
+# Arguments:
+# state state array variable
+# t The tree to traverse, a Tcl list
+# leaf Evaluated at a leaf node
+# nonTerm Evaluated at a nonterminal node
+#
+# Results:
+# A new tree is returned.
+
+proc sgml::TraverseDepth1st {state t leaf nonTerm} {
+ upvar #0 $state var
+
+ set nullable {}
+ set firstpos {}
+ set lastpos {}
+
+ switch -- [lindex [lindex $t 1] 0] {
+ :seq -
+ :choice {
+ set rep [lindex $t 0]
+ set cs [lindex [lindex $t 1] 0]
+
+ foreach child [lrange [lindex $t 1] 1 end] {
+ foreach {childNullable childFirstpos childLastpos} \
+ [TraverseDepth1st $state $child $leaf $nonTerm] break
+ lappend nullable $childNullable
+ lappend firstpos $childFirstpos
+ lappend lastpos $childLastpos
+ }
+
+ eval $nonTerm
+ }
+ default {
+ incr var(number)
+ set rep [lindex [lindex $t 0] 0]
+ set name [lindex [lindex $t 1] 0]
+ eval $leaf
+ }
+ }
+
+ return [list $nullable $firstpos $lastpos]
+}
+
+# sgml::firstpos --
+#
+# Computes the firstpos function for a nonterminal node.
+#
+# Arguments:
+# cs node type, choice or sequence
+# firstpos firstpos functions for the subtree
+# nullable nullable functions for the subtree
+#
+# Results:
+# firstpos function for this node is returned.
+
+proc sgml::firstpos {cs firstpos nullable} {
+ switch -- $cs {
+ :seq {
+ set result [lindex [lindex $firstpos 0] 1]
+ for {set i 0} {$i < [llength $nullable]} {incr i} {
+ if {[lindex [lindex $nullable $i] 1]} {
+ eval lappend result [lindex [lindex $firstpos [expr $i + 1]] 1]
+ } else {
+ break
+ }
+ }
+ }
+ :choice {
+ foreach child $firstpos {
+ eval lappend result $child
+ }
+ }
+ }
+
+ return [list $firstpos [makeSet $result]]
+}
+
+# sgml::lastpos --
+#
+# Computes the lastpos function for a nonterminal node.
+# Same as firstpos, only logic is reversed
+#
+# Arguments:
+# cs node type, choice or sequence
+# lastpos lastpos functions for the subtree
+# nullable nullable functions forthe subtree
+#
+# Results:
+# lastpos function for this node is returned.
+
+proc sgml::lastpos {cs lastpos nullable} {
+ switch -- $cs {
+ :seq {
+ set result [lindex [lindex $lastpos end] 1]
+ for {set i [expr [llength $nullable] - 1]} {$i >= 0} {incr i -1} {
+ if {[lindex [lindex $nullable $i] 1]} {
+ eval lappend result [lindex [lindex $lastpos $i] 1]
+ } else {
+ break
+ }
+ }
+ }
+ :choice {
+ foreach child $lastpos {
+ eval lappend result $child
+ }
+ }
+ }
+
+ return [list $lastpos [makeSet $result]]
+}
+
+# sgml::makeSet --
+#
+# Turn a list into a set, ie. remove duplicates.
+#
+# Arguments:
+# s a list
+#
+# Results:
+# A set is returned, which is a list with duplicates removed.
+
+proc sgml::makeSet s {
+ foreach r $s {
+ if {[llength $r]} {
+ set unique($r) {}
+ }
+ }
+ return [array names unique]
+}
+
+# sgml::nullable --
+#
+# Compute the nullable function for a node.
+#
+# Arguments:
+# nodeType leaf or nonterminal
+# rep repetition applying to this node
+# name leaf node: symbol for this node, nonterm node: choice or seq node
+# subtree nonterm node: nullable functions for the subtree
+#
+# Results:
+# Returns nullable function for this branch of the tree.
+
+proc sgml::nullable {nodeType rep name {subtree {}}} {
+ switch -glob -- $rep:$nodeType {
+ :leaf -
+ +:leaf {
+ return [list {} 0]
+ }
+ \\*:leaf -
+ \\?:leaf {
+ return [list {} 1]
+ }
+ \\*:nonterm -
+ \\?:nonterm {
+ return [list $subtree 1]
+ }
+ :nonterm -
+ +:nonterm {
+ switch -- $name {
+ :choice {
+ set result 0
+ foreach child $subtree {
+ set result [expr $result || [lindex $child 1]]
+ }
+ }
+ :seq {
+ set result 1
+ foreach child $subtree {
+ set result [expr $result && [lindex $child 1]]
+ }
+ }
+ }
+ return [list $subtree $result]
+ }
+ }
+}
+
+# sgml::DTD:ATTLIST --
+#
+# <!ATTLIST ...> defines an attribute list.
+#
+# Arguments:
+# opts configuration opions
+# name Element GI
+# attspec unparsed attribute definitions
+#
+# Results:
+# Attribute list variables are modified.
+
+proc sgml::DTD:ATTLIST {opts name attspec} {
+ variable attlist_exp
+ variable attlist_enum_exp
+ variable attlist_fixed_exp
+
+ array set options $opts
+
+ # Parse the attribute list. If it were regular, could just use foreach,
+ # but some attributes may have values.
+ regsub -all {([][$\\])} $attspec {\\\1} attspec
+ regsub -all $attlist_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {} \{" attspec
+ regsub -all $attlist_enum_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {} {\\4} \{" attspec
+ regsub -all $attlist_fixed_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {\\4} \{" attspec
+
+ eval "noop \{$attspec\}"
+
+ return {}
+}
+
+# sgml::DTDAttribute --
+#
+# Parse definition of a single attribute.
+#
+# Arguments:
+# callback attribute defn callback
+# name element name
+# var array variable
+# att attribute name
+# type type of this attribute
+# default default value of the attribute
+# value other information
+# text other text (should be empty)
+#
+# Results:
+# Attribute defn added to array, unless it already exists
+
+proc sgml::DTDAttribute args {
+ # BUG: Some problems with parameter passing - deal with it later
+ foreach {callback name var att type default value text} $args break
+
+ upvar #0 $var atts
+
+ if {[string length [string trim $text]]} {
+ return -code error "unexpected text \"$text\" in attribute definition"
+ }
+
+ # What about overridden attribute defns?
+ # A non-validating app may want to know about them
+ # (eg. an editor)
+ if {![info exists atts($name/$att)]} {
+ set atts($name/$att) [list $type $default $value]
+ uplevel #0 $callback [list $name $att $type $default $value]
+ }
+
+ return {}
+}
+
+# sgml::DTD:ENTITY --
+#
+# <!ENTITY ...> declaration.
+#
+# Callbacks:
+# -entitydeclcommand for general entity declaration
+# -unparsedentitydeclcommand for unparsed external entity declaration
+# -parameterentitydeclcommand for parameter entity declaration
+#
+# Arguments:
+# opts configuration options
+# name name of entity being defined
+# param whether a parameter entity is being defined
+# value unparsed replacement text
+#
+# Results:
+# Modifies the caller's entities array variable
+
+proc sgml::DTD:ENTITY {opts name param value} {
+
+ array set options $opts
+
+ if {[string compare % $param]} {
+ # Entity declaration - general or external
+ upvar #0 $options(entities) ents
+ upvar #0 $options(extentities) externals
+
+ if {[info exists ents($name)] || [info exists externals($name)]} {
+ eval $options(-warningcommand) entity [list "entity \"$name\" already declared"]
+ } else {
+ if {[catch {uplevel #0 $options(-parseentitydeclcommand) [list $value]} value]} {
+ return -code error "unable to parse entity declaration due to \"$value\""
+ }
+ switch -glob [lindex $value 0],[lindex $value 3] {
+ internal, {
+ set ents($name) [EntitySubst [array get options] [lindex $value 1]]
+ uplevel #0 $options(-entitydeclcommand) [list $name $ents($name)]
+ }
+ internal,* {
+ return -code error "unexpected NDATA declaration"
+ }
+ external, {
+ set externals($name) [lrange $value 1 2]
+ uplevel #0 $options(-entitydeclcommand) [eval list $name [lrange $value 1 2]]
+ }
+ external,* {
+ set externals($name) [lrange $value 1 3]
+ uplevel #0 $options(-unparsedentitydeclcommand) [eval list $name [lrange $value 1 3]]
+ }
+ default {
+ return -code error "internal error: unexpected parser state"
+ }
+ }
+ }
+ } else {
+ # Parameter entity declaration
+ upvar #0 $options(parameterentities) PEnts
+ upvar #0 $options(externalparameterentities) ExtPEnts
+
+ if {[info exists PEnts($name)] || [info exists ExtPEnts($name)]} {
+ eval $options(-warningcommand) parameterentity [list "parameter entity \"$name\" already declared"]
+ } else {
+ if {[catch {uplevel #0 $options(-parseentitydeclcommand) [list $value]} value]} {
+ return -code error "unable to parse parameter entity declaration due to \"$value\""
+ }
+ if {[string length [lindex $value 3]]} {
+ return -code error "NDATA illegal in parameter entity declaration"
+ }
+ switch [lindex $value 0] {
+ internal {
+ # Substitute character references and PEs (XML: 4.5)
+ set value [EntitySubst [array get options] [lindex $value 1]]
+
+ set PEnts($name) $value
+ uplevel #0 $options(-parameterentitydeclcommand) [list $name $value]
+ }
+ external -
+ default {
+ # Get the replacement text now.
+ # Could wait until the first reference, but easier
+ # to just do it now.
+
+ set token [uri::geturl [uri::resolve $options(-baseuri) [lindex $value 1]]]
+
+ set ExtPEnts($name) [lindex [array get $token data] 1]
+ uplevel #0 $options(-parameterentitydeclcommand) [eval list $name [lrange $value 1 2]]
+ }
+ }
+ }
+ }
+}
+
+# sgml::EntitySubst --
+#
+# Perform entity substitution on an entity replacement text.
+# This differs slightly from other substitution procedures,
+# because only parameter and character entity substitution
+# is performed, not general entities.
+# See XML Rec. section 4.5.
+#
+# Arguments:
+# opts configuration options
+# value Literal entity value
+#
+# Results:
+# Expanded replacement text
+
+proc sgml::EntitySubst {opts value} {
+ array set options $opts
+
+ # Protect Tcl special characters
+ regsub -all {([{}\\])} $value {\\\1} value
+
+ # Find entity references
+ regsub -all (&#\[0-9\]+|&#x\[0-9a-fA-F\]+|%${::sgml::Name})\; $value "\[EntitySubstValue [list $options(parameterentities)] {\\1}\]" value
+
+ set result [subst $value]
+
+ return $result
+}
+
+# sgml::EntitySubstValue --
+#
+# Handle a single character or parameter entity substitution
+#
+# Arguments:
+# PEvar array variable containing PE declarations
+# ref character or parameter entity reference
+#
+# Results:
+# Replacement text
+
+proc sgml::EntitySubstValue {PEvar ref} {
+ # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
+ switch -glob -- $ref {
+ {&#x*} {
+ scan [string range $ref 3 end] %x hex
+ return [format %c $hex]
+ }
+ {&#*} {
+ return [format %c [string range $ref 2 end]]
+ }
+ {%*} {
+ upvar #0 $PEvar PEs
+ set ref [string range $ref 1 end]
+ if {[info exists PEs($ref)]} {
+ return $PEs($ref)
+ } else {
+ return -code error "parameter entity \"$ref\" not declared"
+ }
+ }
+ default {
+ return -code error "internal error - unexpected entity reference"
+ }
+ }
+ return {}
+}
+
+# sgml::DTD:NOTATION --
+#
+# Process notation declaration
+#
+# Arguments:
+# opts configuration options
+# name notation name
+# value unparsed notation spec
+
+proc sgml::DTD:NOTATION {opts name value} {
+ return {}
+
+ variable notation_exp
+ upvar opts state
+
+ if {[regexp $notation_exp $value x scheme data] == 2} {
+ } else {
+ eval $state(-errorcommand) [list notationvalue "notation value \"$value\" incorrectly specified"]
+ }
+}
+
+# sgml::ResolveEntity --
+#
+# Default entity resolution routine
+#
+# Arguments:
+# cmd command of parent parser
+# base base URL for relative URLs
+# sysId system identifier
+# pubId public identifier
+
+proc sgml::ResolveEntity {cmd base sysId pubId} {
+ variable ParseEventNum
+
+ if {[catch {uri::resolve $base $sysId} url]} {
+ return -code error "unable to resolve system identifier \"$sysId\""
+ }
+ if {[catch {uri::geturl $url} token]} {
+ return -code error "unable to retrieve external entity \"$url\" for system identifier \"$sysId\""
+ }
+
+ upvar #0 $token data
+
+ set parser [uplevel #0 $cmd entityparser]
+
+ set body {}
+ catch {set body $data(body)}
+ catch {set body $data(data)}
+ if {[string length $body]} {
+ uplevel #0 $parser parse [list $body] -dtdsubset external
+ }
+ $parser free
+
+ return {}
+}
diff --git a/tclxml/tclxml-tcl/tclparser-8.0.tcl b/tclxml/tclxml-tcl/tclparser-8.0.tcl
new file mode 100755
index 0000000..e2573f8
--- /dev/null
+++ b/tclxml/tclxml-tcl/tclparser-8.0.tcl
@@ -0,0 +1,359 @@
+# tclparser-8.0.tcl --
+#
+# This file provides a Tcl implementation of a XML parser.
+# This file supports Tcl 8.0.
+#
+# See xml-8.[01].tcl for definitions of character sets and
+# regular expressions.
+#
+# Copyright (c) 2005-2008 by Explain.
+# http://www.explain.com.au/
+# Copyright (c) 1998-2004 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: tclparser-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require -exact Tcl 8.0
+
+package require xmldefs 3.2
+
+package require sgmlparser 1.0
+
+package provide xml::tclparser 3.2
+
+namespace eval xml {
+
+ # Procedures for parsing XML documents
+ namespace export parser
+ # Procedures for parsing XML DTDs
+ namespace export DTDparser
+
+ # Counter for creating unique parser objects
+ variable ParserCounter 0
+
+}
+
+# xml::parser --
+#
+# Creates XML parser object.
+#
+# Arguments:
+# args Unique name for parser object
+# plus option/value pairs
+#
+# Recognised Options:
+# -final Indicates end of document data
+# -elementstartcommand Called when an element starts
+# -elementendcommand Called when an element ends
+# -characterdatacommand Called when character data occurs
+# -processinginstructioncommand Called when a PI occurs
+# -externalentityrefcommand Called for an external entity reference
+#
+# (Not compatible with expat)
+# -xmldeclcommand Called when the XML declaration occurs
+# -doctypecommand Called when the document type declaration occurs
+#
+# -errorcommand Script to evaluate for a fatal error
+# -warningcommand Script to evaluate for a reportable warning
+# -statevariable global state variable
+# -reportempty whether to provide empty element indication
+#
+# Results:
+# The state variable is initialised.
+
+proc xml::parser {args} {
+ variable ParserCounter
+
+ if {[llength $args] > 0} {
+ set name [lindex $args 0]
+ set args [lreplace $args 0 0]
+ } else {
+ set name parser[incr ParserCounter]
+ }
+
+ if {[info command [namespace current]::$name] != {}} {
+ return -code error "unable to create parser object \"[namespace current]::$name\" command"
+ }
+
+ # Initialise state variable and object command
+ upvar \#0 [namespace current]::$name parser
+ set sgml_ns [namespace parent]::sgml
+ array set parser [list name $name \
+ -final 1 \
+ -elementstartcommand ${sgml_ns}::noop \
+ -elementendcommand ${sgml_ns}::noop \
+ -characterdatacommand ${sgml_ns}::noop \
+ -processinginstructioncommand ${sgml_ns}::noop \
+ -externalentityrefcommand ${sgml_ns}::noop \
+ -xmldeclcommand ${sgml_ns}::noop \
+ -doctypecommand ${sgml_ns}::noop \
+ -warningcommand ${sgml_ns}::noop \
+ -statevariable [namespace current]::$name \
+ -reportempty 0 \
+ internaldtd {} \
+ ]
+
+ proc [namespace current]::$name {method args} \
+ "eval ParseCommand $name \$method \$args"
+
+ eval ParseCommand [list $name] configure $args
+
+ return [namespace current]::$name
+}
+
+# xml::ParseCommand --
+#
+# Handles parse object command invocations
+#
+# Valid Methods:
+# cget
+# configure
+# parse
+# reset
+#
+# Arguments:
+# parser parser object
+# method minor command
+# args other arguments
+#
+# Results:
+# Depends on method
+
+proc xml::ParseCommand {parser method args} {
+ upvar \#0 [namespace current]::$parser state
+
+ switch -- $method {
+ cget {
+ return $state([lindex $args 0])
+ }
+ configure {
+ foreach {opt value} $args {
+ set state($opt) $value
+ }
+ }
+ parse {
+ ParseCommand_parse $parser [lindex $args 0]
+ }
+ reset {
+ if {[llength $args]} {
+ return -code error "too many arguments"
+ }
+ ParseCommand_reset $parser
+ }
+ default {
+ return -code error "unknown method \"$method\""
+ }
+ }
+
+ return {}
+}
+
+# xml::ParseCommand_parse --
+#
+# Parses document instance data
+#
+# Arguments:
+# object parser object
+# xml data
+#
+# Results:
+# Callbacks are invoked, if any are defined
+
+proc xml::ParseCommand_parse {object xml} {
+ upvar \#0 [namespace current]::$object parser
+ variable Wsp
+ variable tokExpr
+ variable substExpr
+
+ set parent [namespace parent]
+ if {![string compare :: $parent]} {
+ set parent {}
+ }
+
+ set tokenised [lrange \
+ [${parent}::sgml::tokenise $xml \
+ $tokExpr \
+ $substExpr \
+ -internaldtdvariable [namespace current]::${object}(internaldtd)] \
+ 4 end]
+
+ eval ${parent}::sgml::parseEvent \
+ [list $tokenised \
+ -emptyelement [namespace code ParseEmpty] \
+ -parseattributelistcommand [namespace code ParseAttrs]] \
+ [array get parser -*command] \
+ [array get parser -entityvariable] \
+ [array get parser -reportempty] \
+ [array get parser -final] \
+ -normalize 0 \
+ -internaldtd [list $parser(internaldtd)]
+
+ return {}
+}
+
+# xml::ParseEmpty -- Tcl 8.0 version
+#
+# Used by parser to determine whether an element is empty.
+# This should be dead easy in XML. The only complication is
+# that the RE above can't catch the trailing slash, so we have
+# to dig it out of the tag name or attribute list.
+#
+# Tcl 8.1 REs should fix this.
+#
+# Arguments:
+# tag element name
+# attr attribute list (raw)
+# e End tag delimiter.
+#
+# Results:
+# "/" if the trailing slash is found. Optionally, return a list
+# containing new values for the tag name and/or attribute list.
+
+proc xml::ParseEmpty {tag attr e} {
+
+ if {[string match */ [string trimright $tag]] && \
+ ![string length $attr]} {
+ regsub {/$} $tag {} tag
+ return [list / $tag $attr]
+ } elseif {[string match */ [string trimright $attr]]} {
+ regsub {/$} [string trimright $attr] {} attr
+ return [list / $tag $attr]
+ } else {
+ return {}
+ }
+
+}
+
+# xml::ParseAttrs --
+#
+# Parse element attributes.
+#
+# There are two forms for name-value pairs:
+#
+# name="value"
+# name='value'
+#
+# Watch out for the trailing slash on empty elements.
+#
+# Arguments:
+# attrs attribute string given in a tag
+#
+# Results:
+# Returns a Tcl list representing the name-value pairs in the
+# attribute string
+
+proc xml::ParseAttrs attrs {
+ variable Wsp
+ variable Name
+
+ # First check whether there's any work to do
+ if {![string compare {} [string trim $attrs]]} {
+ return {}
+ }
+
+ # Strip the trailing slash on empty elements
+ regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList
+
+ set mode name
+ set result {}
+ foreach component [split $atList =] {
+ switch $mode {
+ name {
+ set component [string trim $component]
+ if {[regexp $Name $component]} {
+ lappend result $component
+ } else {
+ return -code error "invalid attribute name \"$component\""
+ }
+ set mode value:start
+ }
+ value:start {
+ set component [string trimleft $component]
+ set delimiter [string index $component 0]
+ set value {}
+ switch -- $delimiter {
+ \" -
+ ' {
+ if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} {
+ lappend result $value
+ set remainder [string trim $remainder]
+ if {[string length $remainder]} {
+ if {[regexp $Name $remainder]} {
+ lappend result $remainder
+ set mode value:start
+ } else {
+ return -code error "invalid attribute name \"$remainder\""
+ }
+ } else {
+ set mode end
+ }
+ } else {
+ set value [string range $component 1 end]
+ set mode value:continue
+ }
+ }
+ default {
+ return -code error "invalid value for attribute \"[lindex $result end]\""
+ }
+ }
+ }
+ value:continue {
+ if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} {
+ append value = $valuepart
+ lappend result $value
+ set remainder [string trim $remainder]
+ if {[string length $remainder]} {
+ if {[regexp $Name $remainder]} {
+ lappend result $remainder
+ set mode value:start
+ } else {
+ return -code error "invalid attribute name \"$remainder\""
+ }
+ } else {
+ set mode end
+ }
+ } else {
+ append value = $component
+ }
+ }
+ end {
+ return -code error "unexpected data found after end of attribute list"
+ }
+ }
+ }
+
+ switch $mode {
+ name -
+ end {
+ # This is normal
+ }
+ default {
+ return -code error "unexpected end of attribute list"
+ }
+ }
+
+ return $result
+}
+
+# xml::ParseCommand_reset --
+#
+# Initialize parser data
+#
+# Arguments:
+# object parser object
+#
+# Results:
+# Parser data structure initialised
+
+proc xml::ParseCommand_reset object {
+ upvar \#0 [namespace current]::$object parser
+
+ array set parser [list \
+ -final 1 \
+ internaldtd {} \
+ ]
+}
+
diff --git a/tclxml/tclxml-tcl/tclparser-8.1.tcl b/tclxml/tclxml-tcl/tclparser-8.1.tcl
new file mode 100755
index 0000000..40a0af9
--- /dev/null
+++ b/tclxml/tclxml-tcl/tclparser-8.1.tcl
@@ -0,0 +1,614 @@
+# tclparser-8.1.tcl --
+#
+# This file provides a Tcl implementation of a XML parser.
+# This file supports Tcl 8.1.
+#
+# See xml-8.[01].tcl for definitions of character sets and
+# regular expressions.
+#
+# Copyright (c) 2005-2008 by Explain.
+# http://www.explain.com.au/
+# Copyright (c) 1998-2003 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: tclparser-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require Tcl 8.1
+
+package provide xml::tclparser 3.2
+
+package require xmldefs 3.2
+
+package require sgmlparser 1.0
+
+namespace eval xml::tclparser {
+
+ namespace export create createexternal externalentity parse configure get delete
+
+ # Tokenising expressions
+
+ variable tokExpr $::xml::tokExpr
+ variable substExpr $::xml::substExpr
+
+ # Register this parser class
+
+ ::xml::parserclass create tcl \
+ -createcommand [namespace code create] \
+ -createentityparsercommand [namespace code createentityparser] \
+ -parsecommand [namespace code parse] \
+ -configurecommand [namespace code configure] \
+ -deletecommand [namespace code delete] \
+ -resetcommand [namespace code reset]
+}
+
+# xml::tclparser::create --
+#
+# Creates XML parser object.
+#
+# Arguments:
+# name unique identifier for this instance
+#
+# Results:
+# The state variable is initialised.
+
+proc xml::tclparser::create name {
+
+ # Initialise state variable
+ upvar \#0 [namespace current]::$name parser
+ array set parser [list -name $name \
+ -cmd [uplevel 3 namespace current]::$name \
+ -final 1 \
+ -validate 0 \
+ -statevariable [namespace current]::$name \
+ -baseuri {} \
+ internaldtd {} \
+ entities [namespace current]::Entities$name \
+ extentities [namespace current]::ExtEntities$name \
+ parameterentities [namespace current]::PEntities$name \
+ externalparameterentities [namespace current]::ExtPEntities$name \
+ elementdecls [namespace current]::ElDecls$name \
+ attlistdecls [namespace current]::AttlistDecls$name \
+ notationdecls [namespace current]::NotDecls$name \
+ depth 0 \
+ leftover {} \
+ ]
+
+ # Initialise entities with predefined set
+ array set [namespace current]::Entities$name [array get ::sgml::EntityPredef]
+
+ return $parser(-cmd)
+}
+
+# xml::tclparser::createentityparser --
+#
+# Creates XML parser object for an entity.
+#
+# Arguments:
+# name name for the new parser
+# parent name of parent parser
+#
+# Results:
+# The state variable is initialised.
+
+proc xml::tclparser::createentityparser {parent name} {
+ upvar #0 [namespace current]::$parent p
+
+ # Initialise state variable
+ upvar \#0 [namespace current]::$name external
+ array set external [array get p]
+
+ regsub $parent $p(-cmd) {} parentns
+
+ array set external [list -name $name \
+ -cmd $parentns$name \
+ -statevariable [namespace current]::$name \
+ internaldtd {} \
+ line 0 \
+ ]
+ incr external(depth)
+
+ return $external(-cmd)
+}
+
+# xml::tclparser::configure --
+#
+# Configures a XML parser object.
+#
+# Arguments:
+# name unique identifier for this instance
+# args option name/value pairs
+#
+# Results:
+# May change values of config options
+
+proc xml::tclparser::configure {name args} {
+ upvar \#0 [namespace current]::$name parser
+
+ # BUG: very crude, no checks for illegal args
+ # Mats: Should be synced with sgmlparser.tcl
+ set options {-elementstartcommand -elementendcommand \
+ -characterdatacommand -processinginstructioncommand \
+ -externalentitycommand -xmldeclcommand \
+ -doctypecommand -commentcommand \
+ -entitydeclcommand -unparsedentitydeclcommand \
+ -parameterentitydeclcommand -notationdeclcommand \
+ -elementdeclcommand -attlistdeclcommand \
+ -paramentityparsing -defaultexpandinternalentities \
+ -startdoctypedeclcommand -enddoctypedeclcommand \
+ -entityreferencecommand -warningcommand \
+ -defaultcommand -unknownencodingcommand -notstandalonecommand \
+ -startcdatasectioncommand -endcdatasectioncommand \
+ -errorcommand -final \
+ -validate -baseuri -baseurl \
+ -name -cmd -emptyelement \
+ -parseattributelistcommand -parseentitydeclcommand \
+ -normalize -internaldtd -dtdsubset \
+ -reportempty -ignorewhitespace \
+ -reportempty \
+ }
+ set usage [join $options ", "]
+ regsub -all -- - $options {} options
+ set pat ^-([join $options |])$
+ foreach {flag value} $args {
+ if {[regexp $pat $flag]} {
+ # Validate numbers
+ if {[info exists parser($flag)] && \
+ [string is integer -strict $parser($flag)] && \
+ ![string is integer -strict $value]} {
+ return -code error "Bad value for $flag ($value), must be integer"
+ }
+ set parser($flag) $value
+ } else {
+ return -code error "Unknown option $flag, can be: $usage"
+ }
+ }
+
+ # Backward-compatibility: -baseuri is a synonym for -baseurl
+ catch {set parser(-baseuri) $parser(-baseurl)}
+
+ return {}
+}
+
+# xml::tclparser::parse --
+#
+# Parses document instance data
+#
+# Arguments:
+# name parser object
+# xml data
+# args configuration options
+#
+# Results:
+# Callbacks are invoked
+
+proc xml::tclparser::parse {name xml args} {
+
+ array set options $args
+ upvar \#0 [namespace current]::$name parser
+ variable tokExpr
+ variable substExpr
+
+ # Mats:
+ if {[llength $args]} {
+ eval {configure $name} $args
+ }
+
+ set parseOptions [list \
+ -emptyelement [namespace code ParseEmpty] \
+ -parseattributelistcommand [namespace code ParseAttrs] \
+ -parseentitydeclcommand [namespace code ParseEntity] \
+ -normalize 0]
+ eval lappend parseOptions \
+ [array get parser -*command] \
+ [array get parser -reportempty] \
+ [array get parser -ignorewhitespace] \
+ [array get parser -name] \
+ [array get parser -cmd] \
+ [array get parser -baseuri] \
+ [array get parser -validate] \
+ [array get parser -final] \
+ [array get parser -defaultexpandinternalentities] \
+ [array get parser entities] \
+ [array get parser extentities] \
+ [array get parser parameterentities] \
+ [array get parser externalparameterentities] \
+ [array get parser elementdecls] \
+ [array get parser attlistdecls] \
+ [array get parser notationdecls]
+
+ # Mats:
+ # If -final 0 we also need to maintain the state with a -statevariable !
+ if {!$parser(-final)} {
+ eval lappend parseOptions [array get parser -statevariable]
+ }
+
+ set dtdsubset no
+ catch {set dtdsubset $options(-dtdsubset)}
+ switch -- $dtdsubset {
+ internal {
+ # Bypass normal parsing
+ lappend parseOptions -statevariable $parser(-statevariable)
+ array set intOptions [array get ::sgml::StdOptions]
+ array set intOptions $parseOptions
+ ::sgml::ParseDTD:Internal [array get intOptions] $xml
+ return {}
+ }
+ external {
+ # Bypass normal parsing
+ lappend parseOptions -statevariable $parser(-statevariable)
+ array set intOptions [array get ::sgml::StdOptions]
+ array set intOptions $parseOptions
+ ::sgml::ParseDTD:External [array get intOptions] $xml
+ return {}
+ }
+ default {
+ # Pass through to normal processing
+ }
+ }
+
+ lappend tokenOptions \
+ -internaldtdvariable [namespace current]::${name}(internaldtd)
+
+ # Mats: If -final 0 we also need to maintain the state with a -statevariable !
+ if {!$parser(-final)} {
+ eval lappend tokenOptions [array get parser -statevariable] \
+ [array get parser -final]
+ }
+
+ # Mats:
+ # Why not the first four? Just padding? Lrange undos \n interp.
+ # It is necessary to have the first four as well if chopped off in
+ # middle of pcdata.
+ set tokenised [lrange \
+ [eval {::sgml::tokenise $xml $tokExpr $substExpr} $tokenOptions] \
+ 0 end]
+
+ lappend parseOptions -internaldtd [list $parser(internaldtd)]
+ eval ::sgml::parseEvent [list $tokenised] $parseOptions
+
+ return {}
+}
+
+# xml::tclparser::ParseEmpty -- Tcl 8.1+ version
+#
+# Used by parser to determine whether an element is empty.
+# This is usually dead easy in XML, but as always not quite.
+# Have to watch out for empty element syntax
+#
+# Arguments:
+# tag element name
+# attr attribute list (raw)
+# e End tag delimiter.
+#
+# Results:
+# Return value of e
+
+proc xml::tclparser::ParseEmpty {tag attr e} {
+ switch -glob [string length $e],[regexp "/[::xml::cl $::xml::Wsp]*$" $attr] {
+ 0,0 {
+ return {}
+ }
+ 0,* {
+ return /
+ }
+ default {
+ return $e
+ }
+ }
+}
+
+# xml::tclparser::ParseAttrs -- Tcl 8.1+ version
+#
+# Parse element attributes.
+#
+# There are two forms for name-value pairs:
+#
+# name="value"
+# name='value'
+#
+# Arguments:
+# opts parser options
+# attrs attribute string given in a tag
+#
+# Results:
+# Returns a Tcl list representing the name-value pairs in the
+# attribute string
+#
+# A ">" occurring in the attribute list causes problems when parsing
+# the XML. This manifests itself by an unterminated attribute value
+# and a ">" appearing the element text.
+# In this case return a three element list;
+# the message "unterminated attribute value", the attribute list it
+# did manage to parse and the remainder of the attribute list.
+
+proc xml::tclparser::ParseAttrs {opts attrs} {
+
+ set result {}
+
+ while {[string length [string trim $attrs]]} {
+ if {[regexp [::sgml::cl $::xml::Wsp]*($::xml::Name)[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')([::sgml::cl ^<]*?)\\2(.*) $attrs discard attrName delimiter value attrs]} {
+ lappend result $attrName [NormalizeAttValue $opts $value]
+ } elseif {[regexp [::sgml::cl $::xml::Wsp]*$::xml::Name[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')[::sgml::cl ^<]*\$ $attrs]} {
+ return -code error [list {unterminated attribute value} $result $attrs]
+ } else {
+ return -code error "invalid attribute list"
+ }
+ }
+
+ return $result
+}
+
+# xml::tclparser::NormalizeAttValue --
+#
+# Perform attribute value normalisation. This involves:
+# . character references are appended to the value
+# . entity references are recursively processed and replacement value appended
+# . whitespace characters cause a space to be appended
+# . other characters appended as-is
+#
+# Arguments:
+# opts parser options
+# value unparsed attribute value
+#
+# Results:
+# Normalised value returned.
+
+proc xml::tclparser::NormalizeAttValue {opts value} {
+
+ # sgmlparser already has backslashes protected
+ # Protect Tcl specials
+ regsub -all {([][$])} $value {\\\1} value
+
+ # Deal with white space
+ regsub -all "\[$::xml::Wsp\]" $value { } value
+
+ # Find entity refs
+ regsub -all {&([^;]+);} $value {[NormalizeAttValue:DeRef $opts {\1}]} value
+
+ return [subst $value]
+}
+
+# xml::tclparser::NormalizeAttValue:DeRef --
+#
+# Handler to normalize attribute values
+#
+# Arguments:
+# opts parser options
+# ref entity reference
+#
+# Results:
+# Returns character
+
+proc xml::tclparser::NormalizeAttValue:DeRef {opts ref} {
+ # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
+ switch -glob -- $ref {
+ {#x*} {
+ scan [string range $ref 2 end] %x value
+ set char [format %c $value]
+ # Check that the char is legal for XML
+ if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
+ return $char
+ } else {
+ return -code error "illegal character"
+ }
+ }
+ {#*} {
+ scan [string range $ref 1 end] %d value
+ set char [format %c $value]
+ # Check that the char is legal for XML
+ if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
+ return $char
+ } else {
+ return -code error "illegal character"
+ }
+ }
+ lt -
+ gt -
+ amp -
+ quot -
+ apos {
+ array set map {lt < gt > amp & quot \" apos '}
+ return $map($ref)
+ }
+ default {
+ # A general entity. Must resolve to a text value - no element structure.
+
+ array set options $opts
+ upvar #0 $options(entities) map
+
+ if {[info exists map($ref)]} {
+
+ if {[regexp < $map($ref)]} {
+ return -code error "illegal character \"<\" in attribute value"
+ }
+
+ if {![regexp & $map($ref)]} {
+ # Simple text replacement
+ return $map($ref)
+ }
+
+ # There are entity references in the replacement text.
+ # Can't use child entity parser since must catch element structures
+
+ return [NormalizeAttValue $opts $map($ref)]
+
+ } elseif {[string compare $options(-entityreferencecommand) "::sgml::noop"]} {
+
+ set result [uplevel #0 $options(-entityreferencecommand) [list $ref]]
+
+ return $result
+
+ } else {
+ return -code error "unable to resolve entity reference \"$ref\""
+ }
+ }
+ }
+}
+
+# xml::tclparser::ParseEntity --
+#
+# Parse general entity declaration
+#
+# Arguments:
+# data text to parse
+#
+# Results:
+# Tcl list containing entity declaration
+
+proc xml::tclparser::ParseEntity data {
+ set data [string trim $data]
+ if {[regexp $::sgml::ExternalEntityExpr $data discard type delimiter1 id1 discard delimiter2 id2 optNDATA ndata]} {
+ switch $type {
+ PUBLIC {
+ return [list external $id2 $id1 $ndata]
+ }
+ SYSTEM {
+ return [list external $id1 {} $ndata]
+ }
+ }
+ } elseif {[regexp {^("|')(.*?)\1$} $data discard delimiter value]} {
+ return [list internal $value]
+ } else {
+ return -code error "badly formed entity declaration"
+ }
+}
+
+# xml::tclparser::delete --
+#
+# Destroy parser data
+#
+# Arguments:
+# name parser object
+#
+# Results:
+# Parser data structure destroyed
+
+proc xml::tclparser::delete name {
+ upvar \#0 [namespace current]::$name parser
+ catch {::sgml::ParserDelete $parser(-statevariable)}
+ catch {unset parser}
+ return {}
+}
+
+# xml::tclparser::get --
+#
+# Retrieve additional information from the parser
+#
+# Arguments:
+# name parser object
+# method info to retrieve
+# args additional arguments for method
+#
+# Results:
+# Depends on method
+
+proc xml::tclparser::get {name method args} {
+ upvar #0 [namespace current]::$name parser
+
+ switch -- $method {
+
+ elementdecl {
+ switch [llength $args] {
+
+ 0 {
+ # Return all element declarations
+ upvar #0 $parser(elementdecls) elements
+ return [array get elements]
+ }
+
+ 1 {
+ # Return specific element declaration
+ upvar #0 $parser(elementdecls) elements
+ if {[info exists elements([lindex $args 0])]} {
+ return [array get elements [lindex $args 0]]
+ } else {
+ return -code error "element \"[lindex $args 0]\" not declared"
+ }
+ }
+
+ default {
+ return -code error "wrong number of arguments: should be \"elementdecl ?element?\""
+ }
+ }
+ }
+
+ attlist {
+ if {[llength $args] != 1} {
+ return -code error "wrong number of arguments: should be \"get attlist element\""
+ }
+
+ upvar #0 $parser(attlistdecls)
+
+ return {}
+ }
+
+ entitydecl {
+ }
+
+ parameterentitydecl {
+ }
+
+ notationdecl {
+ }
+
+ default {
+ return -code error "unknown method \"$method\""
+ }
+ }
+
+ return {}
+}
+
+# xml::tclparser::ExternalEntity --
+#
+# Resolve and parse external entity
+#
+# Arguments:
+# name parser object
+# base base URL
+# sys system identifier
+# pub public identifier
+#
+# Results:
+# External entity is fetched and parsed
+
+proc xml::tclparser::ExternalEntity {name base sys pub} {
+}
+
+# xml::tclparser:: --
+#
+# Reset a parser instance, ready to parse another document
+#
+# Arguments:
+# name parser object
+#
+# Results:
+# Variables unset
+
+proc xml::tclparser::reset {name} {
+ upvar \#0 [namespace current]::$name parser
+
+ # Has this parser object been properly initialised?
+ if {![info exists parser] || \
+ ![info exists parser(-name)]} {
+ return [create $name]
+ }
+
+ array set parser {
+ -final 1
+ depth 0
+ leftover {}
+ }
+
+ foreach var {Entities ExtEntities PEntities ExtPEntities ElDecls AttlistDecls NotDecls} {
+ catch {unset [namespace current]::${var}$name}
+ }
+
+ # Initialise entities with predefined set
+ array set [namespace current]::Entities$name [array get ::sgml::EntityPredef]
+
+ return {}
+}
diff --git a/tclxml/tclxml-tcl/xml-8.0.tcl b/tclxml/tclxml-tcl/xml-8.0.tcl
new file mode 100755
index 0000000..db28423
--- /dev/null
+++ b/tclxml/tclxml-tcl/xml-8.0.tcl
@@ -0,0 +1,92 @@
+# xml-8.0.tcl --
+#
+# This file provides generic XML services for all implementations.
+# This file supports Tcl 8.0 regular expressions.
+#
+# See xmlparse.tcl for the Tcl implementation of a XML parser.
+#
+# Copyright (c) 2005 by Explain.
+# http://www.explain.com.au/
+# Copyright (c) 1998-2004 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: xml-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require -exact Tcl 8.0
+
+package require sgml 1.8
+
+package provide xmldefs 3.2
+
+namespace eval xml {
+
+ # Convenience routine
+ proc cl x {
+ return "\[$x\]"
+ }
+
+ # Define various regular expressions
+
+ # Characters
+ variable Char $::sgml::Char
+
+ # white space
+ variable Wsp " \t\r\n"
+ variable noWsp [cl ^$Wsp]
+
+ # Various XML names and tokens
+
+ variable NameChar $::sgml::NameChar
+ variable Name $::sgml::Name
+ variable Names $::sgml::Names
+ variable Nmtoken $::sgml::Nmtoken
+ variable Nmtokens $::sgml::Nmtokens
+
+ # The definition of the Namespace URI for XML Namespaces themselves.
+ # The prefix 'xml' is automatically bound to this URI.
+ variable xmlnsNS http://www.w3.org/XML/1998/namespace
+
+ # Tokenising expressions
+
+ variable tokExpr <(/?)([cl ^$Wsp>/]+)([cl $Wsp]*[cl ^>]*)>
+ variable substExpr "\}\n{\\2} {\\1} {\\3} \{"
+
+ # table of predefined entities
+
+ variable EntityPredef
+ array set EntityPredef {
+ lt < gt > amp & quot \" apos '
+ }
+
+}
+
+###
+### General utility procedures
+###
+
+# xml::noop --
+#
+# A do-nothing proc
+
+proc xml::noop args {}
+
+### Following procedures are based on html_library
+
+# xml::zapWhite --
+#
+# Convert multiple white space into a single space.
+#
+# Arguments:
+# data plain text
+#
+# Results:
+# As above
+
+proc xml::zapWhite data {
+ regsub -all "\[ \t\r\n\]+" $data { } data
+ return $data
+}
+
diff --git a/tclxml/tclxml-tcl/xml-8.1.tcl b/tclxml/tclxml-tcl/xml-8.1.tcl
new file mode 100755
index 0000000..5ec410b
--- /dev/null
+++ b/tclxml/tclxml-tcl/xml-8.1.tcl
@@ -0,0 +1,135 @@
+# xml.tcl --
+#
+# This file provides generic XML services for all implementations.
+# This file supports Tcl 8.1 regular expressions.
+#
+# See tclparser.tcl for the Tcl implementation of a XML parser.
+#
+# Copyright (c) 2005 by Explain.
+# http://www.explain.com.au/
+# Copyright (c) 1998-2004 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: xml-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require Tcl 8.1
+
+package provide xmldefs 3.2
+
+package require sgml 1.8
+
+namespace eval xml {
+
+ namespace export qnamesplit
+
+ # Convenience routine
+ proc cl x {
+ return "\[$x\]"
+ }
+
+ # Define various regular expressions
+
+ # Characters
+ variable Char $::sgml::Char
+
+ # white space
+ variable Wsp " \t\r\n"
+ variable allWsp [cl $Wsp]*
+ variable noWsp [cl ^$Wsp]
+
+ # Various XML names and tokens
+
+ variable NameChar $::sgml::NameChar
+ variable Name $::sgml::Name
+ variable Names $::sgml::Names
+ variable Nmtoken $::sgml::Nmtoken
+ variable Nmtokens $::sgml::Nmtokens
+
+ # XML Namespaces names
+
+ # NCName ::= Name - ':'
+ variable NCName $::sgml::Name
+ regsub -all : $NCName {} NCName
+ variable QName (${NCName}:)?$NCName ;# (Prefix ':')? LocalPart
+
+ # The definition of the Namespace URI for XML Namespaces themselves.
+ # The prefix 'xml' is automatically bound to this URI.
+ variable xmlnsNS http://www.w3.org/XML/1998/namespace
+
+ # table of predefined entities
+
+ variable EntityPredef
+ array set EntityPredef {
+ lt < gt > amp & quot \" apos '
+ }
+
+ # Expressions for pulling things apart
+ variable tokExpr <(/?)([::xml::cl ^$::xml::Wsp>/]+)([::xml::cl $::xml::Wsp]*[::xml::cl ^>]*)>
+ variable substExpr "\}\n{\\2} {\\1} {\\3} \{"
+
+}
+
+###
+### Exported procedures
+###
+
+# xml::qnamesplit --
+#
+# Split a QName into its constituent parts:
+# the XML Namespace prefix and the Local-name
+#
+# Arguments:
+# qname XML Qualified Name (see XML Namespaces [6])
+#
+# Results:
+# Returns prefix and local-name as a Tcl list.
+# Error condition returned if the prefix or local-name
+# are not valid NCNames (XML Name)
+
+proc xml::qnamesplit qname {
+ variable NCName
+ variable Name
+
+ set prefix {}
+ set localname $qname
+ if {[regexp : $qname]} {
+ if {![regexp ^($NCName)?:($NCName)\$ $qname discard prefix localname]} {
+ return -code error "name \"$qname\" is not a valid QName"
+ }
+ } elseif {![regexp ^$Name\$ $qname]} {
+ return -code error "name \"$qname\" is not a valid Name"
+ }
+
+ return [list $prefix $localname]
+}
+
+###
+### General utility procedures
+###
+
+# xml::noop --
+#
+# A do-nothing proc
+
+proc xml::noop args {}
+
+### Following procedures are based on html_library
+
+# xml::zapWhite --
+#
+# Convert multiple white space into a single space.
+#
+# Arguments:
+# data plain text
+#
+# Results:
+# As above
+
+proc xml::zapWhite data {
+ regsub -all "\[ \t\r\n\]+" $data { } data
+ return $data
+}
+
diff --git a/tclxml/tclxml-tcl/xml__tcl.tcl b/tclxml/tclxml-tcl/xml__tcl.tcl
new file mode 100644
index 0000000..bdb7bd9
--- /dev/null
+++ b/tclxml/tclxml-tcl/xml__tcl.tcl
@@ -0,0 +1,272 @@
+# xml__tcl.tcl --
+#
+# This file provides a Tcl implementation of the parser
+# class support found in ../tclxml.c. It is only used
+# when the C implementation is not installed (for some reason).
+#
+# Copyright (c) 2005 by Explain.
+# http://www.explain.com.au/
+# Copyright (c) 2000-2004 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: xml__tcl.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package provide xml::tcl 3.2
+
+namespace eval xml {
+ namespace export configure parser parserclass
+
+ # Parser implementation classes
+ variable classes
+ array set classes {}
+
+ # Default parser class
+ variable default {}
+
+ # Counter for generating unique names
+ variable counter 0
+}
+
+# xml::configure --
+#
+# Configure the xml package
+#
+# Arguments:
+# None
+#
+# Results:
+# None (not yet implemented)
+
+proc xml::configure args {}
+
+# xml::parserclass --
+#
+# Implements the xml::parserclass command for managing
+# parser implementations.
+#
+# Arguments:
+# method subcommand
+# args method arguments
+#
+# Results:
+# Depends on method
+
+proc xml::parserclass {method args} {
+ variable classes
+ variable default
+
+ switch -- $method {
+
+ create {
+ if {[llength $args] < 1} {
+ return -code error "wrong number of arguments, should be xml::parserclass create name ?args?"
+ }
+
+ set name [lindex $args 0]
+ if {[llength [lrange $args 1 end]] % 2} {
+ return -code error "missing value for option \"[lindex $args end]\""
+ }
+ array set classes [list $name [list \
+ -createcommand [namespace current]::noop \
+ -createentityparsercommand [namespace current]::noop \
+ -parsecommand [namespace current]::noop \
+ -configurecommand [namespace current]::noop \
+ -getcommand [namespace current]::noop \
+ -deletecommand [namespace current]::noop \
+ ]]
+ # BUG: we're not checking that the arguments are kosher
+ set classes($name) [lrange $args 1 end]
+ set default $name
+ }
+
+ destroy {
+ if {[llength $args] < 1} {
+ return -code error "wrong number of arguments, should be xml::parserclass destroy name"
+ }
+
+ if {[info exists classes([lindex $args 0])]} {
+ unset classes([lindex $args 0])
+ } else {
+ return -code error "no such parser class \"[lindex $args 0]\""
+ }
+ }
+
+ info {
+ if {[llength $args] < 1} {
+ return -code error "wrong number of arguments, should be xml::parserclass info method"
+ }
+
+ switch -- [lindex $args 0] {
+ names {
+ return [array names classes]
+ }
+ default {
+ return $default
+ }
+ }
+ }
+
+ default {
+ return -code error "unknown method \"$method\""
+ }
+ }
+
+ return {}
+}
+
+# xml::parser --
+#
+# Create a parser object instance
+#
+# Arguments:
+# args optional name, configuration options
+#
+# Results:
+# Returns object name. Parser instance created.
+
+proc xml::parser args {
+ variable classes
+ variable default
+
+ if {[llength $args] < 1} {
+ # Create unique name, no options
+ set parserName [FindUniqueName]
+ } else {
+ if {[string index [lindex $args 0] 0] == "-"} {
+ # Create unique name, have options
+ set parserName [FindUniqueName]
+ } else {
+ # Given name, optional options
+ set parserName [lindex $args 0]
+ set args [lrange $args 1 end]
+ }
+ }
+
+ array set options [list \
+ -parser $default
+ ]
+ array set options $args
+
+ if {![info exists classes($options(-parser))]} {
+ return -code error "no such parser class \"$options(-parser)\""
+ }
+
+ # Now create the parser instance command and data structure
+ # The command must be created in the caller's namespace
+ uplevel 1 [list proc $parserName {method args} "eval [namespace current]::ParserCmd [list $parserName] \[list \$method\] \$args"]
+ upvar #0 [namespace current]::$parserName data
+ array set data [list class $options(-parser)]
+
+ array set classinfo $classes($options(-parser))
+ if {[string compare $classinfo(-createcommand) ""]} {
+ eval $classinfo(-createcommand) [list $parserName]
+ }
+ if {[string compare $classinfo(-configurecommand) ""] && \
+ [llength $args]} {
+ eval $classinfo(-configurecommand) [list $parserName] $args
+ }
+
+ return $parserName
+}
+
+# xml::FindUniqueName --
+#
+# Generate unique object name
+#
+# Arguments:
+# None
+#
+# Results:
+# Returns string.
+
+proc xml::FindUniqueName {} {
+ variable counter
+ return xmlparser[incr counter]
+}
+
+# xml::ParserCmd --
+#
+# Implements parser object command
+#
+# Arguments:
+# name object reference
+# method subcommand
+# args method arguments
+#
+# Results:
+# Depends on method
+
+proc xml::ParserCmd {name method args} {
+ variable classes
+ upvar #0 [namespace current]::$name data
+
+ array set classinfo $classes($data(class))
+
+ switch -- $method {
+
+ configure {
+ # BUG: We're not checking for legal options
+ array set data $args
+ eval $classinfo(-configurecommand) [list $name] $args
+ return {}
+ }
+
+ cget {
+ return $data([lindex $args 0])
+ }
+
+ entityparser {
+ set new [FindUniqueName]
+
+ upvar #0 [namespace current]::$name parent
+ upvar #0 [namespace current]::$new data
+ array set data [array get parent]
+
+ uplevel 1 [list proc $new {method args} "eval [namespace current]::ParserCmd [list $new] \[list \$method\] \$args"]
+
+ return [eval $classinfo(-createentityparsercommand) [list $name $new] $args]
+ }
+
+ free {
+ eval $classinfo(-deletecommand) [list $name]
+ unset data
+ uplevel 1 [list rename $name {}]
+ }
+
+ get {
+ eval $classinfo(-getcommand) [list $name] $args
+ }
+
+ parse {
+ if {[llength $args] < 1} {
+ return -code error "wrong number of arguments, should be $name parse xml ?options?"
+ }
+ eval $classinfo(-parsecommand) [list $name] $args
+ }
+
+ reset {
+ eval $classinfo(-resetcommand) [list $name]
+ }
+
+ default {
+ return -code error "unknown method"
+ }
+ }
+
+ return {}
+}
+
+# xml::noop --
+#
+# Do nothing utility proc
+#
+# Arguments:
+# args whatever
+#
+# Results:
+# Nothing happens
+
+proc xml::noop args {}
diff --git a/tclxml/tclxml-tcl/xmldep.tcl b/tclxml/tclxml-tcl/xmldep.tcl
new file mode 100644
index 0000000..bbb2613
--- /dev/null
+++ b/tclxml/tclxml-tcl/xmldep.tcl
@@ -0,0 +1,179 @@
+# xmldep.tcl --
+#
+# Find the dependencies in an XML document.
+# Supports external entities and XSL include/import.
+#
+# TODO:
+# XInclude
+#
+# Copyright (c) 2001-2003 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: xmldep.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require xml
+
+package provide xml::dep 1.0
+
+namespace eval xml::dep {
+ namespace export depend
+
+ variable extEntities
+ array set extEntities {}
+
+ variable XSLTNS http://www.w3.org/1999/XSL/Transform
+}
+
+# xml::dep::depend --
+#
+# Find the resources which an XML document
+# depends on. The document is parsed
+# sequentially, rather than using DOM, for efficiency.
+#
+# TODO:
+# Asynchronous parsing.
+#
+# Arguments:
+# xml XML document entity
+# args configuration options
+#
+# Results:
+# Returns list of resource (system) identifiers
+
+proc xml::dep::depend {xml args} {
+ variable resources
+ variable entities
+
+ set resources {}
+ catch {unset entities}
+ array set entities {}
+
+ set p [xml::parser \
+ -elementstartcommand [namespace code ElStart] \
+ -doctypecommand [namespace code DocTypeDecl] \
+ -entitydeclcommand [namespace code EntityDecl] \
+ -entityreferencecommand [namespace code EntityReference] \
+ -validate 1 \
+ ]
+ if {[llength $args]} {
+ eval [list $p] configure $args
+ }
+ $p parse $xml
+
+ return $resources
+}
+
+# xml::dep::ElStart --
+#
+# Process start element
+#
+# Arguments:
+# name tag name
+# atlist attribute list
+# args options
+#
+# Results:
+# May add to resources list
+
+proc xml::dep::ElStart {name atlist args} {
+ variable XSLTNS
+ variable resources
+
+ array set opts {
+ -namespace {}
+ }
+ array set opts $args
+
+ switch -- $opts(-namespace) \
+ $XSLTNS {
+ switch $name {
+ import -
+ include {
+ array set attr {
+ href {}
+ }
+ array set attr $atlist
+
+ if {[string length $attr(href)]} {
+ if {[lsearch $resources $attr(href)] < 0} {
+ lappend resources $attr(href)
+ }
+ }
+
+ }
+ }
+ }
+}
+
+# xml::dep::DocTypeDecl --
+#
+# Process Document Type Declaration
+#
+# Arguments:
+# name Document element
+# pubid Public identifier
+# sysid System identifier
+# dtd Internal DTD Subset
+#
+# Results:
+# Resource added to list
+
+proc xml::dep::DocTypeDecl {name pubid sysid dtd} {
+ variable resources
+
+ puts stderr [list DocTypeDecl $name $pubid $sysid dtd]
+
+ if {[string length $sysid] && \
+ [lsearch $resources $sysid] < 0} {
+ lappend resources $sysid
+ }
+
+ return {}
+}
+
+# xml::dep::EntityDecl --
+#
+# Process entity declaration, looking for external entity
+#
+# Arguments:
+# name entity name
+# sysid system identifier
+# pubid public identifier or repl. text
+#
+# Results:
+# Store external entity info for later reference
+
+proc xml::dep::EntityDecl {name sysid pubid} {
+ variable extEntities
+
+ puts stderr [list EntityDecl $name $sysid $pubid]
+
+ set extEntities($name) $sysid
+}
+
+# xml::dep::EntityReference --
+#
+# Process entity reference
+#
+# Arguments:
+# name entity name
+#
+# Results:
+# May add to resources list
+
+proc xml::dep::EntityReference name {
+ variable extEntities
+ variable resources
+
+ puts stderr [list EntityReference $name]
+
+ if {[info exists extEntities($name)] && \
+ [lsearch $resources $extEntities($name)] < 0} {
+ lappend resources $extEntities($name)
+ }
+
+}
+
diff --git a/tclxml/tclxml-tcl/xpath.tcl b/tclxml/tclxml-tcl/xpath.tcl
new file mode 100644
index 0000000..e772e67
--- /dev/null
+++ b/tclxml/tclxml-tcl/xpath.tcl
@@ -0,0 +1,362 @@
+# xpath.tcl --
+#
+# Provides an XPath parser for Tcl,
+# plus various support procedures
+#
+# Copyright (c) 2000-2003 Zveno Pty Ltd
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: xpath.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package provide xpath 1.0
+
+# We need the XML package for definition of Names
+package require xml
+
+namespace eval xpath {
+ namespace export split join createnode
+
+ variable axes {
+ ancestor
+ ancestor-or-self
+ attribute
+ child
+ descendant
+ descendant-or-self
+ following
+ following-sibling
+ namespace
+ parent
+ preceding
+ preceding-sibling
+ self
+ }
+
+ variable nodeTypes {
+ comment
+ text
+ processing-instruction
+ node
+ }
+
+ # NB. QName has parens for prefix
+
+ variable nodetestExpr ^(${::xml::QName})${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\5)?${::xml::allWsp}\\))?${::xml::allWsp}(.*)
+
+ variable nodetestExpr2 ((($::xml::QName)${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\7)?${::xml::allWsp}\\))?)|${::xml::allWsp}(\\*))${::xml::allWsp}(.*)
+}
+
+# xpath::split --
+#
+# Parse an XPath location path
+#
+# Arguments:
+# locpath location path
+#
+# Results:
+# A Tcl list representing the location path.
+# The list has the form: {{axis node-test {predicate predicate ...}} ...}
+# Where each list item is a location step.
+
+proc xpath::split locpath {
+ set leftover {}
+
+ set result [InnerSplit $locpath leftover]
+
+ if {[string length [string trim $leftover]]} {
+ return -code error "unexpected text \"$leftover\""
+ }
+
+ return $result
+}
+
+proc xpath::InnerSplit {locpath leftoverVar} {
+ upvar $leftoverVar leftover
+
+ variable axes
+ variable nodetestExpr
+ variable nodetestExpr2
+
+ # First determine whether we have an absolute location path
+ if {[regexp {^/(.*)} $locpath discard locpath]} {
+ set path {{}}
+ } else {
+ set path {}
+ }
+
+ while {[string length [string trimleft $locpath]]} {
+ if {[regexp {^\.\.(.*)} $locpath discard locpath]} {
+ # .. abbreviation
+ set axis parent
+ set nodetest *
+ } elseif {[regexp {^/(.*)} $locpath discard locpath]} {
+ # // abbreviation
+ set axis descendant-or-self
+ if {[regexp ^$nodetestExpr2 [string trimleft $locpath] discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} {
+ set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal]
+ } else {
+ set leftover $locpath
+ return $path
+ }
+ } elseif {[regexp ^\\.${::xml::allWsp}(.*) $locpath discard locpath]} {
+ # . abbreviation
+ set axis self
+ set nodetest *
+ } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}"(\[^"\])"(.*) $locpath discard attrName discard attrValue locpath]} {
+ # @ abbreviation
+ set axis attribute
+ set nodetest $attrName
+ } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}'(\[^'\])'(.*) $locpath discard attrName discard attrValue locpath]} {
+ # @ abbreviation
+ set axis attribute
+ set nodetest $attrName
+ } elseif {[regexp ^@($::xml::QName)(.*) $locpath discard attrName discard2 locpath]} {
+ # @ abbreviation
+ set axis attribute
+ set nodetest $attrName
+ } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?\\*(.*) $locpath discard discard axis discard locpath]} {
+ # wildcard specified
+ set nodetest *
+ if {![string length $axis]} {
+ set axis child
+ }
+ } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?$nodetestExpr2 $locpath discard discard axis discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} {
+ # nodetest, with or without axis
+ if {![string length $axis]} {
+ set axis child
+ }
+ set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal]
+ } else {
+ set leftover $locpath
+ return $path
+ }
+
+ # ParsePredicates
+ set predicates {}
+ set locpath [string trimleft $locpath]
+ while {[regexp {^\[(.*)} $locpath discard locpath]} {
+ if {[regexp {^([0-9]+)(\].*)} [string trim $locpath] discard posn locpath]} {
+ set predicate [list = {function position {}} [list number $posn]]
+ } else {
+ set leftover2 {}
+ set predicate [ParseExpr $locpath leftover2]
+ set locpath $leftover2
+ unset leftover2
+ }
+
+ if {[regexp {^\](.*)} [string trimleft $locpath] discard locpath]} {
+ lappend predicates $predicate
+ } else {
+ return -code error "unexpected text in predicate \"$locpath\""
+ }
+ }
+
+ set axis [string trim $axis]
+ set nodetest [string trim $nodetest]
+
+ # This step completed
+ if {[lsearch $axes $axis] < 0} {
+ return -code error "invalid axis \"$axis\""
+ }
+ lappend path [list $axis $nodetest $predicates]
+
+ # Move to next step
+
+ if {[string length $locpath] && ![regexp ^/(.*) $locpath discard locpath]} {
+ set leftover $locpath
+ return $path
+ }
+
+ }
+
+ return $path
+}
+
+# xpath::ParseExpr --
+#
+# Parse one expression in a predicate
+#
+# Arguments:
+# locpath location path to parse
+# leftoverVar Name of variable in which to store remaining path
+#
+# Results:
+# Returns parsed expression as a Tcl list
+
+proc xpath::ParseExpr {locpath leftoverVar} {
+ upvar $leftoverVar leftover
+ variable nodeTypes
+
+ set expr {}
+ set mode expr
+ set stack {}
+
+ while {[string index [string trimleft $locpath] 0] != "\]"} {
+ set locpath [string trimleft $locpath]
+ switch $mode {
+ expr {
+ # We're looking for a term
+ if {[regexp ^-(.*) $locpath discard locpath]} {
+ # UnaryExpr
+ lappend stack "-"
+ } elseif {[regexp ^\\\$({$::xml::QName})(.*) $locpath discard varname discard locpath]} {
+ # VariableReference
+ lappend stack [list varRef $varname]
+ set mode term
+ } elseif {[regexp {^\((.*)} $locpath discard locpath]} {
+ # Start grouping
+ set leftover2 {}
+ lappend stack [list group [ParseExpr $locpath leftover2]]
+ set locpath $leftover2
+ unset leftover2
+
+ if {[regexp {^\)(.*)} [string trimleft $locpath] discard locpath]} {
+ set mode term
+ } else {
+ return -code error "unexpected text \"$locpath\", expected \")\""
+ }
+
+ } elseif {[regexp {^"([^"]*)"(.*)} $locpath discard literal locpath]} {
+ # Literal (" delimited)
+ lappend stack [list literal $literal]
+ set mode term
+ } elseif {[regexp {^'([^']*)'(.*)} $locpath discard literal locpath]} {
+ # Literal (' delimited)
+ lappend stack [list literal $literal]
+ set mode term
+ } elseif {[regexp {^([0-9]+(\.[0-9]+)?)(.*)} $locpath discard number discard locpath]} {
+ # Number
+ lappend stack [list number $number]
+ set mode term
+ } elseif {[regexp {^(\.[0-9]+)(.*)} $locpath discard number locpath]} {
+ # Number
+ lappend stack [list number $number]
+ set mode term
+ } elseif {[regexp ^(${::xml::QName})\\(${::xml::allWsp}(.*) $locpath discard functionName discard locpath]} {
+ # Function call start or abbreviated node-type test
+
+ if {[lsearch $nodeTypes $functionName] >= 0} {
+ # Looking like a node-type test
+ if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} {
+ lappend stack [list path [list child [list $functionName ()] {}]]
+ set mode term
+ } else {
+ return -code error "invalid node-type test \"$functionName\""
+ }
+ } else {
+ if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} {
+ set parameters {}
+ } else {
+ set leftover2 {}
+ set parameters [ParseExpr $locpath leftover2]
+ set locpath $leftover2
+ unset leftover2
+ while {[regexp {^,(.*)} $locpath discard locpath]} {
+ set leftover2 {}
+ lappend parameters [ParseExpr $locpath leftover2]
+ set locpath $leftover2
+ unset leftover2
+ }
+
+ if {![regexp ^\\)${::xml::allWsp}(.*) [string trimleft $locpath] discard locpath]} {
+ return -code error "unexpected text \"locpath\" - expected \")\""
+ }
+ }
+
+ lappend stack [list function $functionName $parameters]
+ set mode term
+ }
+
+ } else {
+ # LocationPath
+ set leftover2 {}
+ lappend stack [list path [InnerSplit $locpath leftover2]]
+ set locpath $leftover2
+ unset leftover2
+ set mode term
+ }
+ }
+ term {
+ # We're looking for an expression operator
+ if {[regexp ^-(.*) $locpath discard locpath]} {
+ # UnaryExpr
+ set stack [linsert $stack 0 expr "-"]
+ set mode expr
+ } elseif {[regexp ^(and|or|\\=|!\\=|<|>|<\\=|>\\=|\\||\\+|\\-|\\*|div|mod)(.*) $locpath discard exprtype locpath]} {
+ # AndExpr, OrExpr, EqualityExpr, RelationalExpr or UnionExpr
+ set stack [linsert $stack 0 $exprtype]
+ set mode expr
+ } else {
+ return -code error "unexpected text \"$locpath\", expecting operator"
+ }
+ }
+ default {
+ # Should never be here!
+ return -code error "internal error"
+ }
+ }
+ }
+
+ set leftover $locpath
+ return $stack
+}
+
+# xpath::ResolveWildcard --
+
+proc xpath::ResolveWildcard {nodetest typetest wildcard literal} {
+ variable nodeTypes
+
+ switch -glob -- [string length $nodetest],[string length $typetest],[string length $wildcard],[string length $literal] {
+ 0,0,0,* {
+ return -code error "bad location step (nothing parsed)"
+ }
+ 0,0,* {
+ # Name wildcard specified
+ return *
+ }
+ *,0,0,* {
+ # Element type test - nothing to do
+ return $nodetest
+ }
+ *,0,*,* {
+ # Internal error?
+ return -code error "bad location step (found both nodetest and wildcard)"
+ }
+ *,*,0,0 {
+ # Node type test
+ if {[lsearch $nodeTypes $nodetest] < 0} {
+ return -code error "unknown node type \"$typetest\""
+ }
+ return [list $nodetest $typetest]
+ }
+ *,*,0,* {
+ # Node type test
+ if {[lsearch $nodeTypes $nodetest] < 0} {
+ return -code error "unknown node type \"$typetest\""
+ }
+ return [list $nodetest $literal]
+ }
+ default {
+ # Internal error?
+ return -code error "bad location step"
+ }
+ }
+}
+
+# xpath::join --
+#
+# Reconstitute an XPath location path from a
+# Tcl list representation.
+#
+# Arguments:
+# spath split path
+#
+# Results:
+# Returns an Xpath location path
+
+proc xpath::join spath {
+ return -code error "not yet implemented"
+}
+