diff options
author | William Joye <wjoye@cfa.harvard.edu> | 2019-01-02 20:21:08 (GMT) |
---|---|---|
committer | William Joye <wjoye@cfa.harvard.edu> | 2019-01-02 20:21:08 (GMT) |
commit | 8f5c6286538d2f0c762edeacf21f85eeeed1fe0b (patch) | |
tree | 658268b7e04eed42e7cecc6338f5ef7d18cb788a /tclxml/tclxml-tcl | |
parent | 7a0b03ed0c0b01990f519655fdd6b18e790c5ecc (diff) | |
download | blt-8f5c6286538d2f0c762edeacf21f85eeeed1fe0b.zip blt-8f5c6286538d2f0c762edeacf21f85eeeed1fe0b.tar.gz blt-8f5c6286538d2f0c762edeacf21f85eeeed1fe0b.tar.bz2 |
update TEA 3.13
Diffstat (limited to 'tclxml/tclxml-tcl')
-rwxr-xr-x | tclxml/tclxml-tcl/sgml-8.0.tcl | 143 | ||||
-rwxr-xr-x | tclxml/tclxml-tcl/sgml-8.1.tcl | 143 | ||||
-rwxr-xr-x | tclxml/tclxml-tcl/sgmlparser.tcl | 2814 | ||||
-rwxr-xr-x | tclxml/tclxml-tcl/tclparser-8.0.tcl | 359 | ||||
-rwxr-xr-x | tclxml/tclxml-tcl/tclparser-8.1.tcl | 614 | ||||
-rwxr-xr-x | tclxml/tclxml-tcl/xml-8.0.tcl | 92 | ||||
-rwxr-xr-x | tclxml/tclxml-tcl/xml-8.1.tcl | 135 | ||||
-rw-r--r-- | tclxml/tclxml-tcl/xml__tcl.tcl | 272 | ||||
-rw-r--r-- | tclxml/tclxml-tcl/xmldep.tcl | 179 | ||||
-rw-r--r-- | tclxml/tclxml-tcl/xpath.tcl | 362 |
10 files changed, 0 insertions, 5113 deletions
diff --git a/tclxml/tclxml-tcl/sgml-8.0.tcl b/tclxml/tclxml-tcl/sgml-8.0.tcl deleted file mode 100755 index f1179cf..0000000 --- a/tclxml/tclxml-tcl/sgml-8.0.tcl +++ /dev/null @@ -1,143 +0,0 @@ -# sgml-8.0.tcl -- -# -# This file provides generic parsing services for SGML-based -# languages, namely HTML and XML. -# This file supports Tcl 8.0 characters and regular expressions. -# -# NB. It is a misnomer. There is no support for parsing -# arbitrary SGML as such. -# -# Copyright (c) 1998,1999 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: sgml-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require -exact Tcl 8.0 - -package provide sgml 1.9 - -namespace eval sgml { - - # Convenience routine - proc cl x { - return "\[$x\]" - } - - # Define various regular expressions - - # Character classes - variable Char \t\n\r\ -\xFF - variable BaseChar A-Za-z - variable Letter $BaseChar - variable Digit 0-9 - variable CombiningChar {} - variable Extender {} - variable Ideographic {} - - # white space - variable Wsp " \t\r\n" - variable noWsp [cl ^$Wsp] - - # Various XML names - variable NameChar \[-$Letter$Digit._:$CombiningChar$Extender\] - variable Name \[_:$BaseChar$Ideographic\]$NameChar* - variable Names ${Name}(?:$Wsp$Name)* - variable Nmtoken $NameChar+ - variable Nmtokens ${Nmtoken}(?:$Wsp$Nmtoken)* - - # table of predefined entities for XML - - variable EntityPredef - array set EntityPredef { - lt < gt > amp & quot \" apos ' - } - -} - -# These regular expressions are defined here once for better performance - -namespace eval sgml { - variable Wsp - - # Watch out for case-sensitivity - - set attlist_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#REQUIRED|#IMPLIED) - set attlist_enum_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*\\(([cl ^)]*)\\)[cl $Wsp]*("([cl ^")])")? ;# " - set attlist_fixed_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#FIXED)[cl $Wsp]*([cl ^$Wsp]+) - - set param_entity_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^"$Wsp]*)[cl $Wsp]*"([cl ^"]*)" - - set notation_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(.*) - -} - -### Utility procedures - -# sgml::noop -- -# -# A do-nothing proc -# -# Arguments: -# args arguments -# -# Results: -# Nothing. - -proc sgml::noop args { - return 0 -} - -# sgml::identity -- -# -# Identity function. -# -# Arguments: -# a arbitrary argument -# -# Results: -# $a - -proc sgml::identity a { - return $a -} - -# sgml::Error -- -# -# Throw an error -# -# Arguments: -# args arguments -# -# Results: -# Error return condition. - -proc sgml::Error args { - uplevel return -code error [list $args] -} - -### Following procedures are based on html_library - -# sgml::zapWhite -- -# -# Convert multiple white space into a single space. -# -# Arguments: -# data plain text -# -# Results: -# As above - -proc sgml::zapWhite data { - regsub -all "\[ \t\r\n\]+" $data { } data - return $data -} - -proc sgml::Boolean value { - regsub {1|true|yes|on} $value 1 value - regsub {0|false|no|off} $value 0 value - return $value -} - diff --git a/tclxml/tclxml-tcl/sgml-8.1.tcl b/tclxml/tclxml-tcl/sgml-8.1.tcl deleted file mode 100755 index 60748bb..0000000 --- a/tclxml/tclxml-tcl/sgml-8.1.tcl +++ /dev/null @@ -1,143 +0,0 @@ -# sgml-8.1.tcl -- -# -# This file provides generic parsing services for SGML-based -# languages, namely HTML and XML. -# This file supports Tcl 8.1 characters and regular expressions. -# -# NB. It is a misnomer. There is no support for parsing -# arbitrary SGML as such. -# -# Copyright (c) 1998-2003 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: sgml-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require Tcl 8.1 - -package provide sgml 1.9 - -namespace eval sgml { - - # Convenience routine - proc cl x { - return "\[$x\]" - } - - # Define various regular expressions - - # Character classes - variable Char \t\n\r\ -\uD7FF\uE000-\uFFFD\u10000-\u10FFFF - variable BaseChar \u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3 - variable Ideographic \u4E00-\u9FA5\u3007\u3021-\u3029 - variable CombiningChar \u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A - variable Digit \u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29 - variable Extender \u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D-\u309E\u30FC-\u30FE - variable Letter $BaseChar|$Ideographic - - # white space - variable Wsp " \t\r\n" - variable noWsp [cl ^$Wsp] - - # Various XML names - variable NameChar \[-$Letter$Digit._:$CombiningChar$Extender\] - variable Name \[_:$BaseChar$Ideographic\]$NameChar* - variable Names ${Name}(?:$Wsp$Name)* - variable Nmtoken $NameChar+ - variable Nmtokens ${Nmtoken}(?:$Wsp$Nmtoken)* - - # table of predefined entities for XML - - variable EntityPredef - array set EntityPredef { - lt < gt > amp & quot \" apos ' - } - -} - -# These regular expressions are defined here once for better performance - -namespace eval sgml { - variable Wsp - - # Watch out for case-sensitivity - - set attlist_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#REQUIRED|#IMPLIED) - set attlist_enum_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*\\(([cl ^)]*)\\)[cl $Wsp]*("([cl ^")]*)")? ;# " - set attlist_fixed_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#FIXED)[cl $Wsp]*([cl ^$Wsp]+) - - set param_entity_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^"$Wsp]*)[cl $Wsp]*"([cl ^"]*)" - - set notation_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(.*) - -} - -### Utility procedures - -# sgml::noop -- -# -# A do-nothing proc -# -# Arguments: -# args arguments -# -# Results: -# Nothing. - -proc sgml::noop args { - return 0 -} - -# sgml::identity -- -# -# Identity function. -# -# Arguments: -# a arbitrary argument -# -# Results: -# $a - -proc sgml::identity a { - return $a -} - -# sgml::Error -- -# -# Throw an error -# -# Arguments: -# args arguments -# -# Results: -# Error return condition. - -proc sgml::Error args { - uplevel return -code error [list $args] -} - -### Following procedures are based on html_library - -# sgml::zapWhite -- -# -# Convert multiple white space into a single space. -# -# Arguments: -# data plain text -# -# Results: -# As above - -proc sgml::zapWhite data { - regsub -all "\[ \t\r\n\]+" $data { } data - return $data -} - -proc sgml::Boolean value { - regsub {1|true|yes|on} $value 1 value - regsub {0|false|no|off} $value 0 value - return $value -} - diff --git a/tclxml/tclxml-tcl/sgmlparser.tcl b/tclxml/tclxml-tcl/sgmlparser.tcl deleted file mode 100755 index 2677a44..0000000 --- a/tclxml/tclxml-tcl/sgmlparser.tcl +++ /dev/null @@ -1,2814 +0,0 @@ -# sgmlparser.tcl -- -# -# This file provides the generic part of a parser for SGML-based -# languages, namely HTML and XML. -# -# NB. It is a misnomer. There is no support for parsing -# arbitrary SGML as such. -# -# See sgml.tcl for variable definitions. -# -# Copyright (c) 2008 Explain -# http://www.explain.com.au/ -# Copyright (c) 1998-2003 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: sgmlparser.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require sgml 1.9 - -package require uri 1.1 - -package provide sgmlparser 1.1 - -namespace eval sgml { - namespace export tokenise parseEvent - - namespace export parseDTD - - # NB. Most namespace variables are defined in sgml-8.[01].tcl - # to account for differences between versions of Tcl. - # This especially includes the regular expressions used. - - variable ParseEventNum - if {![info exists ParseEventNum]} { - set ParseEventNum 0 - } - variable ParseDTDnum - if {![info exists ParseDTDNum]} { - set ParseDTDNum 0 - } - - variable declExpr [cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*) - variable EntityExpr [cl $::sgml::Wsp]*(%[cl $::sgml::Wsp])?[cl $::sgml::Wsp]*($::sgml::Name)[cl $::sgml::Wsp]+(.*) - - #variable MarkupDeclExpr <([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)> - #variable MarkupDeclSub "} {\\1} {\\2} {\\3} {" - variable MarkupDeclExpr <[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^>]*)> - variable MarkupDeclSub "\} {\\1} {\\2} \{" - - variable ExternalEntityExpr ^(PUBLIC|SYSTEM)[cl $::sgml::Wsp]+("|')(.*?)\\2([cl $::sgml::Wsp]+("|')(.*?)\\2)?([cl $::sgml::Wsp]+NDATA[cl $::sgml::Wsp]+($::xml::Name))?\$ - - variable StdOptions - array set StdOptions [list \ - -elementstartcommand [namespace current]::noop \ - -elementendcommand [namespace current]::noop \ - -characterdatacommand [namespace current]::noop \ - -processinginstructioncommand [namespace current]::noop \ - -externalentitycommand {} \ - -xmldeclcommand [namespace current]::noop \ - -doctypecommand [namespace current]::noop \ - -commentcommand [namespace current]::noop \ - -entitydeclcommand [namespace current]::noop \ - -unparsedentitydeclcommand [namespace current]::noop \ - -parameterentitydeclcommand [namespace current]::noop \ - -notationdeclcommand [namespace current]::noop \ - -elementdeclcommand [namespace current]::noop \ - -attlistdeclcommand [namespace current]::noop \ - -paramentityparsing 1 \ - -defaultexpandinternalentities 1 \ - -startdoctypedeclcommand [namespace current]::noop \ - -enddoctypedeclcommand [namespace current]::noop \ - -entityreferencecommand {} \ - -warningcommand [namespace current]::noop \ - -errorcommand [namespace current]::Error \ - -final 1 \ - -validate 0 \ - -baseuri {} \ - -name {} \ - -cmd {} \ - -emptyelement [namespace current]::EmptyElement \ - -parseattributelistcommand [namespace current]::noop \ - -parseentitydeclcommand [namespace current]::noop \ - -normalize 1 \ - -internaldtd {} \ - -reportempty 0 \ - -ignorewhitespace 0 \ - ] -} - -# sgml::tokenise -- -# -# Transform the given HTML/XML text into a Tcl list. -# -# Arguments: -# sgml text to tokenize -# elemExpr RE to recognise tags -# elemSub transform for matched tags -# args options -# -# Valid Options: -# -internaldtdvariable -# -final boolean True if no more data is to be supplied -# -statevariable varName Name of a variable used to store info -# -# Results: -# Returns a Tcl list representing the document. - -proc sgml::tokenise {sgml elemExpr elemSub args} { - array set options {-final 1} - array set options $args - set options(-final) [Boolean $options(-final)] - - # If the data is not final then there must be a variable to store - # unused data. - if {!$options(-final) && ![info exists options(-statevariable)]} { - return -code error {option "-statevariable" required if not final} - } - - # Pre-process stage - # - # Extract the internal DTD subset, if any - - catch {upvar #0 $options(-internaldtdvariable) dtd} - if {[regexp {<!DOCTYPE[^[<]+\[([^]]+)\]} $sgml discard dtd]} { - regsub {(<!DOCTYPE[^[<]+)(\[[^]]+\])} $sgml {\1\&xml:intdtd;} sgml - } - - # Protect Tcl special characters - regsub -all {([{}\\])} $sgml {\\\1} sgml - - # Do the translation - - if {[info exists options(-statevariable)]} { - # Mats: Several rewrites here to handle -final 0 option. - # If any cached unparsed xml (state(leftover)), prepend it. - upvar #0 $options(-statevariable) state - if {[string length $state(leftover)]} { - regsub -all $elemExpr $state(leftover)$sgml $elemSub sgml - set state(leftover) {} - } else { - regsub -all $elemExpr $sgml $elemSub sgml - } - set sgml "{} {} {} \{$sgml\}" - - # Performance note (Tcl 8.0): - # Use of lindex, lreplace will cause parsing to list object - - # This RE only fixes chopped inside tags, not chopped text. - if {[regexp {^([^<]*)(<[^>]*$)} [lindex $sgml end] x text rest]} { - set sgml [lreplace $sgml end end $text] - # Mats: unmatched stuff means that it is chopped off. Cache it for next round. - set state(leftover) $rest - } - - # Patch from bug report #596959, Marshall Rose - if {[string compare [lindex $sgml 4] ""]} { - set sgml [linsert $sgml 0 {} {} {} {} {}] - } - - } else { - - # Performance note (Tcl 8.0): - # In this case, no conversion to list object is performed - - # Mats: This fails if not -final and $sgml is chopped off right in a tag. - regsub -all $elemExpr $sgml $elemSub sgml - set sgml "{} {} {} \{$sgml\}" - } - - return $sgml - -} - -# sgml::parseEvent -- -# -# Produces an event stream for a XML/HTML document, -# given the Tcl list format returned by tokenise. -# -# This procedure checks that the document is well-formed, -# and throws an error if the document is found to be not -# well formed. Warnings are passed via the -warningcommand script. -# -# The procedure only check for well-formedness, -# no DTD is required. However, facilities are provided for entity expansion. -# -# Arguments: -# sgml Instance data, as a Tcl list. -# args option/value pairs -# -# Valid Options: -# -final Indicates end of document data -# -validate Boolean to enable validation -# -baseuri URL for resolving relative URLs -# -elementstartcommand Called when an element starts -# -elementendcommand Called when an element ends -# -characterdatacommand Called when character data occurs -# -entityreferencecommand Called when an entity reference occurs -# -processinginstructioncommand Called when a PI occurs -# -externalentitycommand Called for an external entity reference -# -# -xmldeclcommand Called when the XML declaration occurs -# -doctypecommand Called when the document type declaration occurs -# -commentcommand Called when a comment occurs -# -entitydeclcommand Called when a parsed entity is declared -# -unparsedentitydeclcommand Called when an unparsed external entity is declared -# -parameterentitydeclcommand Called when a parameter entity is declared -# -notationdeclcommand Called when a notation is declared -# -elementdeclcommand Called when an element is declared -# -attlistdeclcommand Called when an attribute list is declared -# -paramentityparsing Boolean to enable/disable parameter entity substitution -# -defaultexpandinternalentities Boolean to enable/disable expansion of entities declared in internal DTD subset -# -# -startdoctypedeclcommand Called when the Doc Type declaration starts (see also -doctypecommand) -# -enddoctypedeclcommand Called when the Doc Type declaration ends (see also -doctypecommand) -# -# -errorcommand Script to evaluate for a fatal error -# -warningcommand Script to evaluate for a reportable warning -# -statevariable global state variable -# -normalize whether to normalize names -# -reportempty whether to include an indication of empty elements -# -ignorewhitespace whether to automatically strip whitespace -# -# Results: -# The various callback scripts are invoked. -# Returns empty string. -# -# BUGS: -# If command options are set to empty string then they should not be invoked. - -proc sgml::parseEvent {sgml args} { - variable Wsp - variable noWsp - variable Nmtoken - variable Name - variable ParseEventNum - variable StdOptions - - array set options [array get StdOptions] - catch {array set options $args} - - # Mats: - # If the data is not final then there must be a variable to persistently store the parse state. - if {!$options(-final) && ![info exists options(-statevariable)]} { - return -code error {option "-statevariable" required if not final} - } - - foreach {opt value} [array get options *command] { - if {[string compare $opt "-externalentitycommand"] && ![string length $value]} { - set options($opt) [namespace current]::noop - } - } - - if {![info exists options(-statevariable)]} { - set options(-statevariable) [namespace current]::ParseEvent[incr ParseEventNum] - } - if {![info exists options(entities)]} { - set options(entities) [namespace current]::Entities$ParseEventNum - array set $options(entities) [array get [namespace current]::EntityPredef] - } - if {![info exists options(extentities)]} { - set options(extentities) [namespace current]::ExtEntities$ParseEventNum - } - if {![info exists options(parameterentities)]} { - set options(parameterentities) [namespace current]::ParamEntities$ParseEventNum - } - if {![info exists options(externalparameterentities)]} { - set options(externalparameterentities) [namespace current]::ExtParamEntities$ParseEventNum - } - if {![info exists options(elementdecls)]} { - set options(elementdecls) [namespace current]::ElementDecls$ParseEventNum - } - if {![info exists options(attlistdecls)]} { - set options(attlistdecls) [namespace current]::AttListDecls$ParseEventNum - } - if {![info exists options(notationdecls)]} { - set options(notationdecls) [namespace current]::NotationDecls$ParseEventNum - } - if {![info exists options(namespaces)]} { - set options(namespaces) [namespace current]::Namespaces$ParseEventNum - } - - # For backward-compatibility - catch {set options(-baseuri) $options(-baseurl)} - - # Choose an external entity resolver - - if {![string length $options(-externalentitycommand)]} { - if {$options(-validate)} { - set options(-externalentitycommand) [namespace code ResolveEntity] - } else { - set options(-externalentitycommand) [namespace code noop] - } - } - - upvar #0 $options(-statevariable) state - upvar #0 $options(entities) entities - - # Mats: - # The problem is that the state is not maintained when -final 0 ! - # I've switched back to an older version here. - - if {![info exists state(line)]} { - # Initialise the state variable - array set state { - mode normal - haveXMLDecl 0 - haveDocElement 0 - inDTD 0 - context {} - stack {} - line 0 - defaultNS {} - defaultNSURI {} - } - } - - foreach {tag close param text} $sgml { - - # Keep track of lines in the input - incr state(line) [regsub -all \n $param {} discard] - incr state(line) [regsub -all \n $text {} discard] - - # If the current mode is cdata or comment then we must undo what the - # regsub has done to reconstitute the data - - set empty {} - switch $state(mode) { - comment { - # This had "[string length $param] && " as a guard - - # can't remember why :-( - if {[regexp ([cl ^-]*)--\$ $tag discard comm1]} { - # end of comment (in tag) - set tag {} - set close {} - set state(mode) normal - DeProtect1 $options(-commentcommand) $state(commentdata)<$comm1 - unset state(commentdata) - } elseif {[regexp ([cl ^-]*)--\$ $param discard comm1]} { - # end of comment (in attributes) - DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag>$comm1 - unset state(commentdata) - set tag {} - set param {} - set close {} - set state(mode) normal - } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm1 text]} { - # end of comment (in text) - DeProtect1 $options(-commentcommand) $state(commentdata)<$close$tag$param>$comm1 - unset state(commentdata) - set tag {} - set param {} - set close {} - set state(mode) normal - } else { - # comment continues - append state(commentdata) <$close$tag$param>$text - continue - } - } - cdata { - if {[string length $param] && [regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $tag discard cdata1]} { - # end of CDATA (in tag) - PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$cdata1] - set text [subst -novariable -nocommand $text] - set tag {} - unset state(cdata) - set state(mode) normal - } elseif {[regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $param discard cdata1]} { - # end of CDATA (in attributes) - PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$cdata1] - set text [subst -novariable -nocommand $text] - set tag {} - set param {} - unset state(cdata) - set state(mode) normal - } elseif {[regexp (.*)\]\][cl $Wsp]*>(.*) $text discard cdata1 text]} { - # end of CDATA (in text) - PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$param>$cdata1] - set text [subst -novariable -nocommand $text] - set tag {} - set param {} - set close {} - unset state(cdata) - set state(mode) normal - } else { - # CDATA continues - append state(cdata) [subst -nocommand -novariable <$close$tag$param>$text] - continue - } - } - continue { - # We're skipping elements looking for the close tag - switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close { - 0,* { - continue - } - *,0, { - if {![string compare $tag $state(continue:tag)]} { - set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]] - if {![string length $empty]} { - incr state(continue:level) - } - } - continue - } - *,0,/ { - if {![string compare $tag $state(continue:tag)]} { - incr state(continue:level) -1 - } - if {!$state(continue:level)} { - unset state(continue:tag) - unset state(continue:level) - set state(mode) {} - } - } - default { - continue - } - } - } - default { - # The trailing slash on empty elements can't be automatically separated out - # in the RE, so we must do it here. - regexp (.*)(/)[cl $Wsp]*$ $param discard param empty - } - } - - # default: normal mode - - # Bug: if the attribute list has a right angle bracket then the empty - # element marker will not be seen - - set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]] - - switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty { - - 0,0,, { - # Ignore empty tag - dealt with non-normal mode above - } - *,0,, { - - # Start tag for an element. - - # Check if the internal DTD entity is in an attribute value - regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param - - set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg] - set state(haveDocElement) 1 - switch $code { - 0 {# OK} - 3 { - # break - return {} - } - 4 { - # continue - # Remember this tag and look for its close - set state(continue:tag) $tag - set state(continue:level) 1 - set state(mode) continue - continue - } - default { - return -code $code -errorinfo $::errorInfo $msg - } - } - - } - - *,0,/, { - - # End tag for an element. - - set code [catch {ParseEvent:ElementClose $tag [array get options]} msg] - switch $code { - 0 {# OK} - 3 { - # break - return {} - } - 4 { - # continue - # skip sibling nodes - set state(continue:tag) [lindex $state(stack) end] - set state(continue:level) 1 - set state(mode) continue - continue - } - default { - return -code $code -errorinfo $::errorInfo $msg - } - } - - } - - *,0,,/ { - - # Empty element - - # The trailing slash sneaks through into the param variable - regsub -all /[cl $::sgml::Wsp]*\$ $param {} param - - set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg] - set state(haveDocElement) 1 - switch $code { - 0 {# OK} - 3 { - # break - return {} - } - 4 { - # continue - # Pretty useless since it closes straightaway - } - default { - return -code $code -errorinfo $::errorInfo $msg - } - } - set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg] - switch $code { - 0 {# OK} - 3 { - # break - return {} - } - 4 { - # continue - # skip sibling nodes - set state(continue:tag) [lindex $state(stack) end] - set state(continue:level) 1 - set state(mode) continue - continue - } - default { - return -code $code -errorinfo $::errorInfo $msg - } - } - - } - - *,1,* { - # Processing instructions or XML declaration - switch -glob -- $tag { - - {\?xml} { - # XML Declaration - if {$state(haveXMLDecl)} { - uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"] - } elseif {![regexp {\?$} $param]} { - uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"] - } else { - - # We can do the parsing in one step with Tcl 8.1 RE's - # This has the benefit of performing better WF checking - - set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] - - if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} { - # Otherwise we must fallback to 8.0. - # This won't detect certain well-formedness errors - - # Get the version number - if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} { - if {[string compare $version "1.0"]} { - # Should we support future versions? - # At least 1.X? - uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"] - } - } else { - uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"] - } - - # Get the encoding declaration - set encoding {} - regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding - regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding - - # Get the standalone declaration - set standalone {} - regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone - regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone - - # Invoke the callback - uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone] - - } elseif {$matches == 0} { - uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"] - } else { - - # Invoke the callback - uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone] - - } - - } - - } - - {\?*} { - # Processing instruction - set tag [string range $tag 1 end] - if {[regsub {\?$} $tag {} tag]} { - if {[string length [string trim $param]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"] - } - } elseif {![regexp ^$Name\$ $tag]} { - uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""] - } elseif {[regexp {[xX][mM][lL]} $tag]} { - uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""] - } elseif {![regsub {\?$} $param {} param]} { - uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"] - } - set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg] - switch $code { - 0 {# OK} - 3 { - # break - return {} - } - 4 { - # continue - # skip sibling nodes - set state(continue:tag) [lindex $state(stack) end] - set state(continue:level) 1 - set state(mode) continue - continue - } - default { - return -code $code -errorinfo $::errorInfo $msg - } - } - } - - !DOCTYPE { - # External entity reference - # This should move into xml.tcl - # Parse the params supplied. Looking for Name, ExternalID and MarkupDecl - set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param] - set state(doc_name) [Normalize $state(doc_name) $options(-normalize)] - set externalID {} - set pubidlit {} - set systemlit {} - set externalID {} - if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} { - switch [string toupper $id] { - SYSTEM { - if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} { - set externalID [list SYSTEM $systemlit] ;# " - } else { - uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}} - } - } - PUBLIC { - if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} { - if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} { - set externalID [list PUBLIC $pubidlit $systemlit] - } else { - uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"] - } - } else { - uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"] - } - } - } - if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} { - lappend externalID $notation - } - } - - set state(inDTD) 1 - - ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd) - - set state(inDTD) 0 - - } - - !--* { - - # Start of a comment - # See if it ends in the same tag, otherwise change the - # parsing mode - - regexp {!--(.*)} $tag discard comm1 - if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} { - # processed comment (end in tag) - uplevel #0 $options(-commentcommand) [list $comm1_1] - } elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} { - # processed comment (end in attributes) - uplevel #0 $options(-commentcommand) [list $comm1$comm2] - } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} { - # processed comment (end in text) - uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2] - } else { - # start of comment - set state(mode) comment - set state(commentdata) "$comm1$param$empty>$text" - continue - } - } - - {!\[CDATA\[*} { - - regexp {!\[CDATA\[(.*)} $tag discard cdata1 - if {[regexp {(.*)]]$} $cdata1 discard cdata2]} { - # processed CDATA (end in tag) - PCDATA [array get options] [subst -novariable -nocommand $cdata2] - set text [subst -novariable -nocommand $text] - } elseif {[regexp {(.*)]]$} $param discard cdata2]} { - # processed CDATA (end in attribute) - # Backslashes in param are quoted at this stage - PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2] - set text [subst -novariable -nocommand $text] - } elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} { - # processed CDATA (end in text) - # Backslashes in param and text are quoted at this stage - PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2] - set text [subst -novariable -nocommand $text] - } else { - # start CDATA - set state(cdata) "$cdata1$param>$text" - set state(mode) cdata - continue - } - - } - - !ELEMENT - - !ATTLIST - - !ENTITY - - !NOTATION { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"] - } - - default { - uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"] - } - } - } - *,1,* - - *,0,/,/ { - # Syntax error - uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"] - } - } - - # Process character data - - if {$state(haveDocElement) && [llength $state(stack)]} { - - # Check if the internal DTD entity is in the text - regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text - - # Look for entity references - if {([array size entities] || \ - [string length $options(-entityreferencecommand)]) && \ - $options(-defaultexpandinternalentities) && \ - [regexp {&[^;]+;} $text]} { - - # protect Tcl specials - # NB. braces and backslashes may already be protected - regsub -all {\\({|}|\\)} $text {\1} text - regsub -all {([][$\\{}])} $text {\\\1} text - - # Mark entity references - regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text - set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}" - eval $text - } else { - - # Restore protected special characters - regsub -all {\\([][{}\\])} $text {\1} text - PCDATA [array get options] $text - } - } elseif {[string length [string trim $text]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"] - } - - } - - # If this is the end of the document, close all open containers - if {$options(-final) && [llength $state(stack)]} { - eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"] - } - - return {} -} - -# sgml::DeProtect -- -# -# Invoke given command after removing protecting backslashes -# from given text. -# -# Arguments: -# cmd Command to invoke -# text Text to deprotect -# -# Results: -# Depends on command - -proc sgml::DeProtect1 {cmd text} { - if {[string compare {} $text]} { - regsub -all {\\([]$[{}\\])} $text {\1} text - uplevel #0 $cmd [list $text] - } -} -proc sgml::DeProtect {cmd text} { - set text [lindex $text 0] - if {[string compare {} $text]} { - regsub -all {\\([]$[{}\\])} $text {\1} text - uplevel #0 $cmd [list $text] - } -} - -# sgml::ParserDelete -- -# -# Free all memory associated with parser -# -# Arguments: -# var global state array -# -# Results: -# Variables unset - -proc sgml::ParserDelete var { - upvar #0 $var state - - if {![info exists state]} { - return -code error "unknown parser" - } - - catch {unset $state(entities)} - catch {unset $state(parameterentities)} - catch {unset $state(elementdecls)} - catch {unset $state(attlistdecls)} - catch {unset $state(notationdecls)} - catch {unset $state(namespaces)} - - unset state - - return {} -} - -# sgml::ParseEvent:ElementOpen -- -# -# Start of an element. -# -# Arguments: -# tag Element name -# attr Attribute list -# opts Options -# args further configuration options -# -# Options: -# -empty boolean -# indicates whether the element was an empty element -# -# Results: -# Modify state and invoke callback - -proc sgml::ParseEvent:ElementOpen {tag attr opts args} { - variable Name - variable Wsp - - array set options $opts - upvar #0 $options(-statevariable) state - array set cfg {-empty 0} - array set cfg $args - set handleEmpty 0 - - if {$options(-normalize)} { - set tag [string toupper $tag] - } - - # Update state - lappend state(stack) $tag - - # Parse attribute list into a key-value representation - if {[string compare $options(-parseattributelistcommand) {}]} { - if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} { - if {[string compare [lindex $attr 0] "unterminated attribute value"]} { - uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] - set attr {} - } else { - - # It is most likely that a ">" character was in an attribute value. - # This manifests itself by ">" appearing in the element's text. - # In this case the callback should return a three element list; - # the message "unterminated attribute value", the attribute list it - # did manage to parse and the remainder of the attribute list. - - foreach {msg attlist brokenattr} $attr break - - upvar text elemText - if {[string first > $elemText] >= 0} { - - # Now piece the attribute list back together - regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue - regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText - regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist - - # Gotcha: watch out for empty element syntax - if {[string match */ [string trimright $remattlist]]} { - set remattlist [string range $remattlist 0 end-1] - set handleEmpty 1 - set cfg(-empty) 1 - } - - append attvalue >$remattvalue - lappend attlist $attname $attvalue - - # Complete parsing the attribute list - if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} { - uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] - set attr {} - set attlist {} - } else { - eval lappend attlist $attr - } - - set attr $attlist - - } else { - uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] - set attr {} - } - } - } - } - - set empty {} - if {$cfg(-empty) && $options(-reportempty)} { - set empty {-empty 1} - } - - # Check for namespace declarations - upvar #0 $options(namespaces) namespaces - set nsdecls {} - if {[llength $attr]} { - array set attrlist $attr - foreach {attrName attrValue} [array get attrlist xmlns*] { - unset attrlist($attrName) - set colon [set prefix {}] - if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} { - switch -glob [string length $colon],[string length $prefix] { - 0,0 { - # default NS declaration - lappend state(defaultNSURI) $attrValue - lappend state(defaultNS) [llength $state(stack)] - lappend nsdecls $attrValue {} - } - 0,* { - # Huh? - } - *,0 { - # Error - uplevel #0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\"" - } - default { - set namespaces($prefix,[llength $state(stack)]) $attrValue - lappend nsdecls $attrValue $prefix - } - } - } - } - if {[llength $nsdecls]} { - set nsdecls [list -namespacedecls $nsdecls] - } - set attr [array get attrlist] - } - - # Check whether this element has an expanded name - set ns {} - if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} { - set nsspec [lsort -dictionary -decreasing [array names namespaces $prefix,*]] - if {[llength $nsspec]} { - set nsuri $namespaces([lindex $nsspec 0]) - set ns [list -namespace $nsuri] - } else { - uplevel #0 $options(-errorcommand) [list namespaceundeclared "no namespace declared for prefix \"$prefix\" in element $tag"] - } - } elseif {[llength $state(defaultNSURI)]} { - set ns [list -namespace [lindex $state(defaultNSURI) end]] - } - - # Invoke callback - set code [catch {uplevel #0 $options(-elementstartcommand) [list $tag $attr] $empty $ns $nsdecls} msg] - - # Sometimes empty elements must be handled here (see above) - if {$code == 0 && $handleEmpty} { - ParseEvent:ElementClose $tag $opts -empty 1 - } - - return -code $code -errorinfo $::errorInfo $msg -} - -# sgml::ParseEvent:ElementClose -- -# -# End of an element. -# -# Arguments: -# tag Element name -# opts Options -# args further configuration options -# -# Options: -# -empty boolean -# indicates whether the element as an empty element -# -# Results: -# Modify state and invoke callback - -proc sgml::ParseEvent:ElementClose {tag opts args} { - array set options $opts - upvar #0 $options(-statevariable) state - array set cfg {-empty 0} - array set cfg $args - - # WF check - if {[string compare $tag [lindex $state(stack) end]]} { - uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"] - return - } - - # Check whether this element has an expanded name - upvar #0 $options(namespaces) namespaces - set ns {} - if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} { - set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0]) - set ns [list -namespace $nsuri] - } elseif {[llength $state(defaultNSURI)]} { - set ns [list -namespace [lindex $state(defaultNSURI) end]] - } - - # Pop namespace stacks, if any - if {[llength $state(defaultNS)]} { - if {[llength $state(stack)] == [lindex $state(defaultNS) end]} { - set state(defaultNS) [lreplace $state(defaultNS) end end] - } - } - foreach nsspec [array names namespaces *,[llength $state(stack)]] { - unset namespaces($nsspec) - } - - # Update state - set state(stack) [lreplace $state(stack) end end] - - set empty {} - if {$cfg(-empty) && $options(-reportempty)} { - set empty {-empty 1} - } - - # Invoke callback - # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback. - set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg] - return -code $code -errorinfo $::errorInfo $msg -} - -# sgml::PCDATA -- -# -# Process PCDATA before passing to application -# -# Arguments: -# opts options -# pcdata Character data to be processed -# -# Results: -# Checks that characters are legal, -# checks -ignorewhitespace setting. - -proc sgml::PCDATA {opts pcdata} { - array set options $opts - - if {$options(-ignorewhitespace) && \ - ![string length [string trim $pcdata]]} { - return {} - } - - if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} { - upvar \#0 $options(-statevariable) state - uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"] - } - - uplevel \#0 $options(-characterdatacommand) [list $pcdata] -} - -# sgml::Normalize -- -# -# Perform name normalization if required -# -# Arguments: -# name name to normalize -# req normalization required -# -# Results: -# Name returned as upper-case if normalization required - -proc sgml::Normalize {name req} { - if {$req} { - return [string toupper $name] - } else { - return $name - } -} - -# sgml::Entity -- -# -# Resolve XML entity references (syntax: &xxx;). -# -# Arguments: -# opts options -# entityrefcmd application callback for entity references -# pcdatacmd application callback for character data -# entities name of array containing entity definitions. -# ref entity reference (the "xxx" bit) -# -# Results: -# Returns substitution text for given entity. - -proc sgml::Entity {opts entityrefcmd pcdatacmd entities ref} { - array set options $opts - upvar #0 $options(-statevariable) state - - if {![string length $entities]} { - set entities [namespace current]::EntityPredef - } - - # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap - switch -glob -- $ref { - {%*} { - # Parameter entity - not recognised outside of a DTD - } - {#x*} { - # Character entity - hex - if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} { - return -code error "malformed character entity \"$ref\"" - } - uplevel #0 $pcdatacmd [list $char] - - return {} - - } - {#*} { - # Character entity - decimal - if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} { - return -code error "malformed character entity \"$ref\"" - } - uplevel #0 $pcdatacmd [list $char] - - return {} - - } - default { - # General entity - upvar #0 $entities map - if {[info exists map($ref)]} { - - if {![regexp {<|&} $map($ref)]} { - - # Simple text replacement - optimise - uplevel #0 $pcdatacmd [list $map($ref)] - - return {} - - } - - # Otherwise an additional round of parsing is required. - # This only applies to XML, since HTML doesn't have general entities - - # Must parse the replacement text for start & end tags, etc - # This text must be self-contained: balanced closing tags, and so on - - set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr] - set options(-final) 0 - eval parseEvent [list $tokenised] [array get options] - - return {} - - } elseif {[string compare $entityrefcmd "::sgml::noop"]} { - - set result [uplevel #0 $entityrefcmd [list $ref]] - - if {[string length $result]} { - uplevel #0 $pcdatacmd [list $result] - } - - return {} - - } else { - - # Reconstitute entity reference - - uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""] - - return {} - - } - } - } - - # If all else fails leave the entity reference untouched - uplevel #0 $pcdatacmd [list &$ref\;] - - return {} -} - -#################################### -# -# DTD parser for SGML (XML). -# -# This DTD actually only handles XML DTDs. Other language's -# DTD's, such as HTML, must be written in terms of a XML DTD. -# -#################################### - -# sgml::ParseEvent:DocTypeDecl -- -# -# Entry point for DTD parsing -# -# Arguments: -# opts configuration options -# docEl document element name -# pubId public identifier -# sysId system identifier (a URI) -# intSSet internal DTD subset - -proc sgml::ParseEvent:DocTypeDecl {opts docEl pubId sysId intSSet} { - array set options {} - array set options $opts - - set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err] - switch $code { - 3 { - # break - return {} - } - 0 - - 4 { - # continue - } - default { - return -code $code $err - } - } - - # Otherwise we'll parse the DTD and report it piecemeal - - # The internal DTD subset is processed first (XML 2.8) - # During this stage, parameter entities are only allowed - # between markup declarations - - ParseDTD:Internal [array get options] $intSSet - - # The external DTD subset is processed last (XML 2.8) - # During this stage, parameter entities may occur anywhere - - # We must resolve the external identifier to obtain the - # DTD data. The application may supply its own resolver. - - if {[string length $pubId] || [string length $sysId]} { - uplevel #0 $options(-externalentitycommand) [list $options(-cmd) $options(-baseuri) $sysId $pubId] - } - - return {} -} - -# sgml::ParseDTD:Internal -- -# -# Parse the internal DTD subset. -# -# Parameter entities are only allowed between markup declarations. -# -# Arguments: -# opts configuration options -# dtd DTD data -# -# Results: -# Markup declarations parsed may cause callback invocation - -proc sgml::ParseDTD:Internal {opts dtd} { - variable MarkupDeclExpr - variable MarkupDeclSub - - array set options {} - array set options $opts - - upvar #0 $options(-statevariable) state - upvar #0 $options(parameterentities) PEnts - upvar #0 $options(externalparameterentities) ExtPEnts - - # Bug 583947: remove comments before further processing - regsub -all {<!--.*?-->} $dtd {} dtd - - # Tokenize the DTD - - # Protect Tcl special characters - regsub -all {([{}\\])} $dtd {\\\1} dtd - - regsub -all $MarkupDeclExpr $dtd $MarkupDeclSub dtd - - # Entities may have angle brackets in their replacement - # text, which breaks the RE processing. So, we must - # use a similar technique to processing doc instances - # to rebuild the declarations from the pieces - - set mode {} ;# normal - set delimiter {} - set name {} - set param {} - - set state(inInternalDTD) 1 - - # Process the tokens - foreach {decl value text} [lrange "{} {} \{$dtd\}" 3 end] { - - # Keep track of line numbers - incr state(line) [regsub -all \n $text {} discard] - - ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param - - ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode replText text param - - # There may be parameter entity references between markup decls - - if {[regexp {%.*;} $text]} { - - # Protect Tcl special characters - regsub -all {([{}\\])} $text {\\\1} text - - regsub -all %($::sgml::Name)\; $text "\} {\\1} \{" text - - set PElist "\{$text\}" - set PElist [lreplace $PElist end end] - foreach {text entref} $PElist { - if {[string length [string trim $text]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text in internal DTD subset around line $state(line)"] - } - - # Expand parameter entity and recursively parse - # BUG: no checks yet for recursive entity references - - if {[info exists PEnts($entref)]} { - set externalParser [$options(-cmd) entityparser] - $externalParser parse $PEnts($entref) -dtdsubset internal - } elseif {[info exists ExtPEnts($entref)]} { - set externalParser [$options(-cmd) entityparser] - $externalParser parse $ExtPEnts($entref) -dtdsubset external - #$externalParser free - } else { - uplevel #0 $options(-errorcommand) [list illegalreference "reference to undeclared parameter entity \"$entref\""] - } - } - - } - - } - - return {} -} - -# sgml::ParseDTD:EntityMode -- -# -# Perform special processing for various parser modes -# -# Arguments: -# opts configuration options -# modeVar pass-by-reference mode variable -# replTextVar pass-by-ref -# declVar pass-by-ref -# valueVar pass-by-ref -# textVar pass-by-ref -# delimiter delimiter currently in force -# name -# param -# -# Results: -# Depends on current mode - -proc sgml::ParseDTD:EntityMode {opts modeVar replTextVar declVar valueVar textVar delimiter name param} { - upvar 1 $modeVar mode - upvar 1 $replTextVar replText - upvar 1 $declVar decl - upvar 1 $valueVar value - upvar 1 $textVar text - array set options $opts - - switch $mode { - {} { - # Pass through to normal processing section - } - entity { - # Look for closing delimiter - if {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $decl discard val1 remainder]} { - append replText <$val1 - DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter - set decl / - set text $remainder\ $value>$text - set value {} - set mode {} - } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $value discard val2 remainder]} { - append replText <$decl\ $val2 - DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter - set decl / - set text $remainder>$text - set value {} - set mode {} - } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $text discard val3 remainder]} { - append replText <$decl\ $value>$val3 - DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter - set decl / - set text $remainder - set value {} - set mode {} - } else { - - # Remain in entity mode - append replText <$decl\ $value>$text - return -code continue - - } - } - - ignore { - upvar #0 $options(-statevariable) state - - if {[regexp {]](.*)$} $decl discard remainder]} { - set state(condSections) [lreplace $state(condSections) end end] - set decl $remainder - set mode {} - } elseif {[regexp {]](.*)$} $value discard remainder]} { - set state(condSections) [lreplace $state(condSections) end end] - regexp <[cl $::sgml::Wsp]*($::sgml::Name)(.*) $remainder discard decl value - set mode {} - } elseif {[regexp {]]>(.*)$} $text discard remainder]} { - set state(condSections) [lreplace $state(condSections) end end] - set decl / - set value {} - set text $remainder - #regexp <[cl $::sgml::Wsp]*($::sgml::Name)([cl ^>]*)>(.*) $remainder discard decl value text - set mode {} - } else { - set decl / - } - - } - - comment { - # Look for closing comment delimiter - - upvar #0 $options(-statevariable) state - - if {[regexp (.*?)--(.*)\$ $decl discard data1 remainder]} { - } elseif {[regexp (.*?)--(.*)\$ $value discard data1 remainder]} { - } elseif {[regexp (.*?)--(.*)\$ $text discard data1 remainder]} { - } else { - # comment continues - append state(commentdata) <$decl\ $value>$text - set decl / - set value {} - set text {} - } - } - - } - - return {} -} - -# sgml::ParseDTD:ProcessMarkupDecl -- -# -# Process a single markup declaration -# -# Arguments: -# opts configuration options -# declVar pass-by-ref -# valueVar pass-by-ref -# delimiterVar pass-by-ref for current delimiter in force -# nameVar pass-by-ref -# modeVar pass-by-ref for current parser mode -# replTextVar pass-by-ref -# textVar pass-by-ref -# paramVar pass-by-ref -# -# Results: -# Depends on markup declaration. May change parser mode - -proc sgml::ParseDTD:ProcessMarkupDecl {opts declVar valueVar delimiterVar nameVar modeVar replTextVar textVar paramVar} { - upvar 1 $modeVar mode - upvar 1 $replTextVar replText - upvar 1 $textVar text - upvar 1 $declVar decl - upvar 1 $valueVar value - upvar 1 $nameVar name - upvar 1 $delimiterVar delimiter - upvar 1 $paramVar param - - variable declExpr - variable ExternalEntityExpr - - array set options $opts - upvar #0 $options(-statevariable) state - - switch -glob -- $decl { - - / { - # continuation from entity processing - } - - !ELEMENT { - # Element declaration - if {[regexp $declExpr $value discard tag cmodel]} { - DTD:ELEMENT [array get options] $tag $cmodel - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed element declaration around line $state(line)"] - } - } - - !ATTLIST { - # Attribute list declaration - variable declExpr - if {[regexp $declExpr $value discard tag attdefns]} { - if {[catch {DTD:ATTLIST [array get options] $tag $attdefns} err]} { - #puts stderr "Stack trace: $::errorInfo\n***\n" - # Atttribute parsing has bugs at the moment - #return -code error "$err around line $state(line)" - return {} - } - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed attribute list declaration around line $state(line)"] - } - } - - !ENTITY { - # Entity declaration - variable EntityExpr - - if {[regexp $EntityExpr $value discard param name value]} { - - # Entity replacement text may have a '>' character. - # In this case, the real delimiter will be in the following - # text. This is complicated by the possibility of there - # being several '<','>' pairs in the replacement text. - # At this point, we are searching for the matching quote delimiter. - - if {[regexp $ExternalEntityExpr $value]} { - DTD:ENTITY [array get options] $name [string trim $param] $value - } elseif {[regexp ("|')(.*?)\\1(.*) $value discard delimiter replText value]} { - - if {[string length [string trim $value]]} { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] - } else { - DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter - } - } elseif {[regexp ("|')(.*) $value discard delimiter replText]} { - append replText >$text - set text {} - set mode entity - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "no delimiter for entity declaration around line $state(line)"] - } - - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] - } - } - - !NOTATION { - # Notation declaration - if {[regexp $declExpr param discard tag notation]} { - DTD:ENTITY [array get options] $tag $notation - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] - } - } - - !--* { - # Start of a comment - - if {[regexp !--(.*?)--\$ $decl discard data]} { - if {[string length [string trim $value]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$value\""] - } - uplevel #0 $options(-commentcommand) [list $data] - set decl / - set value {} - } elseif {[regexp -- ^(.*?)--\$ $value discard data2]} { - regexp !--(.*)\$ $decl discard data1 - uplevel #0 $options(-commentcommand) [list $data1\ $data2] - set decl / - set value {} - } elseif {[regexp (.*?)-->(.*)\$ $text discard data3 remainder]} { - regexp !--(.*)\$ $decl discard data1 - uplevel #0 $options(-commentcommand) [list $data1\ $value>$data3] - set decl / - set value {} - set text $remainder - } else { - regexp !--(.*)\$ $decl discard data1 - set state(commentdata) $data1\ $value>$text - set decl / - set value {} - set text {} - set mode comment - } - } - - !*INCLUDE* - - !*IGNORE* { - if {$state(inInternalDTD)} { - uplevel #0 $options(-errorcommand) [list illegalsection "conditional section not permitted in internal DTD subset around line $state(line)"] - } - - if {[regexp {^!\[INCLUDE\[(.*)} $decl discard remainder]} { - # Push conditional section stack, popped by ]]> sequence - - if {[regexp {(.*?)]]$} $remainder discard r2]} { - # section closed immediately - if {[string length [string trim $r2]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] - } - } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} { - # section closed immediately - if {[string length [string trim $r2]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] - } - if {[string length [string trim $r3]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"] - } - } else { - - lappend state(condSections) INCLUDE - - set parser [$options(-cmd) entityparser] - $parser parse $remainder\ $value> -dtdsubset external - #$parser free - - if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} { - if {[string length [string trim $t1]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] - } - if {![llength $state(condSections)]} { - uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] - } - set state(condSections) [lreplace $state(condSections) end end] - set text $t2 - } - - } - } elseif {[regexp {^!\[IGNORE\[(.*)} $decl discard remainder]} { - # Set ignore mode. Still need a stack - set mode ignore - - if {[regexp {(.*?)]]$} $remainder discard r2]} { - # section closed immediately - if {[string length [string trim $r2]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] - } - } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} { - # section closed immediately - if {[string length [string trim $r2]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] - } - if {[string length [string trim $r3]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"] - } - } else { - - lappend state(condSections) IGNORE - - if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} { - if {[string length [string trim $t1]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] - } - if {![llength $state(condSections)]} { - uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] - } - set state(condSections) [lreplace $state(condSections) end end] - set text $t2 - } - - } - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\" around line $state(line)"] - } - - } - - default { - if {[regexp {^\?(.*)} $decl discard target]} { - # Processing instruction - } else { - uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\""] - } - } - } - - return {} -} - -# sgml::ParseDTD:External -- -# -# Parse the external DTD subset. -# -# Parameter entities are allowed anywhere. -# -# Arguments: -# opts configuration options -# dtd DTD data -# -# Results: -# Markup declarations parsed may cause callback invocation - -proc sgml::ParseDTD:External {opts dtd} { - variable MarkupDeclExpr - variable MarkupDeclSub - variable declExpr - - array set options $opts - upvar #0 $options(parameterentities) PEnts - upvar #0 $options(externalparameterentities) ExtPEnts - upvar #0 $options(-statevariable) state - - # As with the internal DTD subset, watch out for - # entities with angle brackets - set mode {} ;# normal - set delimiter {} - set name {} - set param {} - - set oldState 0 - catch {set oldState $state(inInternalDTD)} - set state(inInternalDTD) 0 - - # Initialise conditional section stack - if {![info exists state(condSections)]} { - set state(condSections) {} - } - set startCondSectionDepth [llength $state(condSections)] - - while {[string length $dtd]} { - set progress 0 - set PEref {} - if {![string compare $mode "ignore"]} { - set progress 1 - if {[regexp {]]>(.*)} $dtd discard dtd]} { - set remainder {} - set mode {} ;# normal - set state(condSections) [lreplace $state(condSections) end end] - continue - } else { - uplevel #0 $options(-errorcommand) [list missingdelimiter "IGNORE conditional section closing delimiter not found"] - } - } elseif {[regexp ^(.*?)%($::sgml::Name)\;(.*)\$ $dtd discard data PEref remainder]} { - set progress 1 - } else { - set data $dtd - set dtd {} - set remainder {} - } - - # Tokenize the DTD (so far) - - # Protect Tcl special characters - regsub -all {([{}\\])} $data {\\\1} dataP - - set n [regsub -all $MarkupDeclExpr $dataP $MarkupDeclSub dataP] - - if {$n} { - set progress 1 - # All but the last markup declaration should have no text - set dataP [lrange "{} {} \{$dataP\}" 3 end] - if {[llength $dataP] > 3} { - foreach {decl value text} [lrange $dataP 0 [expr [llength $dataP] - 4]] { - ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param - ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param - - if {[string length [string trim $text]]} { - # check for conditional section close - if {[regexp {]]>(.*)$} $text discard text]} { - if {[string length [string trim $text]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""] - } - if {![llength $state(condSections)]} { - uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] - } - set state(condSections) [lreplace $state(condSections) end end] - if {![string compare $mode "ignore"]} { - set mode {} ;# normal - } - } else { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""] - } - } - } - } - # Do the last declaration - foreach {decl value text} [lrange $dataP [expr [llength $dataP] - 3] end] { - ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param - ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param - } - } - - # Now expand the PE reference, if any - switch -glob $mode,[string length $PEref],$n { - ignore,0,* { - set dtd $text - } - ignore,*,* { - set dtd $text$remainder - } - *,0,0 { - set dtd $data - } - *,0,* { - set dtd $text - } - *,*,0 { - if {[catch {append data $PEnts($PEref)}]} { - if {[info exists ExtPEnts($PEref)]} { - set externalParser [$options(-cmd) entityparser] - $externalParser parse $ExtPEnts($PEref) -dtdsubset external - #$externalParser free - } else { - uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"] - } - } - set dtd $data$remainder - } - default { - if {[catch {append text $PEnts($PEref)}]} { - if {[info exists ExtPEnts($PEref)]} { - set externalParser [$options(-cmd) entityparser] - $externalParser parse $ExtPEnts($PEref) -dtdsubset external - #$externalParser free - } else { - uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"] - } - } - set dtd $text$remainder - } - } - - # Check whether a conditional section has been terminated - if {[regexp {^(.*?)]]>(.*)$} $dtd discard t1 t2]} { - if {![regexp <.*> $t1]} { - if {[string length [string trim $t1]]} { - uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] - } - if {![llength $state(condSections)]} { - uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] - } - set state(condSections) [lreplace $state(condSections) end end] - if {![string compare $mode "ignore"]} { - set mode {} ;# normal - } - set dtd $t2 - set progress 1 - } - } - - if {!$progress} { - # No parameter entity references were found and - # the text does not contain a well-formed markup declaration - # Avoid going into an infinite loop - upvar #0 $options(-errorcommand) [list syntaxerror "external entity does not contain well-formed markup declaration"] - break - } - } - - set state(inInternalDTD) $oldState - - # Check that conditional sections have been closed properly - if {[llength $state(condSections)] > $startCondSectionDepth} { - uplevel #0 $options(-errorcommand) [list syntaxerror "[lindex $state(condSections) end] conditional section not closed"] - } - if {[llength $state(condSections)] < $startCondSectionDepth} { - uplevel #0 $options(-errorcommand) [list syntaxerror "too many conditional section closures"] - } - - return {} -} - -# Procedures for handling the various declarative elements in a DTD. -# New elements may be added by creating a procedure of the form -# parse:DTD:_element_ - -# For each of these procedures, the various regular expressions they use -# are created outside of the proc to avoid overhead at runtime - -# sgml::DTD:ELEMENT -- -# -# <!ELEMENT ...> defines an element. -# -# The content model for the element is stored in the contentmodel array, -# indexed by the element name. The content model is parsed into the -# following list form: -# -# {} Content model is EMPTY. -# Indicated by an empty list. -# * Content model is ANY. -# Indicated by an asterix. -# {ELEMENT ...} -# Content model is element-only. -# {MIXED {element1 element2 ...}} -# Content model is mixed (PCDATA and elements). -# The second element of the list contains the -# elements that may occur. #PCDATA is assumed -# (ie. the list is normalised). -# -# Arguments: -# opts configuration options -# name element GI -# modspec unparsed content model specification - -proc sgml::DTD:ELEMENT {opts name modspec} { - variable Wsp - array set options $opts - - upvar #0 $options(elementdecls) elements - - if {$options(-validate) && [info exists elements($name)]} { - eval $options(-errorcommand) [list elementdeclared "element \"$name\" already declared"] - } else { - switch -- $modspec { - EMPTY { - set elements($name) {} - uplevel #0 $options(-elementdeclcommand) $name {{}} - } - ANY { - set elements($name) * - uplevel #0 $options(-elementdeclcommand) $name * - } - default { - # Don't parse the content model for now, - # just pass the model to the application - if {0 && [regexp [format {^\([%s]*#PCDATA[%s]*(\|([^)]+))?[%s]*\)*[%s]*$} $Wsp $Wsp $Wsp $Wsp] discard discard mtoks]} { - set cm($name) [list MIXED [split $mtoks |]] - } elseif {0} { - if {[catch {CModelParse $state(state) $value} result]} { - eval $options(-errorcommand) [list element? $result] - } else { - set cm($id) [list ELEMENT $result] - } - } else { - set elements($name) $modspec - uplevel #0 $options(-elementdeclcommand) $name [list $modspec] - } - } - } - } -} - -# sgml::CModelParse -- -# -# Parse an element content model (non-mixed). -# A syntax tree is constructed. -# A transition table is built next. -# -# This is going to need alot of work! -# -# Arguments: -# state state array variable -# value the content model data -# -# Results: -# A Tcl list representing the content model. - -proc sgml::CModelParse {state value} { - upvar #0 $state var - - # First build syntax tree - set syntaxTree [CModelMakeSyntaxTree $state $value] - - # Build transition table - set transitionTable [CModelMakeTransitionTable $state $syntaxTree] - - return [list $syntaxTree $transitionTable] -} - -# sgml::CModelMakeSyntaxTree -- -# -# Construct a syntax tree for the regular expression. -# -# Syntax tree is represented as a Tcl list: -# rep {:choice|:seq {{rep list1} {rep list2} ...}} -# where: rep is repetition character, *, + or ?. {} for no repetition -# listN is nested expression or Name -# -# Arguments: -# spec Element specification -# -# Results: -# Syntax tree for element spec as nested Tcl list. -# -# Examples: -# (memo) -# {} {:seq {{} memo}} -# (front, body, back?) -# {} {:seq {{} front} {{} body} {? back}} -# (head, (p | list | note)*, div2*) -# {} {:seq {{} head} {* {:choice {{} p} {{} list} {{} note}}} {* div2}} -# (p | a | ul)+ -# + {:choice {{} p} {{} a} {{} ul}} - -proc sgml::CModelMakeSyntaxTree {state spec} { - upvar #0 $state var - variable Wsp - variable name - - # Translate the spec into a Tcl list. - - # None of the Tcl special characters are allowed in a content model spec. - if {[regexp {\$|\[|\]|\{|\}} $spec]} { - return -code error "illegal characters in specification" - } - - regsub -all [format {(%s)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $name $Wsp $Wsp] $spec [format {%sCModelSTname %s {\1} {\2} {\3}} \n $state] spec - regsub -all {\(} $spec "\nCModelSTopenParen $state " spec - regsub -all [format {\)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $Wsp $Wsp] $spec [format {%sCModelSTcloseParen %s {\1} {\2}} \n $state] spec - - array set var {stack {} state start} - eval $spec - - # Peel off the outer seq, its redundant - return [lindex [lindex $var(stack) 1] 0] -} - -# sgml::CModelSTname -- -# -# Processes a name in a content model spec. -# -# Arguments: -# state state array variable -# name name specified -# rep repetition operator -# cs choice or sequence delimiter -# -# Results: -# See CModelSTcp. - -proc sgml::CModelSTname {state name rep cs args} { - if {[llength $args]} { - return -code error "syntax error in specification: \"$args\"" - } - - CModelSTcp $state $name $rep $cs -} - -# sgml::CModelSTcp -- -# -# Process a content particle. -# -# Arguments: -# state state array variable -# name name specified -# rep repetition operator -# cs choice or sequence delimiter -# -# Results: -# The content particle is added to the current group. - -proc sgml::CModelSTcp {state cp rep cs} { - upvar #0 $state var - - switch -glob -- [lindex $var(state) end]=$cs { - start= { - set var(state) [lreplace $var(state) end end end] - # Add (dummy) grouping, either choice or sequence will do - CModelSTcsSet $state , - CModelSTcpAdd $state $cp $rep - } - :choice= - - :seq= { - set var(state) [lreplace $var(state) end end end] - CModelSTcpAdd $state $cp $rep - } - start=| - - start=, { - set var(state) [lreplace $var(state) end end [expr {$cs == "," ? ":seq" : ":choice"}]] - CModelSTcsSet $state $cs - CModelSTcpAdd $state $cp $rep - } - :choice=| - - :seq=, { - CModelSTcpAdd $state $cp $rep - } - :choice=, - - :seq=| { - return -code error "syntax error in specification: incorrect delimiter after \"$cp\", should be \"[expr {$cs == "," ? "|" : ","}]\"" - } - end=* { - return -code error "syntax error in specification: no delimiter before \"$cp\"" - } - default { - return -code error "syntax error" - } - } - -} - -# sgml::CModelSTcsSet -- -# -# Start a choice or sequence on the stack. -# -# Arguments: -# state state array -# cs choice oir sequence -# -# Results: -# state is modified: end element of state is appended. - -proc sgml::CModelSTcsSet {state cs} { - upvar #0 $state var - - set cs [expr {$cs == "," ? ":seq" : ":choice"}] - - if {[llength $var(stack)]} { - set var(stack) [lreplace $var(stack) end end $cs] - } else { - set var(stack) [list $cs {}] - } -} - -# sgml::CModelSTcpAdd -- -# -# Append a content particle to the top of the stack. -# -# Arguments: -# state state array -# cp content particle -# rep repetition -# -# Results: -# state is modified: end element of state is appended. - -proc sgml::CModelSTcpAdd {state cp rep} { - upvar #0 $state var - - if {[llength $var(stack)]} { - set top [lindex $var(stack) end] - lappend top [list $rep $cp] - set var(stack) [lreplace $var(stack) end end $top] - } else { - set var(stack) [list $rep $cp] - } -} - -# sgml::CModelSTopenParen -- -# -# Processes a '(' in a content model spec. -# -# Arguments: -# state state array -# -# Results: -# Pushes stack in state array. - -proc sgml::CModelSTopenParen {state args} { - upvar #0 $state var - - if {[llength $args]} { - return -code error "syntax error in specification: \"$args\"" - } - - lappend var(state) start - lappend var(stack) [list {} {}] -} - -# sgml::CModelSTcloseParen -- -# -# Processes a ')' in a content model spec. -# -# Arguments: -# state state array -# rep repetition -# cs choice or sequence delimiter -# -# Results: -# Stack is popped, and former top of stack is appended to previous element. - -proc sgml::CModelSTcloseParen {state rep cs args} { - upvar #0 $state var - - if {[llength $args]} { - return -code error "syntax error in specification: \"$args\"" - } - - set cp [lindex $var(stack) end] - set var(stack) [lreplace $var(stack) end end] - set var(state) [lreplace $var(state) end end] - CModelSTcp $state $cp $rep $cs -} - -# sgml::CModelMakeTransitionTable -- -# -# Given a content model's syntax tree, constructs -# the transition table for the regular expression. -# -# See "Compilers, Principles, Techniques, and Tools", -# Aho, Sethi and Ullman. Section 3.9, algorithm 3.5. -# -# Arguments: -# state state array variable -# st syntax tree -# -# Results: -# The transition table is returned, as a key/value Tcl list. - -proc sgml::CModelMakeTransitionTable {state st} { - upvar #0 $state var - - # Construct nullable, firstpos and lastpos functions - array set var {number 0} - foreach {nullable firstpos lastpos} [ \ - TraverseDepth1st $state $st { - # Evaluated for leaf nodes - # Compute nullable(n) - # Compute firstpos(n) - # Compute lastpos(n) - set nullable [nullable leaf $rep $name] - set firstpos [list {} $var(number)] - set lastpos [list {} $var(number)] - set var(pos:$var(number)) $name - } { - # Evaluated for nonterminal nodes - # Compute nullable, firstpos, lastpos - set firstpos [firstpos $cs $firstpos $nullable] - set lastpos [lastpos $cs $lastpos $nullable] - set nullable [nullable nonterm $rep $cs $nullable] - } \ - ] break - - set accepting [incr var(number)] - set var(pos:$accepting) # - - # var(pos:N) maps from position to symbol. - # Construct reverse map for convenience. - # NB. A symbol may appear in more than one position. - # var is about to be reset, so use different arrays. - - foreach {pos symbol} [array get var pos:*] { - set pos [lindex [split $pos :] 1] - set pos2symbol($pos) $symbol - lappend sym2pos($symbol) $pos - } - - # Construct the followpos functions - catch {unset var} - followpos $state $st $firstpos $lastpos - - # Construct transition table - # Dstates is [union $marked $unmarked] - set unmarked [list [lindex $firstpos 1]] - while {[llength $unmarked]} { - set T [lindex $unmarked 0] - lappend marked $T - set unmarked [lrange $unmarked 1 end] - - # Find which input symbols occur in T - set symbols {} - foreach pos $T { - if {$pos != $accepting && [lsearch $symbols $pos2symbol($pos)] < 0} { - lappend symbols $pos2symbol($pos) - } - } - foreach a $symbols { - set U {} - foreach pos $sym2pos($a) { - if {[lsearch $T $pos] >= 0} { - # add followpos($pos) - if {$var($pos) == {}} { - lappend U $accepting - } else { - eval lappend U $var($pos) - } - } - } - set U [makeSet $U] - if {[llength $U] && [lsearch $marked $U] < 0 && [lsearch $unmarked $U] < 0} { - lappend unmarked $U - } - set Dtran($T,$a) $U - } - - } - - return [list [array get Dtran] [array get sym2pos] $accepting] -} - -# sgml::followpos -- -# -# Compute the followpos function, using the already computed -# firstpos and lastpos. -# -# Arguments: -# state array variable to store followpos functions -# st syntax tree -# firstpos firstpos functions for the syntax tree -# lastpos lastpos functions -# -# Results: -# followpos functions for each leaf node, in name/value format - -proc sgml::followpos {state st firstpos lastpos} { - upvar #0 $state var - - switch -- [lindex [lindex $st 1] 0] { - :seq { - for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} { - followpos $state [lindex [lindex $st 1] $i] \ - [lindex [lindex $firstpos 0] [expr $i - 1]] \ - [lindex [lindex $lastpos 0] [expr $i - 1]] - foreach pos [lindex [lindex [lindex $lastpos 0] [expr $i - 1]] 1] { - eval lappend var($pos) [lindex [lindex [lindex $firstpos 0] $i] 1] - set var($pos) [makeSet $var($pos)] - } - } - } - :choice { - for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} { - followpos $state [lindex [lindex $st 1] $i] \ - [lindex [lindex $firstpos 0] [expr $i - 1]] \ - [lindex [lindex $lastpos 0] [expr $i - 1]] - } - } - default { - # No action at leaf nodes - } - } - - switch -- [lindex $st 0] { - ? { - # We having nothing to do here ! Doing the same as - # for * effectively converts this qualifier into the other. - } - * { - foreach pos [lindex $lastpos 1] { - eval lappend var($pos) [lindex $firstpos 1] - set var($pos) [makeSet $var($pos)] - } - } - } - -} - -# sgml::TraverseDepth1st -- -# -# Perform depth-first traversal of a tree. -# A new tree is constructed, with each node computed by f. -# -# Arguments: -# state state array variable -# t The tree to traverse, a Tcl list -# leaf Evaluated at a leaf node -# nonTerm Evaluated at a nonterminal node -# -# Results: -# A new tree is returned. - -proc sgml::TraverseDepth1st {state t leaf nonTerm} { - upvar #0 $state var - - set nullable {} - set firstpos {} - set lastpos {} - - switch -- [lindex [lindex $t 1] 0] { - :seq - - :choice { - set rep [lindex $t 0] - set cs [lindex [lindex $t 1] 0] - - foreach child [lrange [lindex $t 1] 1 end] { - foreach {childNullable childFirstpos childLastpos} \ - [TraverseDepth1st $state $child $leaf $nonTerm] break - lappend nullable $childNullable - lappend firstpos $childFirstpos - lappend lastpos $childLastpos - } - - eval $nonTerm - } - default { - incr var(number) - set rep [lindex [lindex $t 0] 0] - set name [lindex [lindex $t 1] 0] - eval $leaf - } - } - - return [list $nullable $firstpos $lastpos] -} - -# sgml::firstpos -- -# -# Computes the firstpos function for a nonterminal node. -# -# Arguments: -# cs node type, choice or sequence -# firstpos firstpos functions for the subtree -# nullable nullable functions for the subtree -# -# Results: -# firstpos function for this node is returned. - -proc sgml::firstpos {cs firstpos nullable} { - switch -- $cs { - :seq { - set result [lindex [lindex $firstpos 0] 1] - for {set i 0} {$i < [llength $nullable]} {incr i} { - if {[lindex [lindex $nullable $i] 1]} { - eval lappend result [lindex [lindex $firstpos [expr $i + 1]] 1] - } else { - break - } - } - } - :choice { - foreach child $firstpos { - eval lappend result $child - } - } - } - - return [list $firstpos [makeSet $result]] -} - -# sgml::lastpos -- -# -# Computes the lastpos function for a nonterminal node. -# Same as firstpos, only logic is reversed -# -# Arguments: -# cs node type, choice or sequence -# lastpos lastpos functions for the subtree -# nullable nullable functions forthe subtree -# -# Results: -# lastpos function for this node is returned. - -proc sgml::lastpos {cs lastpos nullable} { - switch -- $cs { - :seq { - set result [lindex [lindex $lastpos end] 1] - for {set i [expr [llength $nullable] - 1]} {$i >= 0} {incr i -1} { - if {[lindex [lindex $nullable $i] 1]} { - eval lappend result [lindex [lindex $lastpos $i] 1] - } else { - break - } - } - } - :choice { - foreach child $lastpos { - eval lappend result $child - } - } - } - - return [list $lastpos [makeSet $result]] -} - -# sgml::makeSet -- -# -# Turn a list into a set, ie. remove duplicates. -# -# Arguments: -# s a list -# -# Results: -# A set is returned, which is a list with duplicates removed. - -proc sgml::makeSet s { - foreach r $s { - if {[llength $r]} { - set unique($r) {} - } - } - return [array names unique] -} - -# sgml::nullable -- -# -# Compute the nullable function for a node. -# -# Arguments: -# nodeType leaf or nonterminal -# rep repetition applying to this node -# name leaf node: symbol for this node, nonterm node: choice or seq node -# subtree nonterm node: nullable functions for the subtree -# -# Results: -# Returns nullable function for this branch of the tree. - -proc sgml::nullable {nodeType rep name {subtree {}}} { - switch -glob -- $rep:$nodeType { - :leaf - - +:leaf { - return [list {} 0] - } - \\*:leaf - - \\?:leaf { - return [list {} 1] - } - \\*:nonterm - - \\?:nonterm { - return [list $subtree 1] - } - :nonterm - - +:nonterm { - switch -- $name { - :choice { - set result 0 - foreach child $subtree { - set result [expr $result || [lindex $child 1]] - } - } - :seq { - set result 1 - foreach child $subtree { - set result [expr $result && [lindex $child 1]] - } - } - } - return [list $subtree $result] - } - } -} - -# sgml::DTD:ATTLIST -- -# -# <!ATTLIST ...> defines an attribute list. -# -# Arguments: -# opts configuration opions -# name Element GI -# attspec unparsed attribute definitions -# -# Results: -# Attribute list variables are modified. - -proc sgml::DTD:ATTLIST {opts name attspec} { - variable attlist_exp - variable attlist_enum_exp - variable attlist_fixed_exp - - array set options $opts - - # Parse the attribute list. If it were regular, could just use foreach, - # but some attributes may have values. - regsub -all {([][$\\])} $attspec {\\\1} attspec - regsub -all $attlist_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {} \{" attspec - regsub -all $attlist_enum_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {} {\\4} \{" attspec - regsub -all $attlist_fixed_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {\\4} \{" attspec - - eval "noop \{$attspec\}" - - return {} -} - -# sgml::DTDAttribute -- -# -# Parse definition of a single attribute. -# -# Arguments: -# callback attribute defn callback -# name element name -# var array variable -# att attribute name -# type type of this attribute -# default default value of the attribute -# value other information -# text other text (should be empty) -# -# Results: -# Attribute defn added to array, unless it already exists - -proc sgml::DTDAttribute args { - # BUG: Some problems with parameter passing - deal with it later - foreach {callback name var att type default value text} $args break - - upvar #0 $var atts - - if {[string length [string trim $text]]} { - return -code error "unexpected text \"$text\" in attribute definition" - } - - # What about overridden attribute defns? - # A non-validating app may want to know about them - # (eg. an editor) - if {![info exists atts($name/$att)]} { - set atts($name/$att) [list $type $default $value] - uplevel #0 $callback [list $name $att $type $default $value] - } - - return {} -} - -# sgml::DTD:ENTITY -- -# -# <!ENTITY ...> declaration. -# -# Callbacks: -# -entitydeclcommand for general entity declaration -# -unparsedentitydeclcommand for unparsed external entity declaration -# -parameterentitydeclcommand for parameter entity declaration -# -# Arguments: -# opts configuration options -# name name of entity being defined -# param whether a parameter entity is being defined -# value unparsed replacement text -# -# Results: -# Modifies the caller's entities array variable - -proc sgml::DTD:ENTITY {opts name param value} { - - array set options $opts - - if {[string compare % $param]} { - # Entity declaration - general or external - upvar #0 $options(entities) ents - upvar #0 $options(extentities) externals - - if {[info exists ents($name)] || [info exists externals($name)]} { - eval $options(-warningcommand) entity [list "entity \"$name\" already declared"] - } else { - if {[catch {uplevel #0 $options(-parseentitydeclcommand) [list $value]} value]} { - return -code error "unable to parse entity declaration due to \"$value\"" - } - switch -glob [lindex $value 0],[lindex $value 3] { - internal, { - set ents($name) [EntitySubst [array get options] [lindex $value 1]] - uplevel #0 $options(-entitydeclcommand) [list $name $ents($name)] - } - internal,* { - return -code error "unexpected NDATA declaration" - } - external, { - set externals($name) [lrange $value 1 2] - uplevel #0 $options(-entitydeclcommand) [eval list $name [lrange $value 1 2]] - } - external,* { - set externals($name) [lrange $value 1 3] - uplevel #0 $options(-unparsedentitydeclcommand) [eval list $name [lrange $value 1 3]] - } - default { - return -code error "internal error: unexpected parser state" - } - } - } - } else { - # Parameter entity declaration - upvar #0 $options(parameterentities) PEnts - upvar #0 $options(externalparameterentities) ExtPEnts - - if {[info exists PEnts($name)] || [info exists ExtPEnts($name)]} { - eval $options(-warningcommand) parameterentity [list "parameter entity \"$name\" already declared"] - } else { - if {[catch {uplevel #0 $options(-parseentitydeclcommand) [list $value]} value]} { - return -code error "unable to parse parameter entity declaration due to \"$value\"" - } - if {[string length [lindex $value 3]]} { - return -code error "NDATA illegal in parameter entity declaration" - } - switch [lindex $value 0] { - internal { - # Substitute character references and PEs (XML: 4.5) - set value [EntitySubst [array get options] [lindex $value 1]] - - set PEnts($name) $value - uplevel #0 $options(-parameterentitydeclcommand) [list $name $value] - } - external - - default { - # Get the replacement text now. - # Could wait until the first reference, but easier - # to just do it now. - - set token [uri::geturl [uri::resolve $options(-baseuri) [lindex $value 1]]] - - set ExtPEnts($name) [lindex [array get $token data] 1] - uplevel #0 $options(-parameterentitydeclcommand) [eval list $name [lrange $value 1 2]] - } - } - } - } -} - -# sgml::EntitySubst -- -# -# Perform entity substitution on an entity replacement text. -# This differs slightly from other substitution procedures, -# because only parameter and character entity substitution -# is performed, not general entities. -# See XML Rec. section 4.5. -# -# Arguments: -# opts configuration options -# value Literal entity value -# -# Results: -# Expanded replacement text - -proc sgml::EntitySubst {opts value} { - array set options $opts - - # Protect Tcl special characters - regsub -all {([{}\\])} $value {\\\1} value - - # Find entity references - regsub -all (&#\[0-9\]+|&#x\[0-9a-fA-F\]+|%${::sgml::Name})\; $value "\[EntitySubstValue [list $options(parameterentities)] {\\1}\]" value - - set result [subst $value] - - return $result -} - -# sgml::EntitySubstValue -- -# -# Handle a single character or parameter entity substitution -# -# Arguments: -# PEvar array variable containing PE declarations -# ref character or parameter entity reference -# -# Results: -# Replacement text - -proc sgml::EntitySubstValue {PEvar ref} { - # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap - switch -glob -- $ref { - {&#x*} { - scan [string range $ref 3 end] %x hex - return [format %c $hex] - } - {&#*} { - return [format %c [string range $ref 2 end]] - } - {%*} { - upvar #0 $PEvar PEs - set ref [string range $ref 1 end] - if {[info exists PEs($ref)]} { - return $PEs($ref) - } else { - return -code error "parameter entity \"$ref\" not declared" - } - } - default { - return -code error "internal error - unexpected entity reference" - } - } - return {} -} - -# sgml::DTD:NOTATION -- -# -# Process notation declaration -# -# Arguments: -# opts configuration options -# name notation name -# value unparsed notation spec - -proc sgml::DTD:NOTATION {opts name value} { - return {} - - variable notation_exp - upvar opts state - - if {[regexp $notation_exp $value x scheme data] == 2} { - } else { - eval $state(-errorcommand) [list notationvalue "notation value \"$value\" incorrectly specified"] - } -} - -# sgml::ResolveEntity -- -# -# Default entity resolution routine -# -# Arguments: -# cmd command of parent parser -# base base URL for relative URLs -# sysId system identifier -# pubId public identifier - -proc sgml::ResolveEntity {cmd base sysId pubId} { - variable ParseEventNum - - if {[catch {uri::resolve $base $sysId} url]} { - return -code error "unable to resolve system identifier \"$sysId\"" - } - if {[catch {uri::geturl $url} token]} { - return -code error "unable to retrieve external entity \"$url\" for system identifier \"$sysId\"" - } - - upvar #0 $token data - - set parser [uplevel #0 $cmd entityparser] - - set body {} - catch {set body $data(body)} - catch {set body $data(data)} - if {[string length $body]} { - uplevel #0 $parser parse [list $body] -dtdsubset external - } - $parser free - - return {} -} diff --git a/tclxml/tclxml-tcl/tclparser-8.0.tcl b/tclxml/tclxml-tcl/tclparser-8.0.tcl deleted file mode 100755 index e2573f8..0000000 --- a/tclxml/tclxml-tcl/tclparser-8.0.tcl +++ /dev/null @@ -1,359 +0,0 @@ -# tclparser-8.0.tcl -- -# -# This file provides a Tcl implementation of a XML parser. -# This file supports Tcl 8.0. -# -# See xml-8.[01].tcl for definitions of character sets and -# regular expressions. -# -# Copyright (c) 2005-2008 by Explain. -# http://www.explain.com.au/ -# Copyright (c) 1998-2004 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: tclparser-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require -exact Tcl 8.0 - -package require xmldefs 3.2 - -package require sgmlparser 1.0 - -package provide xml::tclparser 3.2 - -namespace eval xml { - - # Procedures for parsing XML documents - namespace export parser - # Procedures for parsing XML DTDs - namespace export DTDparser - - # Counter for creating unique parser objects - variable ParserCounter 0 - -} - -# xml::parser -- -# -# Creates XML parser object. -# -# Arguments: -# args Unique name for parser object -# plus option/value pairs -# -# Recognised Options: -# -final Indicates end of document data -# -elementstartcommand Called when an element starts -# -elementendcommand Called when an element ends -# -characterdatacommand Called when character data occurs -# -processinginstructioncommand Called when a PI occurs -# -externalentityrefcommand Called for an external entity reference -# -# (Not compatible with expat) -# -xmldeclcommand Called when the XML declaration occurs -# -doctypecommand Called when the document type declaration occurs -# -# -errorcommand Script to evaluate for a fatal error -# -warningcommand Script to evaluate for a reportable warning -# -statevariable global state variable -# -reportempty whether to provide empty element indication -# -# Results: -# The state variable is initialised. - -proc xml::parser {args} { - variable ParserCounter - - if {[llength $args] > 0} { - set name [lindex $args 0] - set args [lreplace $args 0 0] - } else { - set name parser[incr ParserCounter] - } - - if {[info command [namespace current]::$name] != {}} { - return -code error "unable to create parser object \"[namespace current]::$name\" command" - } - - # Initialise state variable and object command - upvar \#0 [namespace current]::$name parser - set sgml_ns [namespace parent]::sgml - array set parser [list name $name \ - -final 1 \ - -elementstartcommand ${sgml_ns}::noop \ - -elementendcommand ${sgml_ns}::noop \ - -characterdatacommand ${sgml_ns}::noop \ - -processinginstructioncommand ${sgml_ns}::noop \ - -externalentityrefcommand ${sgml_ns}::noop \ - -xmldeclcommand ${sgml_ns}::noop \ - -doctypecommand ${sgml_ns}::noop \ - -warningcommand ${sgml_ns}::noop \ - -statevariable [namespace current]::$name \ - -reportempty 0 \ - internaldtd {} \ - ] - - proc [namespace current]::$name {method args} \ - "eval ParseCommand $name \$method \$args" - - eval ParseCommand [list $name] configure $args - - return [namespace current]::$name -} - -# xml::ParseCommand -- -# -# Handles parse object command invocations -# -# Valid Methods: -# cget -# configure -# parse -# reset -# -# Arguments: -# parser parser object -# method minor command -# args other arguments -# -# Results: -# Depends on method - -proc xml::ParseCommand {parser method args} { - upvar \#0 [namespace current]::$parser state - - switch -- $method { - cget { - return $state([lindex $args 0]) - } - configure { - foreach {opt value} $args { - set state($opt) $value - } - } - parse { - ParseCommand_parse $parser [lindex $args 0] - } - reset { - if {[llength $args]} { - return -code error "too many arguments" - } - ParseCommand_reset $parser - } - default { - return -code error "unknown method \"$method\"" - } - } - - return {} -} - -# xml::ParseCommand_parse -- -# -# Parses document instance data -# -# Arguments: -# object parser object -# xml data -# -# Results: -# Callbacks are invoked, if any are defined - -proc xml::ParseCommand_parse {object xml} { - upvar \#0 [namespace current]::$object parser - variable Wsp - variable tokExpr - variable substExpr - - set parent [namespace parent] - if {![string compare :: $parent]} { - set parent {} - } - - set tokenised [lrange \ - [${parent}::sgml::tokenise $xml \ - $tokExpr \ - $substExpr \ - -internaldtdvariable [namespace current]::${object}(internaldtd)] \ - 4 end] - - eval ${parent}::sgml::parseEvent \ - [list $tokenised \ - -emptyelement [namespace code ParseEmpty] \ - -parseattributelistcommand [namespace code ParseAttrs]] \ - [array get parser -*command] \ - [array get parser -entityvariable] \ - [array get parser -reportempty] \ - [array get parser -final] \ - -normalize 0 \ - -internaldtd [list $parser(internaldtd)] - - return {} -} - -# xml::ParseEmpty -- Tcl 8.0 version -# -# Used by parser to determine whether an element is empty. -# This should be dead easy in XML. The only complication is -# that the RE above can't catch the trailing slash, so we have -# to dig it out of the tag name or attribute list. -# -# Tcl 8.1 REs should fix this. -# -# Arguments: -# tag element name -# attr attribute list (raw) -# e End tag delimiter. -# -# Results: -# "/" if the trailing slash is found. Optionally, return a list -# containing new values for the tag name and/or attribute list. - -proc xml::ParseEmpty {tag attr e} { - - if {[string match */ [string trimright $tag]] && \ - ![string length $attr]} { - regsub {/$} $tag {} tag - return [list / $tag $attr] - } elseif {[string match */ [string trimright $attr]]} { - regsub {/$} [string trimright $attr] {} attr - return [list / $tag $attr] - } else { - return {} - } - -} - -# xml::ParseAttrs -- -# -# Parse element attributes. -# -# There are two forms for name-value pairs: -# -# name="value" -# name='value' -# -# Watch out for the trailing slash on empty elements. -# -# Arguments: -# attrs attribute string given in a tag -# -# Results: -# Returns a Tcl list representing the name-value pairs in the -# attribute string - -proc xml::ParseAttrs attrs { - variable Wsp - variable Name - - # First check whether there's any work to do - if {![string compare {} [string trim $attrs]]} { - return {} - } - - # Strip the trailing slash on empty elements - regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList - - set mode name - set result {} - foreach component [split $atList =] { - switch $mode { - name { - set component [string trim $component] - if {[regexp $Name $component]} { - lappend result $component - } else { - return -code error "invalid attribute name \"$component\"" - } - set mode value:start - } - value:start { - set component [string trimleft $component] - set delimiter [string index $component 0] - set value {} - switch -- $delimiter { - \" - - ' { - if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} { - lappend result $value - set remainder [string trim $remainder] - if {[string length $remainder]} { - if {[regexp $Name $remainder]} { - lappend result $remainder - set mode value:start - } else { - return -code error "invalid attribute name \"$remainder\"" - } - } else { - set mode end - } - } else { - set value [string range $component 1 end] - set mode value:continue - } - } - default { - return -code error "invalid value for attribute \"[lindex $result end]\"" - } - } - } - value:continue { - if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} { - append value = $valuepart - lappend result $value - set remainder [string trim $remainder] - if {[string length $remainder]} { - if {[regexp $Name $remainder]} { - lappend result $remainder - set mode value:start - } else { - return -code error "invalid attribute name \"$remainder\"" - } - } else { - set mode end - } - } else { - append value = $component - } - } - end { - return -code error "unexpected data found after end of attribute list" - } - } - } - - switch $mode { - name - - end { - # This is normal - } - default { - return -code error "unexpected end of attribute list" - } - } - - return $result -} - -# xml::ParseCommand_reset -- -# -# Initialize parser data -# -# Arguments: -# object parser object -# -# Results: -# Parser data structure initialised - -proc xml::ParseCommand_reset object { - upvar \#0 [namespace current]::$object parser - - array set parser [list \ - -final 1 \ - internaldtd {} \ - ] -} - diff --git a/tclxml/tclxml-tcl/tclparser-8.1.tcl b/tclxml/tclxml-tcl/tclparser-8.1.tcl deleted file mode 100755 index 40a0af9..0000000 --- a/tclxml/tclxml-tcl/tclparser-8.1.tcl +++ /dev/null @@ -1,614 +0,0 @@ -# tclparser-8.1.tcl -- -# -# This file provides a Tcl implementation of a XML parser. -# This file supports Tcl 8.1. -# -# See xml-8.[01].tcl for definitions of character sets and -# regular expressions. -# -# Copyright (c) 2005-2008 by Explain. -# http://www.explain.com.au/ -# Copyright (c) 1998-2003 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: tclparser-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require Tcl 8.1 - -package provide xml::tclparser 3.2 - -package require xmldefs 3.2 - -package require sgmlparser 1.0 - -namespace eval xml::tclparser { - - namespace export create createexternal externalentity parse configure get delete - - # Tokenising expressions - - variable tokExpr $::xml::tokExpr - variable substExpr $::xml::substExpr - - # Register this parser class - - ::xml::parserclass create tcl \ - -createcommand [namespace code create] \ - -createentityparsercommand [namespace code createentityparser] \ - -parsecommand [namespace code parse] \ - -configurecommand [namespace code configure] \ - -deletecommand [namespace code delete] \ - -resetcommand [namespace code reset] -} - -# xml::tclparser::create -- -# -# Creates XML parser object. -# -# Arguments: -# name unique identifier for this instance -# -# Results: -# The state variable is initialised. - -proc xml::tclparser::create name { - - # Initialise state variable - upvar \#0 [namespace current]::$name parser - array set parser [list -name $name \ - -cmd [uplevel 3 namespace current]::$name \ - -final 1 \ - -validate 0 \ - -statevariable [namespace current]::$name \ - -baseuri {} \ - internaldtd {} \ - entities [namespace current]::Entities$name \ - extentities [namespace current]::ExtEntities$name \ - parameterentities [namespace current]::PEntities$name \ - externalparameterentities [namespace current]::ExtPEntities$name \ - elementdecls [namespace current]::ElDecls$name \ - attlistdecls [namespace current]::AttlistDecls$name \ - notationdecls [namespace current]::NotDecls$name \ - depth 0 \ - leftover {} \ - ] - - # Initialise entities with predefined set - array set [namespace current]::Entities$name [array get ::sgml::EntityPredef] - - return $parser(-cmd) -} - -# xml::tclparser::createentityparser -- -# -# Creates XML parser object for an entity. -# -# Arguments: -# name name for the new parser -# parent name of parent parser -# -# Results: -# The state variable is initialised. - -proc xml::tclparser::createentityparser {parent name} { - upvar #0 [namespace current]::$parent p - - # Initialise state variable - upvar \#0 [namespace current]::$name external - array set external [array get p] - - regsub $parent $p(-cmd) {} parentns - - array set external [list -name $name \ - -cmd $parentns$name \ - -statevariable [namespace current]::$name \ - internaldtd {} \ - line 0 \ - ] - incr external(depth) - - return $external(-cmd) -} - -# xml::tclparser::configure -- -# -# Configures a XML parser object. -# -# Arguments: -# name unique identifier for this instance -# args option name/value pairs -# -# Results: -# May change values of config options - -proc xml::tclparser::configure {name args} { - upvar \#0 [namespace current]::$name parser - - # BUG: very crude, no checks for illegal args - # Mats: Should be synced with sgmlparser.tcl - set options {-elementstartcommand -elementendcommand \ - -characterdatacommand -processinginstructioncommand \ - -externalentitycommand -xmldeclcommand \ - -doctypecommand -commentcommand \ - -entitydeclcommand -unparsedentitydeclcommand \ - -parameterentitydeclcommand -notationdeclcommand \ - -elementdeclcommand -attlistdeclcommand \ - -paramentityparsing -defaultexpandinternalentities \ - -startdoctypedeclcommand -enddoctypedeclcommand \ - -entityreferencecommand -warningcommand \ - -defaultcommand -unknownencodingcommand -notstandalonecommand \ - -startcdatasectioncommand -endcdatasectioncommand \ - -errorcommand -final \ - -validate -baseuri -baseurl \ - -name -cmd -emptyelement \ - -parseattributelistcommand -parseentitydeclcommand \ - -normalize -internaldtd -dtdsubset \ - -reportempty -ignorewhitespace \ - -reportempty \ - } - set usage [join $options ", "] - regsub -all -- - $options {} options - set pat ^-([join $options |])$ - foreach {flag value} $args { - if {[regexp $pat $flag]} { - # Validate numbers - if {[info exists parser($flag)] && \ - [string is integer -strict $parser($flag)] && \ - ![string is integer -strict $value]} { - return -code error "Bad value for $flag ($value), must be integer" - } - set parser($flag) $value - } else { - return -code error "Unknown option $flag, can be: $usage" - } - } - - # Backward-compatibility: -baseuri is a synonym for -baseurl - catch {set parser(-baseuri) $parser(-baseurl)} - - return {} -} - -# xml::tclparser::parse -- -# -# Parses document instance data -# -# Arguments: -# name parser object -# xml data -# args configuration options -# -# Results: -# Callbacks are invoked - -proc xml::tclparser::parse {name xml args} { - - array set options $args - upvar \#0 [namespace current]::$name parser - variable tokExpr - variable substExpr - - # Mats: - if {[llength $args]} { - eval {configure $name} $args - } - - set parseOptions [list \ - -emptyelement [namespace code ParseEmpty] \ - -parseattributelistcommand [namespace code ParseAttrs] \ - -parseentitydeclcommand [namespace code ParseEntity] \ - -normalize 0] - eval lappend parseOptions \ - [array get parser -*command] \ - [array get parser -reportempty] \ - [array get parser -ignorewhitespace] \ - [array get parser -name] \ - [array get parser -cmd] \ - [array get parser -baseuri] \ - [array get parser -validate] \ - [array get parser -final] \ - [array get parser -defaultexpandinternalentities] \ - [array get parser entities] \ - [array get parser extentities] \ - [array get parser parameterentities] \ - [array get parser externalparameterentities] \ - [array get parser elementdecls] \ - [array get parser attlistdecls] \ - [array get parser notationdecls] - - # Mats: - # If -final 0 we also need to maintain the state with a -statevariable ! - if {!$parser(-final)} { - eval lappend parseOptions [array get parser -statevariable] - } - - set dtdsubset no - catch {set dtdsubset $options(-dtdsubset)} - switch -- $dtdsubset { - internal { - # Bypass normal parsing - lappend parseOptions -statevariable $parser(-statevariable) - array set intOptions [array get ::sgml::StdOptions] - array set intOptions $parseOptions - ::sgml::ParseDTD:Internal [array get intOptions] $xml - return {} - } - external { - # Bypass normal parsing - lappend parseOptions -statevariable $parser(-statevariable) - array set intOptions [array get ::sgml::StdOptions] - array set intOptions $parseOptions - ::sgml::ParseDTD:External [array get intOptions] $xml - return {} - } - default { - # Pass through to normal processing - } - } - - lappend tokenOptions \ - -internaldtdvariable [namespace current]::${name}(internaldtd) - - # Mats: If -final 0 we also need to maintain the state with a -statevariable ! - if {!$parser(-final)} { - eval lappend tokenOptions [array get parser -statevariable] \ - [array get parser -final] - } - - # Mats: - # Why not the first four? Just padding? Lrange undos \n interp. - # It is necessary to have the first four as well if chopped off in - # middle of pcdata. - set tokenised [lrange \ - [eval {::sgml::tokenise $xml $tokExpr $substExpr} $tokenOptions] \ - 0 end] - - lappend parseOptions -internaldtd [list $parser(internaldtd)] - eval ::sgml::parseEvent [list $tokenised] $parseOptions - - return {} -} - -# xml::tclparser::ParseEmpty -- Tcl 8.1+ version -# -# Used by parser to determine whether an element is empty. -# This is usually dead easy in XML, but as always not quite. -# Have to watch out for empty element syntax -# -# Arguments: -# tag element name -# attr attribute list (raw) -# e End tag delimiter. -# -# Results: -# Return value of e - -proc xml::tclparser::ParseEmpty {tag attr e} { - switch -glob [string length $e],[regexp "/[::xml::cl $::xml::Wsp]*$" $attr] { - 0,0 { - return {} - } - 0,* { - return / - } - default { - return $e - } - } -} - -# xml::tclparser::ParseAttrs -- Tcl 8.1+ version -# -# Parse element attributes. -# -# There are two forms for name-value pairs: -# -# name="value" -# name='value' -# -# Arguments: -# opts parser options -# attrs attribute string given in a tag -# -# Results: -# Returns a Tcl list representing the name-value pairs in the -# attribute string -# -# A ">" occurring in the attribute list causes problems when parsing -# the XML. This manifests itself by an unterminated attribute value -# and a ">" appearing the element text. -# In this case return a three element list; -# the message "unterminated attribute value", the attribute list it -# did manage to parse and the remainder of the attribute list. - -proc xml::tclparser::ParseAttrs {opts attrs} { - - set result {} - - while {[string length [string trim $attrs]]} { - if {[regexp [::sgml::cl $::xml::Wsp]*($::xml::Name)[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')([::sgml::cl ^<]*?)\\2(.*) $attrs discard attrName delimiter value attrs]} { - lappend result $attrName [NormalizeAttValue $opts $value] - } elseif {[regexp [::sgml::cl $::xml::Wsp]*$::xml::Name[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')[::sgml::cl ^<]*\$ $attrs]} { - return -code error [list {unterminated attribute value} $result $attrs] - } else { - return -code error "invalid attribute list" - } - } - - return $result -} - -# xml::tclparser::NormalizeAttValue -- -# -# Perform attribute value normalisation. This involves: -# . character references are appended to the value -# . entity references are recursively processed and replacement value appended -# . whitespace characters cause a space to be appended -# . other characters appended as-is -# -# Arguments: -# opts parser options -# value unparsed attribute value -# -# Results: -# Normalised value returned. - -proc xml::tclparser::NormalizeAttValue {opts value} { - - # sgmlparser already has backslashes protected - # Protect Tcl specials - regsub -all {([][$])} $value {\\\1} value - - # Deal with white space - regsub -all "\[$::xml::Wsp\]" $value { } value - - # Find entity refs - regsub -all {&([^;]+);} $value {[NormalizeAttValue:DeRef $opts {\1}]} value - - return [subst $value] -} - -# xml::tclparser::NormalizeAttValue:DeRef -- -# -# Handler to normalize attribute values -# -# Arguments: -# opts parser options -# ref entity reference -# -# Results: -# Returns character - -proc xml::tclparser::NormalizeAttValue:DeRef {opts ref} { - # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap - switch -glob -- $ref { - {#x*} { - scan [string range $ref 2 end] %x value - set char [format %c $value] - # Check that the char is legal for XML - if {[regexp [format {^[%s]$} $::xml::Char] $char]} { - return $char - } else { - return -code error "illegal character" - } - } - {#*} { - scan [string range $ref 1 end] %d value - set char [format %c $value] - # Check that the char is legal for XML - if {[regexp [format {^[%s]$} $::xml::Char] $char]} { - return $char - } else { - return -code error "illegal character" - } - } - lt - - gt - - amp - - quot - - apos { - array set map {lt < gt > amp & quot \" apos '} - return $map($ref) - } - default { - # A general entity. Must resolve to a text value - no element structure. - - array set options $opts - upvar #0 $options(entities) map - - if {[info exists map($ref)]} { - - if {[regexp < $map($ref)]} { - return -code error "illegal character \"<\" in attribute value" - } - - if {![regexp & $map($ref)]} { - # Simple text replacement - return $map($ref) - } - - # There are entity references in the replacement text. - # Can't use child entity parser since must catch element structures - - return [NormalizeAttValue $opts $map($ref)] - - } elseif {[string compare $options(-entityreferencecommand) "::sgml::noop"]} { - - set result [uplevel #0 $options(-entityreferencecommand) [list $ref]] - - return $result - - } else { - return -code error "unable to resolve entity reference \"$ref\"" - } - } - } -} - -# xml::tclparser::ParseEntity -- -# -# Parse general entity declaration -# -# Arguments: -# data text to parse -# -# Results: -# Tcl list containing entity declaration - -proc xml::tclparser::ParseEntity data { - set data [string trim $data] - if {[regexp $::sgml::ExternalEntityExpr $data discard type delimiter1 id1 discard delimiter2 id2 optNDATA ndata]} { - switch $type { - PUBLIC { - return [list external $id2 $id1 $ndata] - } - SYSTEM { - return [list external $id1 {} $ndata] - } - } - } elseif {[regexp {^("|')(.*?)\1$} $data discard delimiter value]} { - return [list internal $value] - } else { - return -code error "badly formed entity declaration" - } -} - -# xml::tclparser::delete -- -# -# Destroy parser data -# -# Arguments: -# name parser object -# -# Results: -# Parser data structure destroyed - -proc xml::tclparser::delete name { - upvar \#0 [namespace current]::$name parser - catch {::sgml::ParserDelete $parser(-statevariable)} - catch {unset parser} - return {} -} - -# xml::tclparser::get -- -# -# Retrieve additional information from the parser -# -# Arguments: -# name parser object -# method info to retrieve -# args additional arguments for method -# -# Results: -# Depends on method - -proc xml::tclparser::get {name method args} { - upvar #0 [namespace current]::$name parser - - switch -- $method { - - elementdecl { - switch [llength $args] { - - 0 { - # Return all element declarations - upvar #0 $parser(elementdecls) elements - return [array get elements] - } - - 1 { - # Return specific element declaration - upvar #0 $parser(elementdecls) elements - if {[info exists elements([lindex $args 0])]} { - return [array get elements [lindex $args 0]] - } else { - return -code error "element \"[lindex $args 0]\" not declared" - } - } - - default { - return -code error "wrong number of arguments: should be \"elementdecl ?element?\"" - } - } - } - - attlist { - if {[llength $args] != 1} { - return -code error "wrong number of arguments: should be \"get attlist element\"" - } - - upvar #0 $parser(attlistdecls) - - return {} - } - - entitydecl { - } - - parameterentitydecl { - } - - notationdecl { - } - - default { - return -code error "unknown method \"$method\"" - } - } - - return {} -} - -# xml::tclparser::ExternalEntity -- -# -# Resolve and parse external entity -# -# Arguments: -# name parser object -# base base URL -# sys system identifier -# pub public identifier -# -# Results: -# External entity is fetched and parsed - -proc xml::tclparser::ExternalEntity {name base sys pub} { -} - -# xml::tclparser:: -- -# -# Reset a parser instance, ready to parse another document -# -# Arguments: -# name parser object -# -# Results: -# Variables unset - -proc xml::tclparser::reset {name} { - upvar \#0 [namespace current]::$name parser - - # Has this parser object been properly initialised? - if {![info exists parser] || \ - ![info exists parser(-name)]} { - return [create $name] - } - - array set parser { - -final 1 - depth 0 - leftover {} - } - - foreach var {Entities ExtEntities PEntities ExtPEntities ElDecls AttlistDecls NotDecls} { - catch {unset [namespace current]::${var}$name} - } - - # Initialise entities with predefined set - array set [namespace current]::Entities$name [array get ::sgml::EntityPredef] - - return {} -} diff --git a/tclxml/tclxml-tcl/xml-8.0.tcl b/tclxml/tclxml-tcl/xml-8.0.tcl deleted file mode 100755 index db28423..0000000 --- a/tclxml/tclxml-tcl/xml-8.0.tcl +++ /dev/null @@ -1,92 +0,0 @@ -# xml-8.0.tcl -- -# -# This file provides generic XML services for all implementations. -# This file supports Tcl 8.0 regular expressions. -# -# See xmlparse.tcl for the Tcl implementation of a XML parser. -# -# Copyright (c) 2005 by Explain. -# http://www.explain.com.au/ -# Copyright (c) 1998-2004 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: xml-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require -exact Tcl 8.0 - -package require sgml 1.8 - -package provide xmldefs 3.2 - -namespace eval xml { - - # Convenience routine - proc cl x { - return "\[$x\]" - } - - # Define various regular expressions - - # Characters - variable Char $::sgml::Char - - # white space - variable Wsp " \t\r\n" - variable noWsp [cl ^$Wsp] - - # Various XML names and tokens - - variable NameChar $::sgml::NameChar - variable Name $::sgml::Name - variable Names $::sgml::Names - variable Nmtoken $::sgml::Nmtoken - variable Nmtokens $::sgml::Nmtokens - - # The definition of the Namespace URI for XML Namespaces themselves. - # The prefix 'xml' is automatically bound to this URI. - variable xmlnsNS http://www.w3.org/XML/1998/namespace - - # Tokenising expressions - - variable tokExpr <(/?)([cl ^$Wsp>/]+)([cl $Wsp]*[cl ^>]*)> - variable substExpr "\}\n{\\2} {\\1} {\\3} \{" - - # table of predefined entities - - variable EntityPredef - array set EntityPredef { - lt < gt > amp & quot \" apos ' - } - -} - -### -### General utility procedures -### - -# xml::noop -- -# -# A do-nothing proc - -proc xml::noop args {} - -### Following procedures are based on html_library - -# xml::zapWhite -- -# -# Convert multiple white space into a single space. -# -# Arguments: -# data plain text -# -# Results: -# As above - -proc xml::zapWhite data { - regsub -all "\[ \t\r\n\]+" $data { } data - return $data -} - diff --git a/tclxml/tclxml-tcl/xml-8.1.tcl b/tclxml/tclxml-tcl/xml-8.1.tcl deleted file mode 100755 index 5ec410b..0000000 --- a/tclxml/tclxml-tcl/xml-8.1.tcl +++ /dev/null @@ -1,135 +0,0 @@ -# xml.tcl -- -# -# This file provides generic XML services for all implementations. -# This file supports Tcl 8.1 regular expressions. -# -# See tclparser.tcl for the Tcl implementation of a XML parser. -# -# Copyright (c) 2005 by Explain. -# http://www.explain.com.au/ -# Copyright (c) 1998-2004 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: xml-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require Tcl 8.1 - -package provide xmldefs 3.2 - -package require sgml 1.8 - -namespace eval xml { - - namespace export qnamesplit - - # Convenience routine - proc cl x { - return "\[$x\]" - } - - # Define various regular expressions - - # Characters - variable Char $::sgml::Char - - # white space - variable Wsp " \t\r\n" - variable allWsp [cl $Wsp]* - variable noWsp [cl ^$Wsp] - - # Various XML names and tokens - - variable NameChar $::sgml::NameChar - variable Name $::sgml::Name - variable Names $::sgml::Names - variable Nmtoken $::sgml::Nmtoken - variable Nmtokens $::sgml::Nmtokens - - # XML Namespaces names - - # NCName ::= Name - ':' - variable NCName $::sgml::Name - regsub -all : $NCName {} NCName - variable QName (${NCName}:)?$NCName ;# (Prefix ':')? LocalPart - - # The definition of the Namespace URI for XML Namespaces themselves. - # The prefix 'xml' is automatically bound to this URI. - variable xmlnsNS http://www.w3.org/XML/1998/namespace - - # table of predefined entities - - variable EntityPredef - array set EntityPredef { - lt < gt > amp & quot \" apos ' - } - - # Expressions for pulling things apart - variable tokExpr <(/?)([::xml::cl ^$::xml::Wsp>/]+)([::xml::cl $::xml::Wsp]*[::xml::cl ^>]*)> - variable substExpr "\}\n{\\2} {\\1} {\\3} \{" - -} - -### -### Exported procedures -### - -# xml::qnamesplit -- -# -# Split a QName into its constituent parts: -# the XML Namespace prefix and the Local-name -# -# Arguments: -# qname XML Qualified Name (see XML Namespaces [6]) -# -# Results: -# Returns prefix and local-name as a Tcl list. -# Error condition returned if the prefix or local-name -# are not valid NCNames (XML Name) - -proc xml::qnamesplit qname { - variable NCName - variable Name - - set prefix {} - set localname $qname - if {[regexp : $qname]} { - if {![regexp ^($NCName)?:($NCName)\$ $qname discard prefix localname]} { - return -code error "name \"$qname\" is not a valid QName" - } - } elseif {![regexp ^$Name\$ $qname]} { - return -code error "name \"$qname\" is not a valid Name" - } - - return [list $prefix $localname] -} - -### -### General utility procedures -### - -# xml::noop -- -# -# A do-nothing proc - -proc xml::noop args {} - -### Following procedures are based on html_library - -# xml::zapWhite -- -# -# Convert multiple white space into a single space. -# -# Arguments: -# data plain text -# -# Results: -# As above - -proc xml::zapWhite data { - regsub -all "\[ \t\r\n\]+" $data { } data - return $data -} - diff --git a/tclxml/tclxml-tcl/xml__tcl.tcl b/tclxml/tclxml-tcl/xml__tcl.tcl deleted file mode 100644 index bdb7bd9..0000000 --- a/tclxml/tclxml-tcl/xml__tcl.tcl +++ /dev/null @@ -1,272 +0,0 @@ -# xml__tcl.tcl -- -# -# This file provides a Tcl implementation of the parser -# class support found in ../tclxml.c. It is only used -# when the C implementation is not installed (for some reason). -# -# Copyright (c) 2005 by Explain. -# http://www.explain.com.au/ -# Copyright (c) 2000-2004 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: xml__tcl.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package provide xml::tcl 3.2 - -namespace eval xml { - namespace export configure parser parserclass - - # Parser implementation classes - variable classes - array set classes {} - - # Default parser class - variable default {} - - # Counter for generating unique names - variable counter 0 -} - -# xml::configure -- -# -# Configure the xml package -# -# Arguments: -# None -# -# Results: -# None (not yet implemented) - -proc xml::configure args {} - -# xml::parserclass -- -# -# Implements the xml::parserclass command for managing -# parser implementations. -# -# Arguments: -# method subcommand -# args method arguments -# -# Results: -# Depends on method - -proc xml::parserclass {method args} { - variable classes - variable default - - switch -- $method { - - create { - if {[llength $args] < 1} { - return -code error "wrong number of arguments, should be xml::parserclass create name ?args?" - } - - set name [lindex $args 0] - if {[llength [lrange $args 1 end]] % 2} { - return -code error "missing value for option \"[lindex $args end]\"" - } - array set classes [list $name [list \ - -createcommand [namespace current]::noop \ - -createentityparsercommand [namespace current]::noop \ - -parsecommand [namespace current]::noop \ - -configurecommand [namespace current]::noop \ - -getcommand [namespace current]::noop \ - -deletecommand [namespace current]::noop \ - ]] - # BUG: we're not checking that the arguments are kosher - set classes($name) [lrange $args 1 end] - set default $name - } - - destroy { - if {[llength $args] < 1} { - return -code error "wrong number of arguments, should be xml::parserclass destroy name" - } - - if {[info exists classes([lindex $args 0])]} { - unset classes([lindex $args 0]) - } else { - return -code error "no such parser class \"[lindex $args 0]\"" - } - } - - info { - if {[llength $args] < 1} { - return -code error "wrong number of arguments, should be xml::parserclass info method" - } - - switch -- [lindex $args 0] { - names { - return [array names classes] - } - default { - return $default - } - } - } - - default { - return -code error "unknown method \"$method\"" - } - } - - return {} -} - -# xml::parser -- -# -# Create a parser object instance -# -# Arguments: -# args optional name, configuration options -# -# Results: -# Returns object name. Parser instance created. - -proc xml::parser args { - variable classes - variable default - - if {[llength $args] < 1} { - # Create unique name, no options - set parserName [FindUniqueName] - } else { - if {[string index [lindex $args 0] 0] == "-"} { - # Create unique name, have options - set parserName [FindUniqueName] - } else { - # Given name, optional options - set parserName [lindex $args 0] - set args [lrange $args 1 end] - } - } - - array set options [list \ - -parser $default - ] - array set options $args - - if {![info exists classes($options(-parser))]} { - return -code error "no such parser class \"$options(-parser)\"" - } - - # Now create the parser instance command and data structure - # The command must be created in the caller's namespace - uplevel 1 [list proc $parserName {method args} "eval [namespace current]::ParserCmd [list $parserName] \[list \$method\] \$args"] - upvar #0 [namespace current]::$parserName data - array set data [list class $options(-parser)] - - array set classinfo $classes($options(-parser)) - if {[string compare $classinfo(-createcommand) ""]} { - eval $classinfo(-createcommand) [list $parserName] - } - if {[string compare $classinfo(-configurecommand) ""] && \ - [llength $args]} { - eval $classinfo(-configurecommand) [list $parserName] $args - } - - return $parserName -} - -# xml::FindUniqueName -- -# -# Generate unique object name -# -# Arguments: -# None -# -# Results: -# Returns string. - -proc xml::FindUniqueName {} { - variable counter - return xmlparser[incr counter] -} - -# xml::ParserCmd -- -# -# Implements parser object command -# -# Arguments: -# name object reference -# method subcommand -# args method arguments -# -# Results: -# Depends on method - -proc xml::ParserCmd {name method args} { - variable classes - upvar #0 [namespace current]::$name data - - array set classinfo $classes($data(class)) - - switch -- $method { - - configure { - # BUG: We're not checking for legal options - array set data $args - eval $classinfo(-configurecommand) [list $name] $args - return {} - } - - cget { - return $data([lindex $args 0]) - } - - entityparser { - set new [FindUniqueName] - - upvar #0 [namespace current]::$name parent - upvar #0 [namespace current]::$new data - array set data [array get parent] - - uplevel 1 [list proc $new {method args} "eval [namespace current]::ParserCmd [list $new] \[list \$method\] \$args"] - - return [eval $classinfo(-createentityparsercommand) [list $name $new] $args] - } - - free { - eval $classinfo(-deletecommand) [list $name] - unset data - uplevel 1 [list rename $name {}] - } - - get { - eval $classinfo(-getcommand) [list $name] $args - } - - parse { - if {[llength $args] < 1} { - return -code error "wrong number of arguments, should be $name parse xml ?options?" - } - eval $classinfo(-parsecommand) [list $name] $args - } - - reset { - eval $classinfo(-resetcommand) [list $name] - } - - default { - return -code error "unknown method" - } - } - - return {} -} - -# xml::noop -- -# -# Do nothing utility proc -# -# Arguments: -# args whatever -# -# Results: -# Nothing happens - -proc xml::noop args {} diff --git a/tclxml/tclxml-tcl/xmldep.tcl b/tclxml/tclxml-tcl/xmldep.tcl deleted file mode 100644 index bbb2613..0000000 --- a/tclxml/tclxml-tcl/xmldep.tcl +++ /dev/null @@ -1,179 +0,0 @@ -# xmldep.tcl -- -# -# Find the dependencies in an XML document. -# Supports external entities and XSL include/import. -# -# TODO: -# XInclude -# -# Copyright (c) 2001-2003 Zveno Pty Ltd -# http://www.zveno.com/ -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: xmldep.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package require xml - -package provide xml::dep 1.0 - -namespace eval xml::dep { - namespace export depend - - variable extEntities - array set extEntities {} - - variable XSLTNS http://www.w3.org/1999/XSL/Transform -} - -# xml::dep::depend -- -# -# Find the resources which an XML document -# depends on. The document is parsed -# sequentially, rather than using DOM, for efficiency. -# -# TODO: -# Asynchronous parsing. -# -# Arguments: -# xml XML document entity -# args configuration options -# -# Results: -# Returns list of resource (system) identifiers - -proc xml::dep::depend {xml args} { - variable resources - variable entities - - set resources {} - catch {unset entities} - array set entities {} - - set p [xml::parser \ - -elementstartcommand [namespace code ElStart] \ - -doctypecommand [namespace code DocTypeDecl] \ - -entitydeclcommand [namespace code EntityDecl] \ - -entityreferencecommand [namespace code EntityReference] \ - -validate 1 \ - ] - if {[llength $args]} { - eval [list $p] configure $args - } - $p parse $xml - - return $resources -} - -# xml::dep::ElStart -- -# -# Process start element -# -# Arguments: -# name tag name -# atlist attribute list -# args options -# -# Results: -# May add to resources list - -proc xml::dep::ElStart {name atlist args} { - variable XSLTNS - variable resources - - array set opts { - -namespace {} - } - array set opts $args - - switch -- $opts(-namespace) \ - $XSLTNS { - switch $name { - import - - include { - array set attr { - href {} - } - array set attr $atlist - - if {[string length $attr(href)]} { - if {[lsearch $resources $attr(href)] < 0} { - lappend resources $attr(href) - } - } - - } - } - } -} - -# xml::dep::DocTypeDecl -- -# -# Process Document Type Declaration -# -# Arguments: -# name Document element -# pubid Public identifier -# sysid System identifier -# dtd Internal DTD Subset -# -# Results: -# Resource added to list - -proc xml::dep::DocTypeDecl {name pubid sysid dtd} { - variable resources - - puts stderr [list DocTypeDecl $name $pubid $sysid dtd] - - if {[string length $sysid] && \ - [lsearch $resources $sysid] < 0} { - lappend resources $sysid - } - - return {} -} - -# xml::dep::EntityDecl -- -# -# Process entity declaration, looking for external entity -# -# Arguments: -# name entity name -# sysid system identifier -# pubid public identifier or repl. text -# -# Results: -# Store external entity info for later reference - -proc xml::dep::EntityDecl {name sysid pubid} { - variable extEntities - - puts stderr [list EntityDecl $name $sysid $pubid] - - set extEntities($name) $sysid -} - -# xml::dep::EntityReference -- -# -# Process entity reference -# -# Arguments: -# name entity name -# -# Results: -# May add to resources list - -proc xml::dep::EntityReference name { - variable extEntities - variable resources - - puts stderr [list EntityReference $name] - - if {[info exists extEntities($name)] && \ - [lsearch $resources $extEntities($name)] < 0} { - lappend resources $extEntities($name) - } - -} - diff --git a/tclxml/tclxml-tcl/xpath.tcl b/tclxml/tclxml-tcl/xpath.tcl deleted file mode 100644 index e772e67..0000000 --- a/tclxml/tclxml-tcl/xpath.tcl +++ /dev/null @@ -1,362 +0,0 @@ -# xpath.tcl -- -# -# Provides an XPath parser for Tcl, -# plus various support procedures -# -# Copyright (c) 2000-2003 Zveno Pty Ltd -# -# See the file "LICENSE" in this distribution for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# $Id: xpath.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ - -package provide xpath 1.0 - -# We need the XML package for definition of Names -package require xml - -namespace eval xpath { - namespace export split join createnode - - variable axes { - ancestor - ancestor-or-self - attribute - child - descendant - descendant-or-self - following - following-sibling - namespace - parent - preceding - preceding-sibling - self - } - - variable nodeTypes { - comment - text - processing-instruction - node - } - - # NB. QName has parens for prefix - - variable nodetestExpr ^(${::xml::QName})${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\5)?${::xml::allWsp}\\))?${::xml::allWsp}(.*) - - variable nodetestExpr2 ((($::xml::QName)${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\7)?${::xml::allWsp}\\))?)|${::xml::allWsp}(\\*))${::xml::allWsp}(.*) -} - -# xpath::split -- -# -# Parse an XPath location path -# -# Arguments: -# locpath location path -# -# Results: -# A Tcl list representing the location path. -# The list has the form: {{axis node-test {predicate predicate ...}} ...} -# Where each list item is a location step. - -proc xpath::split locpath { - set leftover {} - - set result [InnerSplit $locpath leftover] - - if {[string length [string trim $leftover]]} { - return -code error "unexpected text \"$leftover\"" - } - - return $result -} - -proc xpath::InnerSplit {locpath leftoverVar} { - upvar $leftoverVar leftover - - variable axes - variable nodetestExpr - variable nodetestExpr2 - - # First determine whether we have an absolute location path - if {[regexp {^/(.*)} $locpath discard locpath]} { - set path {{}} - } else { - set path {} - } - - while {[string length [string trimleft $locpath]]} { - if {[regexp {^\.\.(.*)} $locpath discard locpath]} { - # .. abbreviation - set axis parent - set nodetest * - } elseif {[regexp {^/(.*)} $locpath discard locpath]} { - # // abbreviation - set axis descendant-or-self - if {[regexp ^$nodetestExpr2 [string trimleft $locpath] discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} { - set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal] - } else { - set leftover $locpath - return $path - } - } elseif {[regexp ^\\.${::xml::allWsp}(.*) $locpath discard locpath]} { - # . abbreviation - set axis self - set nodetest * - } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}"(\[^"\])"(.*) $locpath discard attrName discard attrValue locpath]} { - # @ abbreviation - set axis attribute - set nodetest $attrName - } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}'(\[^'\])'(.*) $locpath discard attrName discard attrValue locpath]} { - # @ abbreviation - set axis attribute - set nodetest $attrName - } elseif {[regexp ^@($::xml::QName)(.*) $locpath discard attrName discard2 locpath]} { - # @ abbreviation - set axis attribute - set nodetest $attrName - } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?\\*(.*) $locpath discard discard axis discard locpath]} { - # wildcard specified - set nodetest * - if {![string length $axis]} { - set axis child - } - } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?$nodetestExpr2 $locpath discard discard axis discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} { - # nodetest, with or without axis - if {![string length $axis]} { - set axis child - } - set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal] - } else { - set leftover $locpath - return $path - } - - # ParsePredicates - set predicates {} - set locpath [string trimleft $locpath] - while {[regexp {^\[(.*)} $locpath discard locpath]} { - if {[regexp {^([0-9]+)(\].*)} [string trim $locpath] discard posn locpath]} { - set predicate [list = {function position {}} [list number $posn]] - } else { - set leftover2 {} - set predicate [ParseExpr $locpath leftover2] - set locpath $leftover2 - unset leftover2 - } - - if {[regexp {^\](.*)} [string trimleft $locpath] discard locpath]} { - lappend predicates $predicate - } else { - return -code error "unexpected text in predicate \"$locpath\"" - } - } - - set axis [string trim $axis] - set nodetest [string trim $nodetest] - - # This step completed - if {[lsearch $axes $axis] < 0} { - return -code error "invalid axis \"$axis\"" - } - lappend path [list $axis $nodetest $predicates] - - # Move to next step - - if {[string length $locpath] && ![regexp ^/(.*) $locpath discard locpath]} { - set leftover $locpath - return $path - } - - } - - return $path -} - -# xpath::ParseExpr -- -# -# Parse one expression in a predicate -# -# Arguments: -# locpath location path to parse -# leftoverVar Name of variable in which to store remaining path -# -# Results: -# Returns parsed expression as a Tcl list - -proc xpath::ParseExpr {locpath leftoverVar} { - upvar $leftoverVar leftover - variable nodeTypes - - set expr {} - set mode expr - set stack {} - - while {[string index [string trimleft $locpath] 0] != "\]"} { - set locpath [string trimleft $locpath] - switch $mode { - expr { - # We're looking for a term - if {[regexp ^-(.*) $locpath discard locpath]} { - # UnaryExpr - lappend stack "-" - } elseif {[regexp ^\\\$({$::xml::QName})(.*) $locpath discard varname discard locpath]} { - # VariableReference - lappend stack [list varRef $varname] - set mode term - } elseif {[regexp {^\((.*)} $locpath discard locpath]} { - # Start grouping - set leftover2 {} - lappend stack [list group [ParseExpr $locpath leftover2]] - set locpath $leftover2 - unset leftover2 - - if {[regexp {^\)(.*)} [string trimleft $locpath] discard locpath]} { - set mode term - } else { - return -code error "unexpected text \"$locpath\", expected \")\"" - } - - } elseif {[regexp {^"([^"]*)"(.*)} $locpath discard literal locpath]} { - # Literal (" delimited) - lappend stack [list literal $literal] - set mode term - } elseif {[regexp {^'([^']*)'(.*)} $locpath discard literal locpath]} { - # Literal (' delimited) - lappend stack [list literal $literal] - set mode term - } elseif {[regexp {^([0-9]+(\.[0-9]+)?)(.*)} $locpath discard number discard locpath]} { - # Number - lappend stack [list number $number] - set mode term - } elseif {[regexp {^(\.[0-9]+)(.*)} $locpath discard number locpath]} { - # Number - lappend stack [list number $number] - set mode term - } elseif {[regexp ^(${::xml::QName})\\(${::xml::allWsp}(.*) $locpath discard functionName discard locpath]} { - # Function call start or abbreviated node-type test - - if {[lsearch $nodeTypes $functionName] >= 0} { - # Looking like a node-type test - if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} { - lappend stack [list path [list child [list $functionName ()] {}]] - set mode term - } else { - return -code error "invalid node-type test \"$functionName\"" - } - } else { - if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} { - set parameters {} - } else { - set leftover2 {} - set parameters [ParseExpr $locpath leftover2] - set locpath $leftover2 - unset leftover2 - while {[regexp {^,(.*)} $locpath discard locpath]} { - set leftover2 {} - lappend parameters [ParseExpr $locpath leftover2] - set locpath $leftover2 - unset leftover2 - } - - if {![regexp ^\\)${::xml::allWsp}(.*) [string trimleft $locpath] discard locpath]} { - return -code error "unexpected text \"locpath\" - expected \")\"" - } - } - - lappend stack [list function $functionName $parameters] - set mode term - } - - } else { - # LocationPath - set leftover2 {} - lappend stack [list path [InnerSplit $locpath leftover2]] - set locpath $leftover2 - unset leftover2 - set mode term - } - } - term { - # We're looking for an expression operator - if {[regexp ^-(.*) $locpath discard locpath]} { - # UnaryExpr - set stack [linsert $stack 0 expr "-"] - set mode expr - } elseif {[regexp ^(and|or|\\=|!\\=|<|>|<\\=|>\\=|\\||\\+|\\-|\\*|div|mod)(.*) $locpath discard exprtype locpath]} { - # AndExpr, OrExpr, EqualityExpr, RelationalExpr or UnionExpr - set stack [linsert $stack 0 $exprtype] - set mode expr - } else { - return -code error "unexpected text \"$locpath\", expecting operator" - } - } - default { - # Should never be here! - return -code error "internal error" - } - } - } - - set leftover $locpath - return $stack -} - -# xpath::ResolveWildcard -- - -proc xpath::ResolveWildcard {nodetest typetest wildcard literal} { - variable nodeTypes - - switch -glob -- [string length $nodetest],[string length $typetest],[string length $wildcard],[string length $literal] { - 0,0,0,* { - return -code error "bad location step (nothing parsed)" - } - 0,0,* { - # Name wildcard specified - return * - } - *,0,0,* { - # Element type test - nothing to do - return $nodetest - } - *,0,*,* { - # Internal error? - return -code error "bad location step (found both nodetest and wildcard)" - } - *,*,0,0 { - # Node type test - if {[lsearch $nodeTypes $nodetest] < 0} { - return -code error "unknown node type \"$typetest\"" - } - return [list $nodetest $typetest] - } - *,*,0,* { - # Node type test - if {[lsearch $nodeTypes $nodetest] < 0} { - return -code error "unknown node type \"$typetest\"" - } - return [list $nodetest $literal] - } - default { - # Internal error? - return -code error "bad location step" - } - } -} - -# xpath::join -- -# -# Reconstitute an XPath location path from a -# Tcl list representation. -# -# Arguments: -# spath split path -# -# Results: -# Returns an Xpath location path - -proc xpath::join spath { - return -code error "not yet implemented" -} - |