diff options
Diffstat (limited to 'tclxml/tclxml-tcl/tclparser-8.0.tcl')
-rwxr-xr-x | tclxml/tclxml-tcl/tclparser-8.0.tcl | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/tclxml/tclxml-tcl/tclparser-8.0.tcl b/tclxml/tclxml-tcl/tclparser-8.0.tcl new file mode 100755 index 0000000..e2573f8 --- /dev/null +++ b/tclxml/tclxml-tcl/tclparser-8.0.tcl @@ -0,0 +1,359 @@ +# tclparser-8.0.tcl -- +# +# This file provides a Tcl implementation of a XML parser. +# This file supports Tcl 8.0. +# +# See xml-8.[01].tcl for definitions of character sets and +# regular expressions. +# +# Copyright (c) 2005-2008 by Explain. +# http://www.explain.com.au/ +# Copyright (c) 1998-2004 Zveno Pty Ltd +# http://www.zveno.com/ +# +# See the file "LICENSE" in this distribution for information on usage and +# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# $Id: tclparser-8.0.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $ + +package require -exact Tcl 8.0 + +package require xmldefs 3.2 + +package require sgmlparser 1.0 + +package provide xml::tclparser 3.2 + +namespace eval xml { + + # Procedures for parsing XML documents + namespace export parser + # Procedures for parsing XML DTDs + namespace export DTDparser + + # Counter for creating unique parser objects + variable ParserCounter 0 + +} + +# xml::parser -- +# +# Creates XML parser object. +# +# Arguments: +# args Unique name for parser object +# plus option/value pairs +# +# Recognised Options: +# -final Indicates end of document data +# -elementstartcommand Called when an element starts +# -elementendcommand Called when an element ends +# -characterdatacommand Called when character data occurs +# -processinginstructioncommand Called when a PI occurs +# -externalentityrefcommand Called for an external entity reference +# +# (Not compatible with expat) +# -xmldeclcommand Called when the XML declaration occurs +# -doctypecommand Called when the document type declaration occurs +# +# -errorcommand Script to evaluate for a fatal error +# -warningcommand Script to evaluate for a reportable warning +# -statevariable global state variable +# -reportempty whether to provide empty element indication +# +# Results: +# The state variable is initialised. + +proc xml::parser {args} { + variable ParserCounter + + if {[llength $args] > 0} { + set name [lindex $args 0] + set args [lreplace $args 0 0] + } else { + set name parser[incr ParserCounter] + } + + if {[info command [namespace current]::$name] != {}} { + return -code error "unable to create parser object \"[namespace current]::$name\" command" + } + + # Initialise state variable and object command + upvar \#0 [namespace current]::$name parser + set sgml_ns [namespace parent]::sgml + array set parser [list name $name \ + -final 1 \ + -elementstartcommand ${sgml_ns}::noop \ + -elementendcommand ${sgml_ns}::noop \ + -characterdatacommand ${sgml_ns}::noop \ + -processinginstructioncommand ${sgml_ns}::noop \ + -externalentityrefcommand ${sgml_ns}::noop \ + -xmldeclcommand ${sgml_ns}::noop \ + -doctypecommand ${sgml_ns}::noop \ + -warningcommand ${sgml_ns}::noop \ + -statevariable [namespace current]::$name \ + -reportempty 0 \ + internaldtd {} \ + ] + + proc [namespace current]::$name {method args} \ + "eval ParseCommand $name \$method \$args" + + eval ParseCommand [list $name] configure $args + + return [namespace current]::$name +} + +# xml::ParseCommand -- +# +# Handles parse object command invocations +# +# Valid Methods: +# cget +# configure +# parse +# reset +# +# Arguments: +# parser parser object +# method minor command +# args other arguments +# +# Results: +# Depends on method + +proc xml::ParseCommand {parser method args} { + upvar \#0 [namespace current]::$parser state + + switch -- $method { + cget { + return $state([lindex $args 0]) + } + configure { + foreach {opt value} $args { + set state($opt) $value + } + } + parse { + ParseCommand_parse $parser [lindex $args 0] + } + reset { + if {[llength $args]} { + return -code error "too many arguments" + } + ParseCommand_reset $parser + } + default { + return -code error "unknown method \"$method\"" + } + } + + return {} +} + +# xml::ParseCommand_parse -- +# +# Parses document instance data +# +# Arguments: +# object parser object +# xml data +# +# Results: +# Callbacks are invoked, if any are defined + +proc xml::ParseCommand_parse {object xml} { + upvar \#0 [namespace current]::$object parser + variable Wsp + variable tokExpr + variable substExpr + + set parent [namespace parent] + if {![string compare :: $parent]} { + set parent {} + } + + set tokenised [lrange \ + [${parent}::sgml::tokenise $xml \ + $tokExpr \ + $substExpr \ + -internaldtdvariable [namespace current]::${object}(internaldtd)] \ + 4 end] + + eval ${parent}::sgml::parseEvent \ + [list $tokenised \ + -emptyelement [namespace code ParseEmpty] \ + -parseattributelistcommand [namespace code ParseAttrs]] \ + [array get parser -*command] \ + [array get parser -entityvariable] \ + [array get parser -reportempty] \ + [array get parser -final] \ + -normalize 0 \ + -internaldtd [list $parser(internaldtd)] + + return {} +} + +# xml::ParseEmpty -- Tcl 8.0 version +# +# Used by parser to determine whether an element is empty. +# This should be dead easy in XML. The only complication is +# that the RE above can't catch the trailing slash, so we have +# to dig it out of the tag name or attribute list. +# +# Tcl 8.1 REs should fix this. +# +# Arguments: +# tag element name +# attr attribute list (raw) +# e End tag delimiter. +# +# Results: +# "/" if the trailing slash is found. Optionally, return a list +# containing new values for the tag name and/or attribute list. + +proc xml::ParseEmpty {tag attr e} { + + if {[string match */ [string trimright $tag]] && \ + ![string length $attr]} { + regsub {/$} $tag {} tag + return [list / $tag $attr] + } elseif {[string match */ [string trimright $attr]]} { + regsub {/$} [string trimright $attr] {} attr + return [list / $tag $attr] + } else { + return {} + } + +} + +# xml::ParseAttrs -- +# +# Parse element attributes. +# +# There are two forms for name-value pairs: +# +# name="value" +# name='value' +# +# Watch out for the trailing slash on empty elements. +# +# Arguments: +# attrs attribute string given in a tag +# +# Results: +# Returns a Tcl list representing the name-value pairs in the +# attribute string + +proc xml::ParseAttrs attrs { + variable Wsp + variable Name + + # First check whether there's any work to do + if {![string compare {} [string trim $attrs]]} { + return {} + } + + # Strip the trailing slash on empty elements + regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList + + set mode name + set result {} + foreach component [split $atList =] { + switch $mode { + name { + set component [string trim $component] + if {[regexp $Name $component]} { + lappend result $component + } else { + return -code error "invalid attribute name \"$component\"" + } + set mode value:start + } + value:start { + set component [string trimleft $component] + set delimiter [string index $component 0] + set value {} + switch -- $delimiter { + \" - + ' { + if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} { + lappend result $value + set remainder [string trim $remainder] + if {[string length $remainder]} { + if {[regexp $Name $remainder]} { + lappend result $remainder + set mode value:start + } else { + return -code error "invalid attribute name \"$remainder\"" + } + } else { + set mode end + } + } else { + set value [string range $component 1 end] + set mode value:continue + } + } + default { + return -code error "invalid value for attribute \"[lindex $result end]\"" + } + } + } + value:continue { + if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} { + append value = $valuepart + lappend result $value + set remainder [string trim $remainder] + if {[string length $remainder]} { + if {[regexp $Name $remainder]} { + lappend result $remainder + set mode value:start + } else { + return -code error "invalid attribute name \"$remainder\"" + } + } else { + set mode end + } + } else { + append value = $component + } + } + end { + return -code error "unexpected data found after end of attribute list" + } + } + } + + switch $mode { + name - + end { + # This is normal + } + default { + return -code error "unexpected end of attribute list" + } + } + + return $result +} + +# xml::ParseCommand_reset -- +# +# Initialize parser data +# +# Arguments: +# object parser object +# +# Results: +# Parser data structure initialised + +proc xml::ParseCommand_reset object { + upvar \#0 [namespace current]::$object parser + + array set parser [list \ + -final 1 \ + internaldtd {} \ + ] +} + |