summaryrefslogtreecommitdiffstats
path: root/tcldom-tcl/xmlswitch.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tcldom-tcl/xmlswitch.tcl')
-rw-r--r--tcldom-tcl/xmlswitch.tcl520
1 files changed, 520 insertions, 0 deletions
diff --git a/tcldom-tcl/xmlswitch.tcl b/tcldom-tcl/xmlswitch.tcl
new file mode 100644
index 0000000..4e2a2a1
--- /dev/null
+++ b/tcldom-tcl/xmlswitch.tcl
@@ -0,0 +1,520 @@
+# xmlswitch.tcl --
+#
+# This file implements a control structure for Tcl.
+# 'xmlswitch' iterates over an XML document. Features in
+# the document may be specified using XPath location paths,
+# and these will trigger Tcl scripts when matched.
+#
+# Copyright (c) 2008 Explain
+# http://www.explain.com.au/
+# Copyright (c) 2000-2003 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: xmlswitch.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package provide xmlswitch 3.2
+
+# We need the xml, dom and xpath packages
+
+package require xml 3.2
+package require dom 3.2
+package require xpath 1.0
+
+namespace eval xmlswitch {
+ namespace export xmlswitch xmlswitchcont xmlswitchend
+ namespace export domswitch
+ namespace export free rootnode
+
+ variable counter 0
+
+ variable typemap
+ array set typemap {
+ text textNode
+ comment comment
+ processing-instruction processingInstruction
+ }
+}
+
+# xmlswitch::xmlswitch --
+#
+# Parse XML data, matching for XPath locations along the way
+# and (possibly) triggering callbacks.
+#
+# A DOM tree is built as a side-effect (necessary for resolving
+# XPath location paths).
+#
+# Arguments:
+# xml XML document
+# args configuration options,
+# plus a single path/script expression, or multiple expressions
+#
+# Results:
+# Tcl callbacks may be invoked.
+# If -async option is true returns a token for this "process".
+
+proc xmlswitch::xmlswitch {xml args} {
+ variable counter
+
+ set stateVarName [namespace current]::State[incr counter]
+ upvar #0 $stateVarName state
+ set state(stateVarName) $stateVarName
+ set state(-async) 0
+
+ set state(pathArray) ${stateVarName}Paths
+ upvar #0 $state(pathArray) paths
+ array set paths {}
+
+ set cleanup {
+ unset state
+ unset paths
+ }
+
+ # Find configuration options and remove
+ set numOpts 0
+ foreach {opt value} $args {
+ switch -glob -- $opt {
+ -* {
+ set state($opt) $value
+ incr numOpts 2
+ }
+ default {
+ set args [lrange $args $numOpts end]
+ break
+ }
+ }
+ }
+
+ switch -- [llength $args] {
+ 0 {
+ # Nothing to do
+ eval $cleanup
+ return $stateVarName
+ }
+ 1 {
+ foreach {path script} [lindex $args 0] {
+ set paths([xpath::split $path]) $script
+ }
+ }
+ default {
+ if {[llength $args] % 2} {
+ eval $cleanup
+ return -code error "no script matching location path \"[lindex $args end]\""
+ }
+ foreach {path script} $args {
+ set paths([xpath::split $path]) $script
+ }
+ }
+ }
+
+ set root [set state(root) [dom::DOMImplementation create]]
+ set state(current) $root
+
+ # Parse the document
+ # We're going to do this incrementally, so the caller can
+ # break at any time
+ set state(parser) [eval xml::parser [array get state -parser]]
+ #append cleanup "\n $parser destroy\n"
+ $state(parser) configure \
+ -elementstartcommand [namespace code [list ParseElementStart $stateVarName]] \
+ -elementendcommand [namespace code [list ParseElementEnd $stateVarName]] \
+ -characterdatacommand [namespace code [list ParseCharacterData $stateVarName]] \
+ -final 0
+
+# -processinginstructioncommand [namespace code [list ParsePI $stateVarName]] \
+# -commentcommand [namespace code [list ParseComment]]
+
+ if {[catch {$state(parser) parse $xml} err]} {
+ eval $cleanup
+ return -code error $err
+ }
+
+ if {$state(-async)} {
+ return $stateVarName
+ } else {
+ eval $cleanup
+ return {}
+ }
+}
+
+# xmlswitch::xmlswitchcont --
+#
+# Provide more XML data to parse
+#
+# Arguments:
+# token state variable name
+# xml XML data
+#
+# Results:
+# More parsing
+
+proc xmlswitch::xmlswitchcont {token xml} {
+ upvar #0 $token state
+
+ $state(parser) parse $xml
+
+ return {}
+}
+
+# xmlswitch::xmlswitchend --
+#
+# Signal that no further data is available
+#
+# Arguments:
+# token state array
+#
+# Results:
+# Parser configuration changed
+
+proc xmlswitch::xmlswitchend token {
+ upvar #0 $token state
+
+ $state(parser) configure -final true
+
+ return {}
+}
+
+# xmlswitch::rootnode --
+#
+# Get the root node
+#
+# Arguments:
+# token state array
+#
+# Results:
+# Returns root node token
+
+proc xmlswitch::rootnode token {
+ upvar #0 $token state
+
+ return $state(root)
+}
+
+# xmlswitch::free --
+#
+# Free resources EXCEPT the DOM tree.
+# "-all" causes DOM tree to be destroyed too.
+#
+# Arguments:
+# token state array
+# args options
+#
+# Results:
+# Resources freed.
+
+proc xmlswitch::free {token args} {
+ upvar #0 $token state
+
+ if {[lsearch $args "-all"] >= 0} {
+ dom::DOMImplementation destroy $state(root)
+ }
+
+ catch {unset $state(pathArray)}
+ catch {unset state}
+
+ catch {$state(parser) free}
+
+ return {}
+}
+
+# xmlswitch::ParseElementStart --
+#
+# Handle element start tag
+#
+# Arguments:
+# token state array
+# name element type
+# attrList attribute list
+# args options
+# Results:
+# All XPath location paths are checked for a match,
+# and script evaluated for matching XPath.
+# DOM tree node added.
+
+proc xmlswitch::ParseElementStart:dbgdisabled {token name attrList args} {
+ if {[catch {eval ParseElementStart:dbg [list $token $name $attrList] $args} msg]} {
+ puts stderr [list ParseElementStart failed with msg $msg]
+ puts stderr $::errorInfo
+ return -code error $msg
+ } else {
+ puts stderr [list ParseElementStart returned OK]
+ }
+ return $msg
+}
+proc xmlswitch::ParseElementStart {token name attrList args} {
+
+ upvar #0 $token state
+ array set opts $args
+
+ #puts stderr [list xmlswitch::ParseElementStart $token $name $attrList $args]
+
+ lappend state(current) \
+ [dom::document createElement [lindex $state(current) end] $name]
+ foreach {name value} $attrList {
+ dom::element setAttribute [lindex $state(current) end] $name $value
+ }
+
+ MatchTemplates $token [lindex $state(current) end]
+
+ return {}
+}
+
+# xmlswitch::ParseElementEnd --
+#
+# Handle element end tag
+#
+# Arguments:
+# token state array
+# name element type
+# args options
+# Results:
+# State changed
+
+proc xmlswitch::ParseElementEnd {token name args} {
+ upvar #0 $token state
+
+ set state(current) [lreplace $state(current) end end]
+
+ return {}
+}
+
+# xmlswitch::ParseCharacterData --
+#
+# Handle character data
+#
+# Arguments:
+# token state array
+# data pcdata
+#
+# Results:
+# All XPath location paths are checked for a match,
+# and script evaluated for matching XPath.
+# DOM tree node added.
+
+proc xmlswitch::ParseCharacterData {token data} {
+ upvar #0 $token state
+
+ lappend state(current) \
+ [dom::document createTextNode [lindex $state(current) end] $data]
+
+ MatchTemplates $token [lindex $state(current) end]
+
+ set state(current) [lreplace $state(current) end end]
+
+ return {}
+}
+
+# xmlswitch::domswitch --
+#
+# Similar to xmlswitch above, but iterates over a pre-built
+# DOM tree.
+#
+# Arguments:
+# xml XML document
+# args a single path/script expression, or multiple expressions
+#
+# Results:
+# Tcl callbacks may be invoked.
+
+proc xmlswitch::domswitch {xml args} {
+}
+
+# xmlswitch::MatchTemplates --
+#
+# Check all templates for one which matches
+# the current node.
+#
+# Arguments:
+# token state array
+# node Current DOM node
+#
+# Results:
+# If a template matches, its script is evaluated
+
+proc xmlswitch::MatchTemplates {token node} {
+ upvar #0 $token state
+ upvar #0 $state(pathArray) paths
+
+ #puts stderr [list xmlswitch::MatchTemplates $token $node (type: [dom::node cget $node -nodeType]) (name: [dom::node cget $node -nodeName])]
+
+ set matches {}
+
+ foreach {path script} [array get paths] {
+
+ #puts stderr [list checking path $path for a match]
+
+ set context $node
+
+ # Work backwards along the path, reversing each axis
+ set match 0
+ set i [llength $path]
+ #puts stderr [list $i steps to be tested]
+ while {[incr i -1] >= 0} {
+ #puts stderr [list step $i [lindex $path $i]]
+ switch -glob [llength [lindex $path $i]],$i {
+ 0,0 {
+ #puts stderr [list absolute path, end of steps - am I at the root?]
+ if {![string length [dom::node parent $context]]} {
+ #puts stderr [list absolute path matched]
+ lappend matches [list $path $script]
+ } else {
+ #puts stderr [list absolute path did not match]
+ }
+ }
+ *,0 {
+ #puts stderr [list last step, relative path]
+ switch [lindex [lindex $path $i] 0] {
+ child {
+ if {[NodeTest [lindex $path $i] $context] && \
+ [CheckPredicates [lindex $path $i] $context]} {
+ #puts stderr [list relative path matched]
+ lappend matches [list $path $script]
+ } else {
+ #puts stderr [list relative path did not match]
+ }
+ }
+ default {
+ return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
+ }
+ }
+ }
+ default {
+ #puts stderr [list continuing checking steps]
+ switch [lindex [lindex $path $i] 0] {
+ child {
+ if {[NodeTest [lindex $path $i] $context] && \
+ [CheckPredicates [lindex $path $i] $context]} {
+ set context [dom::node parent $context]
+ } else {
+ #puts stderr [list no match]
+ }
+ }
+ default {
+ return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
+ }
+ }
+ }
+ }
+ }
+ }
+
+ # TODO: If there are multiple matches then we must pick the
+ # most specific match
+
+ if {[llength $matches] > 1} {
+ # For the moment we'll just take the first match
+ set matches [list [lindex $matches 0]]
+ }
+
+ if {[llength $matches]} {
+ #puts stderr [list evaluating callback at level [info level]]
+ uplevel 3 [lindex [lindex $matches 0] 1]
+ }
+
+ return {}
+}
+
+# xmlswitch::NodeTest --
+#
+# Check that the node passes the node (type) test
+#
+# Arguments:
+# step Location step
+# node DOM node
+#
+# Results:
+# Boolean
+
+proc xmlswitch::NodeTest {step node} {
+
+ if {[llength [lindex $step 1]] > 1} {
+ switch -glob -- [lindex [lindex $step 1] 0],[dom::node cget $node -nodeType] {
+ node,* -
+ text,textNode -
+ comment,comment -
+ processing-instruction,processingInstruction {
+ return 1
+ }
+ default {
+ return 0
+ }
+ }
+ } elseif {![string compare [lindex $step 1] "*"]} {
+ return 1
+ } elseif {![string compare [lindex $step 1] [dom::node cget $node -nodeName]]} {
+ return 1
+ } else {
+ return 0
+ }
+}
+
+# xmlswitch::CheckPredicates --
+#
+# Check that the node passes the predicates
+#
+# Arguments:
+# step Location step
+# node DOM node
+#
+# Results:
+# Boolean
+
+proc xmlswitch::CheckPredicates {step node} {
+ variable typemap
+
+ set predicates [lindex $step 2]
+ # Shortcut: no predicates means everything passes
+ if {![llength $predicates]} {
+ return 1
+ }
+
+ # Get the context node set
+ switch [lindex $step 0] {
+ child {
+ set nodeset {}
+ if {[llength [lindex $step 1]]} {
+ foreach {name typetest} [lindex $step 1] break
+ switch -- $name {
+ node {
+ set nodeset [dom::node children [dom::node parent $node]]
+ }
+ text -
+ comment -
+ processing-instruction {
+ foreach child [dom::node children [dom::node parent $node]] {
+ if {![string compare [dom::node cget $child -nodeType] $typemap($name)]} {
+ lappend nodeset $child
+ }
+ }
+ }
+ default {
+ # Error
+ }
+ }
+ } else {
+ foreach child [dom::node children [dom::node parent $node]] {
+ if {![string compare [lindex $step 1] [dom::node cget $child -nodeName]]} {
+ lappend nodeset $child
+ }
+ }
+ }
+ }
+ default {
+ return -code error "axis \"[lindex $step 0]\" not supported"
+ }
+ }
+
+ foreach predicate $predicates {
+ # position() is the only supported predicate
+ if {[lsearch $nodeset $node] + 1 == $predicate} {
+ # continue
+ } else {
+ return 0
+ }
+ }
+
+ return 1
+}
+