summaryrefslogtreecommitdiffstats
path: root/tclxml-tcl/tclparser-8.1.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tclxml-tcl/tclparser-8.1.tcl')
-rwxr-xr-xtclxml-tcl/tclparser-8.1.tcl614
1 files changed, 614 insertions, 0 deletions
diff --git a/tclxml-tcl/tclparser-8.1.tcl b/tclxml-tcl/tclparser-8.1.tcl
new file mode 100755
index 0000000..40a0af9
--- /dev/null
+++ b/tclxml-tcl/tclparser-8.1.tcl
@@ -0,0 +1,614 @@
+# tclparser-8.1.tcl --
+#
+# This file provides a Tcl implementation of a XML parser.
+# This file supports Tcl 8.1.
+#
+# See xml-8.[01].tcl for definitions of character sets and
+# regular expressions.
+#
+# Copyright (c) 2005-2008 by Explain.
+# http://www.explain.com.au/
+# Copyright (c) 1998-2003 Zveno Pty Ltd
+# http://www.zveno.com/
+#
+# See the file "LICENSE" in this distribution for information on usage and
+# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# $Id: tclparser-8.1.tcl,v 1.1.1.1 2009/01/16 22:11:49 joye Exp $
+
+package require Tcl 8.1
+
+package provide xml::tclparser 3.2
+
+package require xmldefs 3.2
+
+package require sgmlparser 1.0
+
+namespace eval xml::tclparser {
+
+ namespace export create createexternal externalentity parse configure get delete
+
+ # Tokenising expressions
+
+ variable tokExpr $::xml::tokExpr
+ variable substExpr $::xml::substExpr
+
+ # Register this parser class
+
+ ::xml::parserclass create tcl \
+ -createcommand [namespace code create] \
+ -createentityparsercommand [namespace code createentityparser] \
+ -parsecommand [namespace code parse] \
+ -configurecommand [namespace code configure] \
+ -deletecommand [namespace code delete] \
+ -resetcommand [namespace code reset]
+}
+
+# xml::tclparser::create --
+#
+# Creates XML parser object.
+#
+# Arguments:
+# name unique identifier for this instance
+#
+# Results:
+# The state variable is initialised.
+
+proc xml::tclparser::create name {
+
+ # Initialise state variable
+ upvar \#0 [namespace current]::$name parser
+ array set parser [list -name $name \
+ -cmd [uplevel 3 namespace current]::$name \
+ -final 1 \
+ -validate 0 \
+ -statevariable [namespace current]::$name \
+ -baseuri {} \
+ internaldtd {} \
+ entities [namespace current]::Entities$name \
+ extentities [namespace current]::ExtEntities$name \
+ parameterentities [namespace current]::PEntities$name \
+ externalparameterentities [namespace current]::ExtPEntities$name \
+ elementdecls [namespace current]::ElDecls$name \
+ attlistdecls [namespace current]::AttlistDecls$name \
+ notationdecls [namespace current]::NotDecls$name \
+ depth 0 \
+ leftover {} \
+ ]
+
+ # Initialise entities with predefined set
+ array set [namespace current]::Entities$name [array get ::sgml::EntityPredef]
+
+ return $parser(-cmd)
+}
+
+# xml::tclparser::createentityparser --
+#
+# Creates XML parser object for an entity.
+#
+# Arguments:
+# name name for the new parser
+# parent name of parent parser
+#
+# Results:
+# The state variable is initialised.
+
+proc xml::tclparser::createentityparser {parent name} {
+ upvar #0 [namespace current]::$parent p
+
+ # Initialise state variable
+ upvar \#0 [namespace current]::$name external
+ array set external [array get p]
+
+ regsub $parent $p(-cmd) {} parentns
+
+ array set external [list -name $name \
+ -cmd $parentns$name \
+ -statevariable [namespace current]::$name \
+ internaldtd {} \
+ line 0 \
+ ]
+ incr external(depth)
+
+ return $external(-cmd)
+}
+
+# xml::tclparser::configure --
+#
+# Configures a XML parser object.
+#
+# Arguments:
+# name unique identifier for this instance
+# args option name/value pairs
+#
+# Results:
+# May change values of config options
+
+proc xml::tclparser::configure {name args} {
+ upvar \#0 [namespace current]::$name parser
+
+ # BUG: very crude, no checks for illegal args
+ # Mats: Should be synced with sgmlparser.tcl
+ set options {-elementstartcommand -elementendcommand \
+ -characterdatacommand -processinginstructioncommand \
+ -externalentitycommand -xmldeclcommand \
+ -doctypecommand -commentcommand \
+ -entitydeclcommand -unparsedentitydeclcommand \
+ -parameterentitydeclcommand -notationdeclcommand \
+ -elementdeclcommand -attlistdeclcommand \
+ -paramentityparsing -defaultexpandinternalentities \
+ -startdoctypedeclcommand -enddoctypedeclcommand \
+ -entityreferencecommand -warningcommand \
+ -defaultcommand -unknownencodingcommand -notstandalonecommand \
+ -startcdatasectioncommand -endcdatasectioncommand \
+ -errorcommand -final \
+ -validate -baseuri -baseurl \
+ -name -cmd -emptyelement \
+ -parseattributelistcommand -parseentitydeclcommand \
+ -normalize -internaldtd -dtdsubset \
+ -reportempty -ignorewhitespace \
+ -reportempty \
+ }
+ set usage [join $options ", "]
+ regsub -all -- - $options {} options
+ set pat ^-([join $options |])$
+ foreach {flag value} $args {
+ if {[regexp $pat $flag]} {
+ # Validate numbers
+ if {[info exists parser($flag)] && \
+ [string is integer -strict $parser($flag)] && \
+ ![string is integer -strict $value]} {
+ return -code error "Bad value for $flag ($value), must be integer"
+ }
+ set parser($flag) $value
+ } else {
+ return -code error "Unknown option $flag, can be: $usage"
+ }
+ }
+
+ # Backward-compatibility: -baseuri is a synonym for -baseurl
+ catch {set parser(-baseuri) $parser(-baseurl)}
+
+ return {}
+}
+
+# xml::tclparser::parse --
+#
+# Parses document instance data
+#
+# Arguments:
+# name parser object
+# xml data
+# args configuration options
+#
+# Results:
+# Callbacks are invoked
+
+proc xml::tclparser::parse {name xml args} {
+
+ array set options $args
+ upvar \#0 [namespace current]::$name parser
+ variable tokExpr
+ variable substExpr
+
+ # Mats:
+ if {[llength $args]} {
+ eval {configure $name} $args
+ }
+
+ set parseOptions [list \
+ -emptyelement [namespace code ParseEmpty] \
+ -parseattributelistcommand [namespace code ParseAttrs] \
+ -parseentitydeclcommand [namespace code ParseEntity] \
+ -normalize 0]
+ eval lappend parseOptions \
+ [array get parser -*command] \
+ [array get parser -reportempty] \
+ [array get parser -ignorewhitespace] \
+ [array get parser -name] \
+ [array get parser -cmd] \
+ [array get parser -baseuri] \
+ [array get parser -validate] \
+ [array get parser -final] \
+ [array get parser -defaultexpandinternalentities] \
+ [array get parser entities] \
+ [array get parser extentities] \
+ [array get parser parameterentities] \
+ [array get parser externalparameterentities] \
+ [array get parser elementdecls] \
+ [array get parser attlistdecls] \
+ [array get parser notationdecls]
+
+ # Mats:
+ # If -final 0 we also need to maintain the state with a -statevariable !
+ if {!$parser(-final)} {
+ eval lappend parseOptions [array get parser -statevariable]
+ }
+
+ set dtdsubset no
+ catch {set dtdsubset $options(-dtdsubset)}
+ switch -- $dtdsubset {
+ internal {
+ # Bypass normal parsing
+ lappend parseOptions -statevariable $parser(-statevariable)
+ array set intOptions [array get ::sgml::StdOptions]
+ array set intOptions $parseOptions
+ ::sgml::ParseDTD:Internal [array get intOptions] $xml
+ return {}
+ }
+ external {
+ # Bypass normal parsing
+ lappend parseOptions -statevariable $parser(-statevariable)
+ array set intOptions [array get ::sgml::StdOptions]
+ array set intOptions $parseOptions
+ ::sgml::ParseDTD:External [array get intOptions] $xml
+ return {}
+ }
+ default {
+ # Pass through to normal processing
+ }
+ }
+
+ lappend tokenOptions \
+ -internaldtdvariable [namespace current]::${name}(internaldtd)
+
+ # Mats: If -final 0 we also need to maintain the state with a -statevariable !
+ if {!$parser(-final)} {
+ eval lappend tokenOptions [array get parser -statevariable] \
+ [array get parser -final]
+ }
+
+ # Mats:
+ # Why not the first four? Just padding? Lrange undos \n interp.
+ # It is necessary to have the first four as well if chopped off in
+ # middle of pcdata.
+ set tokenised [lrange \
+ [eval {::sgml::tokenise $xml $tokExpr $substExpr} $tokenOptions] \
+ 0 end]
+
+ lappend parseOptions -internaldtd [list $parser(internaldtd)]
+ eval ::sgml::parseEvent [list $tokenised] $parseOptions
+
+ return {}
+}
+
+# xml::tclparser::ParseEmpty -- Tcl 8.1+ version
+#
+# Used by parser to determine whether an element is empty.
+# This is usually dead easy in XML, but as always not quite.
+# Have to watch out for empty element syntax
+#
+# Arguments:
+# tag element name
+# attr attribute list (raw)
+# e End tag delimiter.
+#
+# Results:
+# Return value of e
+
+proc xml::tclparser::ParseEmpty {tag attr e} {
+ switch -glob [string length $e],[regexp "/[::xml::cl $::xml::Wsp]*$" $attr] {
+ 0,0 {
+ return {}
+ }
+ 0,* {
+ return /
+ }
+ default {
+ return $e
+ }
+ }
+}
+
+# xml::tclparser::ParseAttrs -- Tcl 8.1+ version
+#
+# Parse element attributes.
+#
+# There are two forms for name-value pairs:
+#
+# name="value"
+# name='value'
+#
+# Arguments:
+# opts parser options
+# attrs attribute string given in a tag
+#
+# Results:
+# Returns a Tcl list representing the name-value pairs in the
+# attribute string
+#
+# A ">" occurring in the attribute list causes problems when parsing
+# the XML. This manifests itself by an unterminated attribute value
+# and a ">" appearing the element text.
+# In this case return a three element list;
+# the message "unterminated attribute value", the attribute list it
+# did manage to parse and the remainder of the attribute list.
+
+proc xml::tclparser::ParseAttrs {opts attrs} {
+
+ set result {}
+
+ while {[string length [string trim $attrs]]} {
+ if {[regexp [::sgml::cl $::xml::Wsp]*($::xml::Name)[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')([::sgml::cl ^<]*?)\\2(.*) $attrs discard attrName delimiter value attrs]} {
+ lappend result $attrName [NormalizeAttValue $opts $value]
+ } elseif {[regexp [::sgml::cl $::xml::Wsp]*$::xml::Name[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*("|')[::sgml::cl ^<]*\$ $attrs]} {
+ return -code error [list {unterminated attribute value} $result $attrs]
+ } else {
+ return -code error "invalid attribute list"
+ }
+ }
+
+ return $result
+}
+
+# xml::tclparser::NormalizeAttValue --
+#
+# Perform attribute value normalisation. This involves:
+# . character references are appended to the value
+# . entity references are recursively processed and replacement value appended
+# . whitespace characters cause a space to be appended
+# . other characters appended as-is
+#
+# Arguments:
+# opts parser options
+# value unparsed attribute value
+#
+# Results:
+# Normalised value returned.
+
+proc xml::tclparser::NormalizeAttValue {opts value} {
+
+ # sgmlparser already has backslashes protected
+ # Protect Tcl specials
+ regsub -all {([][$])} $value {\\\1} value
+
+ # Deal with white space
+ regsub -all "\[$::xml::Wsp\]" $value { } value
+
+ # Find entity refs
+ regsub -all {&([^;]+);} $value {[NormalizeAttValue:DeRef $opts {\1}]} value
+
+ return [subst $value]
+}
+
+# xml::tclparser::NormalizeAttValue:DeRef --
+#
+# Handler to normalize attribute values
+#
+# Arguments:
+# opts parser options
+# ref entity reference
+#
+# Results:
+# Returns character
+
+proc xml::tclparser::NormalizeAttValue:DeRef {opts ref} {
+ # SRB: Bug fix 2008-11-18 #812051: surround case labels in braces for compatibility with Freewrap
+ switch -glob -- $ref {
+ {#x*} {
+ scan [string range $ref 2 end] %x value
+ set char [format %c $value]
+ # Check that the char is legal for XML
+ if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
+ return $char
+ } else {
+ return -code error "illegal character"
+ }
+ }
+ {#*} {
+ scan [string range $ref 1 end] %d value
+ set char [format %c $value]
+ # Check that the char is legal for XML
+ if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
+ return $char
+ } else {
+ return -code error "illegal character"
+ }
+ }
+ lt -
+ gt -
+ amp -
+ quot -
+ apos {
+ array set map {lt < gt > amp & quot \" apos '}
+ return $map($ref)
+ }
+ default {
+ # A general entity. Must resolve to a text value - no element structure.
+
+ array set options $opts
+ upvar #0 $options(entities) map
+
+ if {[info exists map($ref)]} {
+
+ if {[regexp < $map($ref)]} {
+ return -code error "illegal character \"<\" in attribute value"
+ }
+
+ if {![regexp & $map($ref)]} {
+ # Simple text replacement
+ return $map($ref)
+ }
+
+ # There are entity references in the replacement text.
+ # Can't use child entity parser since must catch element structures
+
+ return [NormalizeAttValue $opts $map($ref)]
+
+ } elseif {[string compare $options(-entityreferencecommand) "::sgml::noop"]} {
+
+ set result [uplevel #0 $options(-entityreferencecommand) [list $ref]]
+
+ return $result
+
+ } else {
+ return -code error "unable to resolve entity reference \"$ref\""
+ }
+ }
+ }
+}
+
+# xml::tclparser::ParseEntity --
+#
+# Parse general entity declaration
+#
+# Arguments:
+# data text to parse
+#
+# Results:
+# Tcl list containing entity declaration
+
+proc xml::tclparser::ParseEntity data {
+ set data [string trim $data]
+ if {[regexp $::sgml::ExternalEntityExpr $data discard type delimiter1 id1 discard delimiter2 id2 optNDATA ndata]} {
+ switch $type {
+ PUBLIC {
+ return [list external $id2 $id1 $ndata]
+ }
+ SYSTEM {
+ return [list external $id1 {} $ndata]
+ }
+ }
+ } elseif {[regexp {^("|')(.*?)\1$} $data discard delimiter value]} {
+ return [list internal $value]
+ } else {
+ return -code error "badly formed entity declaration"
+ }
+}
+
+# xml::tclparser::delete --
+#
+# Destroy parser data
+#
+# Arguments:
+# name parser object
+#
+# Results:
+# Parser data structure destroyed
+
+proc xml::tclparser::delete name {
+ upvar \#0 [namespace current]::$name parser
+ catch {::sgml::ParserDelete $parser(-statevariable)}
+ catch {unset parser}
+ return {}
+}
+
+# xml::tclparser::get --
+#
+# Retrieve additional information from the parser
+#
+# Arguments:
+# name parser object
+# method info to retrieve
+# args additional arguments for method
+#
+# Results:
+# Depends on method
+
+proc xml::tclparser::get {name method args} {
+ upvar #0 [namespace current]::$name parser
+
+ switch -- $method {
+
+ elementdecl {
+ switch [llength $args] {
+
+ 0 {
+ # Return all element declarations
+ upvar #0 $parser(elementdecls) elements
+ return [array get elements]
+ }
+
+ 1 {
+ # Return specific element declaration
+ upvar #0 $parser(elementdecls) elements
+ if {[info exists elements([lindex $args 0])]} {
+ return [array get elements [lindex $args 0]]
+ } else {
+ return -code error "element \"[lindex $args 0]\" not declared"
+ }
+ }
+
+ default {
+ return -code error "wrong number of arguments: should be \"elementdecl ?element?\""
+ }
+ }
+ }
+
+ attlist {
+ if {[llength $args] != 1} {
+ return -code error "wrong number of arguments: should be \"get attlist element\""
+ }
+
+ upvar #0 $parser(attlistdecls)
+
+ return {}
+ }
+
+ entitydecl {
+ }
+
+ parameterentitydecl {
+ }
+
+ notationdecl {
+ }
+
+ default {
+ return -code error "unknown method \"$method\""
+ }
+ }
+
+ return {}
+}
+
+# xml::tclparser::ExternalEntity --
+#
+# Resolve and parse external entity
+#
+# Arguments:
+# name parser object
+# base base URL
+# sys system identifier
+# pub public identifier
+#
+# Results:
+# External entity is fetched and parsed
+
+proc xml::tclparser::ExternalEntity {name base sys pub} {
+}
+
+# xml::tclparser:: --
+#
+# Reset a parser instance, ready to parse another document
+#
+# Arguments:
+# name parser object
+#
+# Results:
+# Variables unset
+
+proc xml::tclparser::reset {name} {
+ upvar \#0 [namespace current]::$name parser
+
+ # Has this parser object been properly initialised?
+ if {![info exists parser] || \
+ ![info exists parser(-name)]} {
+ return [create $name]
+ }
+
+ array set parser {
+ -final 1
+ depth 0
+ leftover {}
+ }
+
+ foreach var {Entities ExtEntities PEntities ExtPEntities ElDecls AttlistDecls NotDecls} {
+ catch {unset [namespace current]::${var}$name}
+ }
+
+ # Initialise entities with predefined set
+ array set [namespace current]::Entities$name [array get ::sgml::EntityPredef]
+
+ return {}
+}