diff options
Diffstat (limited to 'library/http')
| -rw-r--r-- | library/http/http.tcl | 1466 | ||||
| -rw-r--r-- | library/http/pkgIndex.tcl | 13 |
2 files changed, 1264 insertions, 215 deletions
diff --git a/library/http/http.tcl b/library/http/http.tcl index d0a24f1..a6b2bfd 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1,50 +1,144 @@ # http.tcl -- # -# Client-side HTTP for GET, POST, and HEAD commands. -# These routines can be used in untrusted code that uses -# the Safesock security policy. These procedures use a -# callback interface to avoid using vwait, which is not -# defined in the safe base. +# Client-side HTTP for GET, POST, and HEAD commands. These routines can +# be used in untrusted code that uses the Safesock security policy. +# These procedures use a callback interface to avoid using vwait, which +# is not defined in the safe base. # -# See the file "license.terms" for information on usage and -# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. -# -# RCS: @(#) $Id: http.tcl,v 1.3 1998/09/14 18:40:03 stanton Exp $ +# See the file "license.terms" for information on usage and redistribution of +# this file, and for a DISCLAIMER OF ALL WARRANTIES. -package provide http 2.0 ;# This uses Tcl namespaces +package require Tcl 8.6 +# Keep this in sync with pkgIndex.tcl and with the install directories in +# Makefiles +package provide http 2.8.8 namespace eval http { - variable http + # Allow resourcing to not clobber existing data - array set http { - -accept */* - -proxyhost {} - -proxyport {} - -useragent {Tcl http client package 2.0} - -proxyfilter http::ProxyRequired + variable http + if {![info exists http]} { + array set http { + -accept */* + -proxyhost {} + -proxyport {} + -proxyfilter http::ProxyRequired + -urlencoding utf-8 + } + # We need a useragent string of this style or various servers will refuse to + # send us compressed content even when we ask for it. This follows the + # de-facto layout of user-agent strings in current browsers. + set http(-useragent) "Mozilla/5.0\ + ([string totitle $::tcl_platform(platform)]; U;\ + $::tcl_platform(os) $::tcl_platform(osVersion))\ + http/[package provide http] Tcl/[package provide Tcl]" } - variable formMap - set alphanumeric a-zA-Z0-9 + proc init {} { + # Set up the map for quoting chars. RFC3986 Section 2.3 say percent + # encode all except: "... percent-encoded octets in the ranges of + # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period + # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI + # producers ..." + for {set i 0} {$i <= 256} {incr i} { + set c [format %c $i] + if {![string match {[-._~a-zA-Z0-9]} $c]} { + set map($c) %[format %.2X $i] + } + } + # These are handled specially + set map(\n) %0D%0A + variable formMap [array get map] - for {set i 1} {$i <= 256} {incr i} { - set c [format %c $i] - if {![string match \[$alphanumeric\] $c]} { - set formMap($c) %[format %.2x $i] + # Create a map for HTTP/1.1 open sockets + variable socketmap + if {[info exists socketmap]} { + # Close but don't remove open sockets on re-init + foreach {url sock} [array get socketmap] { + catch {close $sock} + } } + array set socketmap {} + } + init + + variable urlTypes + if {![info exists urlTypes]} { + set urlTypes(http) [list 80 ::socket] + } + + variable encodings [string tolower [encoding names]] + # This can be changed, but iso8859-1 is the RFC standard. + variable defaultCharset + if {![info exists defaultCharset]} { + set defaultCharset "iso8859-1" + } + + # Force RFC 3986 strictness in geturl url verification? + variable strict + if {![info exists strict]} { + set strict 1 } - # These are handled specially - array set formMap { - " " + \n %0d%0a + + # Let user control default keepalive for compatibility + variable defaultKeepalive + if {![info exists defaultKeepalive]} { + set defaultKeepalive 0 } - namespace export geturl config reset wait formatQuery + namespace export geturl config reset wait formatQuery register unregister # Useful, but not exported: data size status code } +# http::Log -- +# +# Debugging output -- define this to observe HTTP/1.1 socket usage. +# Should echo any args received. +# +# Arguments: +# msg Message to output +# +if {[info command http::Log] eq {}} {proc http::Log {args} {}} + +# http::register -- +# +# See documentation for details. +# +# Arguments: +# proto URL protocol prefix, e.g. https +# port Default port for protocol +# command Command to use to create socket +# Results: +# list of port and command that was registered. + +proc http::register {proto port command} { + variable urlTypes + set urlTypes([string tolower $proto]) [list $port $command] +} + +# http::unregister -- +# +# Unregisters URL protocol handler +# +# Arguments: +# proto URL protocol prefix, e.g. https +# Results: +# list of port and command that was unregistered. + +proc http::unregister {proto} { + variable urlTypes + set lower [string tolower $proto] + if {![info exists urlTypes($lower)]} { + return -code error "unsupported url type \"$proto\"" + } + set old $urlTypes($lower) + unset urlTypes($lower) + return $old +} + # http::config -- # -# See documentaion for details. +# See documentation for details. # # Arguments: # args Options parsed by the procedure. @@ -62,67 +156,131 @@ proc http::config {args} { } return $result } - regsub -all -- - $options {} options - set pat ^-([join $options |])$ + set options [string map {- ""} $options] + set pat ^-(?:[join $options |])$ if {[llength $args] == 1} { set flag [lindex $args 0] - if {[regexp -- $pat $flag]} { - return $http($flag) - } else { + if {![regexp -- $pat $flag]} { return -code error "Unknown option $flag, must be: $usage" } + return $http($flag) } else { foreach {flag value} $args { - if {[regexp -- $pat $flag]} { - set http($flag) $value - } else { + if {![regexp -- $pat $flag]} { return -code error "Unknown option $flag, must be: $usage" } + set http($flag) $value } } } - proc http::Finish { token {errormsg ""} } { +# http::Finish -- +# +# Clean up the socket and eval close time callbacks +# +# Arguments: +# token Connection token. +# errormsg (optional) If set, forces status to error. +# skipCB (optional) If set, don't call the -command callback. This +# is useful when geturl wants to throw an exception instead +# of calling the callback. That way, the same error isn't +# reported to two places. +# +# Side Effects: +# Closes the socket + +proc http::Finish {token {errormsg ""} {skipCB 0}} { variable $token upvar 0 $token state global errorInfo errorCode - if {[string length $errormsg] != 0} { + if {$errormsg ne ""} { set state(error) [list $errormsg $errorInfo $errorCode] - set state(status) error + set state(status) "error" } - catch {close $state(sock)} - catch {after cancel $state(after)} - if {[info exists state(-command)]} { - if {[catch {eval $state(-command) {$token}} err]} { - if {[string length $errormsg] == 0} { - set state(error) [list $err $errorInfo $errorCode] - set state(status) error + if { + ($state(status) eq "timeout") || ($state(status) eq "error") || + ([info exists state(connection)] && ($state(connection) eq "close")) + } { + CloseSocket $state(sock) $token + } + if {[info exists state(after)]} { + after cancel $state(after) + } + if {[info exists state(-command)] && !$skipCB + && ![info exists state(done-command-cb)]} { + set state(done-command-cb) yes + if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} { + set state(error) [list $err $errorInfo $errorCode] + set state(status) error + } + } +} + +# http::CloseSocket - +# +# Close a socket and remove it from the persistent sockets table. If +# possible an http token is included here but when we are called from a +# fileevent on remote closure we need to find the correct entry - hence +# the second section. + +proc ::http::CloseSocket {s {token {}}} { + variable socketmap + catch {fileevent $s readable {}} + set conn_id {} + if {$token ne ""} { + variable $token + upvar 0 $token state + if {[info exists state(socketinfo)]} { + set conn_id $state(socketinfo) + } + } else { + set map [array get socketmap] + set ndx [lsearch -exact $map $s] + if {$ndx != -1} { + incr ndx -1 + set conn_id [lindex $map $ndx] + } + } + if {$conn_id eq {} || ![info exists socketmap($conn_id)]} { + Log "Closing socket $s (no connection info)" + if {[catch {close $s} err]} { + Log "Error: $err" + } + } else { + if {[info exists socketmap($conn_id)]} { + Log "Closing connection $conn_id (sock $socketmap($conn_id))" + if {[catch {close $socketmap($conn_id)} err]} { + Log "Error: $err" } + unset socketmap($conn_id) + } else { + Log "Cannot close connection $conn_id - no socket in socket map" } - unset state(-command) } } # http::reset -- # -# See documentaion for details. +# See documentation for details. # # Arguments: # token Connection token. # why Status info. -# Results: -# TODO +# +# Side Effects: +# See Finish -proc http::reset { token {why reset} } { +proc http::reset {token {why reset}} { variable $token upvar 0 $token state set state(status) $why catch {fileevent $state(sock) readable {}} + catch {fileevent $state(sock) writable {}} Finish $token if {[info exists state(error)]} { set errorlist $state(error) - unset state(error) - eval error $errorlist + unset state + eval ::error $errorlist } } @@ -131,15 +289,23 @@ proc http::reset { token {why reset} } { # Establishes a connection to a remote url via http. # # Arguments: -# url The http URL to goget. -# args Option value pairs. Valid options include: +# url The http URL to goget. +# args Option value pairs. Valid options include: # -blocksize, -validate, -headers, -timeout # Results: -# Returns a token for this connection. - +# Returns a token for this connection. This token is the name of an +# array that the caller should unset to garbage collect the state. -proc http::geturl { url args } { +proc http::geturl {url args} { variable http + variable urlTypes + variable defaultCharset + variable defaultKeepalive + variable strict + + # Initialize the state variable, an array. We'll return the name of this + # array as the token for the transaction. + if {![info exists http(uid)]} { set http(uid) 0 } @@ -147,110 +313,489 @@ proc http::geturl { url args } { variable $token upvar 0 $token state reset $token + + # Process command options. + array set state { - -blocksize 8192 - -validate 0 - -headers {} - -timeout 0 - state header + -binary false + -blocksize 8192 + -queryblocksize 8192 + -validate 0 + -headers {} + -timeout 0 + -type application/x-www-form-urlencoded + -queryprogress {} + -protocol 1.1 + binary 0 + state connecting meta {} + coding {} currentsize 0 totalsize 0 - type text/html - body {} + querylength 0 + queryoffset 0 + type text/html + body {} status "" + http "" + connection close } - set options {-blocksize -channel -command -handler -headers \ - -progress -query -validate -timeout} - set usage [join $options ", "] - regsub -all -- - $options {} options - set pat ^-([join $options |])$ + set state(-keepalive) $defaultKeepalive + set state(-strict) $strict + # These flags have their types verified [Bug 811170] + array set type { + -binary boolean + -blocksize integer + -queryblocksize integer + -strict boolean + -timeout integer + -validate boolean + } + set state(charset) $defaultCharset + set options { + -binary -blocksize -channel -command -handler -headers -keepalive + -method -myaddr -progress -protocol -query -queryblocksize + -querychannel -queryprogress -strict -timeout -type -validate + } + set usage [join [lsort $options] ", "] + set options [string map {- ""} $options] + set pat ^-(?:[join $options |])$ foreach {flag value} $args { - if {[regexp $pat $flag]} { + if {[regexp -- $pat $flag]} { # Validate numbers - if {[info exists state($flag)] && \ - [regexp {^[0-9]+$} $state($flag)] && \ - ![regexp {^[0-9]+$} $value]} { - return -code error "Bad value for $flag ($value), must be integer" + if { + [info exists type($flag)] && + ![string is $type($flag) -strict $value] + } { + unset $token + return -code error \ + "Bad value for $flag ($value), must be $type($flag)" } set state($flag) $value } else { + unset $token return -code error "Unknown option $flag, can be: $usage" } } - if {! [regexp -nocase {^(http://)?([^/:]+)(:([0-9]+))?(/.*)?$} $url \ - x proto host y port srvurl]} { - error "Unsupported URL: $url" + + # Make sure -query and -querychannel aren't both specified + + set isQueryChannel [info exists state(-querychannel)] + set isQuery [info exists state(-query)] + if {$isQuery && $isQueryChannel} { + unset $token + return -code error "Can't combine -query and -querychannel options!" + } + + # Validate URL, determine the server host and port, and check proxy case + # Recognize user:pass@host URLs also, although we do not do anything with + # that info yet. + + # URLs have basically four parts. + # First, before the colon, is the protocol scheme (e.g. http) + # Second, for HTTP-like protocols, is the authority + # The authority is preceded by // and lasts up to (but not including) + # the following / or ? and it identifies up to four parts, of which + # only one, the host, is required (if an authority is present at all). + # All other parts of the authority (user name, password, port number) + # are optional. + # Third is the resource name, which is split into two parts at a ? + # The first part (from the single "/" up to "?") is the path, and the + # second part (from that "?" up to "#") is the query. *HOWEVER*, we do + # not need to separate them; we send the whole lot to the server. + # Both, path and query are allowed to be missing, including their + # delimiting character. + # Fourth is the fragment identifier, which is everything after the first + # "#" in the URL. The fragment identifier MUST NOT be sent to the server + # and indeed, we don't bother to validate it (it could be an error to + # pass it in here, but it's cheap to strip). + # + # An example of a URL that has all the parts: + # + # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes + # + # The "http" is the protocol, the user is "jschmoe", the password is + # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is + # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". + # + # Note that the RE actually combines the user and password parts, as + # recommended in RFC 3986. Indeed, that RFC states that putting passwords + # in URLs is a Really Bad Idea, something with which I would agree utterly. + # + # From a validation perspective, we need to ensure that the parts of the + # URL that are going to the server are correctly encoded. This is only + # done if $state(-strict) is true (inherited from $::http::strict). + + set URLmatcher {(?x) # this is _expanded_ syntax + ^ + (?: (\w+) : ) ? # <protocol scheme> + (?: // + (?: + ( + [^@/\#?]+ # <userinfo part of authority> + ) @ + )? + ( # <host part of authority> + [^/:\#?]+ | # host name or IPv4 address + \[ [^/\#?]+ \] # IPv6 address in square brackets + ) + (?: : (\d+) )? # <port part of authority> + )? + ( [/\?] [^\#]*)? # <path> (including query) + (?: \# (.*) )? # <fragment> + $ + } + + # Phase one: parse + if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { + unset $token + return -code error "Unsupported URL: $url" } - if {[string length $port] == 0} { - set port 80 + # Phase two: validate + set host [string trim $host {[]}]; # strip square brackets from IPv6 address + if {$host eq ""} { + # Caller has to provide a host name; we do not have a "default host" + # that would enable us to handle relative URLs. + unset $token + return -code error "Missing host part: $url" + # Note that we don't check the hostname for validity here; if it's + # invalid, we'll simply fail to resolve it later on. } - if {[string length $srvurl] == 0} { + if {$port ne "" && $port > 65535} { + unset $token + return -code error "Invalid port number: $port" + } + # The user identification and resource identification parts of the URL can + # have encoded characters in them; take care! + if {$user ne ""} { + # Check for validity according to RFC 3986, Appendix A + set validityRE {(?xi) + ^ + (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ + $ + } + if {$state(-strict) && ![regexp -- $validityRE $user]} { + unset $token + # Provide a better error message in this error case + if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { + return -code error \ + "Illegal encoding character usage \"$bad\" in URL user" + } + return -code error "Illegal characters in URL user" + } + } + if {$srvurl ne ""} { + # RFC 3986 allows empty paths (not even a /), but servers + # return 400 if the path in the HTTP request doesn't start + # with / , so add it here if needed. + if {[string index $srvurl 0] ne "/"} { + set srvurl /$srvurl + } + # Check for validity according to RFC 3986, Appendix A + set validityRE {(?xi) + ^ + # Path part (already must start with / character) + (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* + # Query part (optional, permits ? characters) + (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? + $ + } + if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { + unset $token + # Provide a better error message in this error case + if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { + return -code error \ + "Illegal encoding character usage \"$bad\" in URL path" + } + return -code error "Illegal characters in URL path" + } + } else { set srvurl / } - if {[string length $proto] == 0} { - set url http://$url + if {$proto eq ""} { + set proto http + } + set lower [string tolower $proto] + if {![info exists urlTypes($lower)]} { + unset $token + return -code error "Unsupported URL type \"$proto\"" + } + set defport [lindex $urlTypes($lower) 0] + set defcmd [lindex $urlTypes($lower) 1] + + if {$port eq ""} { + set port $defport } - set state(url) $url if {![catch {$http(-proxyfilter) $host} proxy]} { set phost [lindex $proxy 0] set pport [lindex $proxy 1] } + + # OK, now reassemble into a full URL + set url ${proto}:// + if {$user ne ""} { + append url $user + append url @ + } + append url $host + if {$port != $defport} { + append url : $port + } + append url $srvurl + # Don't append the fragment! + set state(url) $url + + # If a timeout is specified we set up the after event and arrange for an + # asynchronous socket connection. + + set sockopts [list -async] if {$state(-timeout) > 0} { - set state(after) [after $state(-timeout) [list http::reset $token timeout]] + set state(after) [after $state(-timeout) \ + [list http::reset $token timeout]] } - if {[info exists phost] && [string length $phost]} { + + # If we are using the proxy, we must pass in the full URL that includes + # the server name. + + if {[info exists phost] && ($phost ne "")} { set srvurl $url - set s [socket $phost $pport] + set targetAddr [list $phost $pport] } else { - set s [socket $host $port] + set targetAddr [list $host $port] } - set state(sock) $s + # Proxy connections aren't shared among different hosts. + set state(socketinfo) $host:$port + + # See if we are supposed to use a previously opened channel. + if {$state(-keepalive)} { + variable socketmap + if {[info exists socketmap($state(socketinfo))]} { + if {[catch {fconfigure $socketmap($state(socketinfo))}]} { + Log "WARNING: socket for $state(socketinfo) was closed" + unset socketmap($state(socketinfo)) + } else { + set sock $socketmap($state(socketinfo)) + Log "reusing socket $sock for $state(socketinfo)" + catch {fileevent $sock writable {}} + catch {fileevent $sock readable {}} + } + } + # don't automatically close this connection socket + set state(connection) {} + } + if {![info exists sock]} { + # Pass -myaddr directly to the socket command + if {[info exists state(-myaddr)]} { + lappend sockopts -myaddr $state(-myaddr) + } + if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { + # something went wrong while trying to establish the connection. + # Clean up after events and such, but DON'T call the command + # callback (if available) because we're going to throw an + # exception from here instead. + + set state(sock) $sock + Finish $token "" 1 + cleanup $token + return -code error $sock + } + } + set state(sock) $sock + Log "Using $sock for $state(socketinfo)" \ + [expr {$state(-keepalive)?"keepalive":""}] + if {$state(-keepalive)} { + set socketmap($state(socketinfo)) $sock + } + + if {![info exists phost]} { + set phost "" + } + fileevent $sock writable [list http::Connect $token $proto $phost $srvurl] + + # Wait for the connection to complete. + if {![info exists state(-command)]} { + # geturl does EVERYTHING asynchronously, so if the user + # calls it synchronously, we just do a wait here. + http::wait $token + + if {![info exists state]} { + # If we timed out then Finish has been called and the users + # command callback may have cleaned up the token. If so we end up + # here with nothing left to do. + return $token + } elseif {$state(status) eq "error"} { + # Something went wrong while trying to establish the connection. + # Clean up after events and such, but DON'T call the command + # callback (if available) because we're going to throw an + # exception from here instead. + set err [lindex $state(error) 0] + cleanup $token + return -code error $err + } + } + + return $token +} + + +proc http::Connected { token proto phost srvurl} { + variable http + variable urlTypes + + variable $token + upvar 0 $token state + + # Set back the variables needed here + set sock $state(sock) + set isQueryChannel [info exists state(-querychannel)] + set isQuery [info exists state(-query)] + set host [lindex [split $state(socketinfo) :] 0] + set port [lindex [split $state(socketinfo) :] 1] + + set lower [string tolower $proto] + set defport [lindex $urlTypes($lower) 0] # Send data in cr-lf format, but accept any line terminators - fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize) + fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize) - # The following is disallowed in safe interpreters, but the socket - # is already in non-blocking mode in that case. + # The following is disallowed in safe interpreters, but the socket is + # already in non-blocking mode in that case. - catch {fconfigure $s -blocking off} - set len 0 + catch {fconfigure $sock -blocking off} set how GET - if {[info exists state(-query)]} { - set len [string length $state(-query)] - if {$len > 0} { + if {$isQuery} { + set state(querylength) [string length $state(-query)] + if {$state(querylength) > 0} { set how POST + set contDone 0 + } else { + # There's no query data. + unset state(-query) + set isQuery 0 } } elseif {$state(-validate)} { set how HEAD + } elseif {$isQueryChannel} { + set how POST + # The query channel must be blocking for the async Write to + # work properly. + fconfigure $state(-querychannel) -blocking 1 -translation binary + set contDone 0 } - puts $s "$how $srvurl HTTP/1.0" - puts $s "Accept: $http(-accept)" - puts $s "Host: $host" - puts $s "User-Agent: $http(-useragent)" - foreach {key value} $state(-headers) { - regsub -all \[\n\r\] $value {} value - set key [string trim $key] - if {[string length $key]} { - puts $s "$key: $value" - } - } - if {$len > 0} { - puts $s "Content-Length: $len" - puts $s "Content-Type: application/x-www-form-urlencoded" - puts $s "" - fconfigure $s -translation {auto binary} - puts $s $state(-query) - } else { - puts $s "" + if {[info exists state(-method)] && $state(-method) ne ""} { + set how $state(-method) } - flush $s - fileevent $s readable [list http::Event $token] - if {! [info exists state(-command)]} { - wait $token + # We cannot handle chunked encodings with -handler, so force HTTP/1.0 + # until we can manage this. + if {[info exists state(-handler)]} { + set state(-protocol) 1.0 } - return $token + if {[catch { + puts $sock "$how $srvurl HTTP/$state(-protocol)" + puts $sock "Accept: $http(-accept)" + array set hdrs $state(-headers) + if {[info exists hdrs(Host)]} { + # Allow Host spoofing. [Bug 928154] + puts $sock "Host: $hdrs(Host)" + } elseif {$port == $defport} { + # Don't add port in this case, to handle broken servers. [Bug + # #504508] + puts $sock "Host: $host" + } else { + puts $sock "Host: $host:$port" + } + unset hdrs + puts $sock "User-Agent: $http(-useragent)" + if {$state(-protocol) == 1.0 && $state(-keepalive)} { + puts $sock "Connection: keep-alive" + } + if {$state(-protocol) > 1.0 && !$state(-keepalive)} { + puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 + } + if {[info exists phost] && ($phost ne "") && $state(-keepalive)} { + puts $sock "Proxy-Connection: Keep-Alive" + } + set accept_encoding_seen 0 + set content_type_seen 0 + foreach {key value} $state(-headers) { + if {[string equal -nocase $key "host"]} { + continue + } + if {[string equal -nocase $key "accept-encoding"]} { + set accept_encoding_seen 1 + } + if {[string equal -nocase $key "content-type"]} { + set content_type_seen 1 + } + set value [string map [list \n "" \r ""] $value] + set key [string trim $key] + if {[string equal -nocase $key "content-length"]} { + set contDone 1 + set state(querylength) $value + } + if {[string length $key]} { + puts $sock "$key: $value" + } + } + if {!$accept_encoding_seen && ![info exists state(-handler)]} { + puts $sock "Accept-Encoding: deflate,gzip,compress" + } + if {$isQueryChannel && $state(querylength) == 0} { + # Try to determine size of data in channel. If we cannot seek, the + # surrounding catch will trap us + + set start [tell $state(-querychannel)] + seek $state(-querychannel) 0 end + set state(querylength) \ + [expr {[tell $state(-querychannel)] - $start}] + seek $state(-querychannel) $start + } + + # Flush the request header and set up the fileevent that will either + # push the POST data or read the response. + # + # fileevent note: + # + # It is possible to have both the read and write fileevents active at + # this point. The only scenario it seems to affect is a server that + # closes the connection without reading the POST data. (e.g., early + # versions TclHttpd in various error cases). Depending on the + # platform, the client may or may not be able to get the response from + # the server because of the error it will get trying to write the post + # data. Having both fileevents active changes the timing and the + # behavior, but no two platforms (among Solaris, Linux, and NT) behave + # the same, and none behave all that well in any case. Servers should + # always read their POST data if they expect the client to read their + # response. + + if {$isQuery || $isQueryChannel} { + if {!$content_type_seen} { + puts $sock "Content-Type: $state(-type)" + } + if {!$contDone} { + puts $sock "Content-Length: $state(querylength)" + } + puts $sock "" + fconfigure $sock -translation {auto binary} + fileevent $sock writable [list http::Write $token] + } else { + puts $sock "" + flush $sock + fileevent $sock readable [list http::Event $sock $token] + } + + } err]} { + # The socket probably was never connected, or the connection dropped + # later. + + # if state(status) is error, it means someone's already called Finish + # to do the above-described clean up. + if {$state(status) ne "error"} { + Finish $token $err + } + } + } # Data access functions: @@ -265,6 +810,9 @@ proc http::data {token} { return $state(body) } proc http::status {token} { + if {![info exists $token]} { + return "error" + } variable $token upvar 0 $token state return $state(status) @@ -274,119 +822,512 @@ proc http::code {token} { upvar 0 $token state return $state(http) } +proc http::ncode {token} { + variable $token + upvar 0 $token state + if {[regexp {[0-9]{3}} $state(http) numeric_code]} { + return $numeric_code + } else { + return $state(http) + } +} proc http::size {token} { variable $token upvar 0 $token state return $state(currentsize) } +proc http::meta {token} { + variable $token + upvar 0 $token state + return $state(meta) +} +proc http::error {token} { + variable $token + upvar 0 $token state + if {[info exists state(error)]} { + return $state(error) + } + return "" +} + +# http::cleanup +# +# Garbage collect the state associated with a transaction +# +# Arguments +# token The token returned from http::geturl +# +# Side Effects +# unsets the state array - proc http::Event {token} { +proc http::cleanup {token} { variable $token upvar 0 $token state - set s $state(sock) + if {[info exists state]} { + unset state + } +} - if {[::eof $s]} { - Eof $token +# http::Connect +# +# This callback is made when an asyncronous connection completes. +# +# Arguments +# token The token returned from http::geturl +# +# Side Effects +# Sets the status of the connection, which unblocks +# the waiting geturl call + +proc http::Connect {token proto phost srvurl} { + variable $token + upvar 0 $token state + set err "due to unexpected EOF" + if { + [eof $state(sock)] || + [set err [fconfigure $state(sock) -error]] ne "" + } { + Finish $token "connect failed $err" + } else { + fileevent $state(sock) writable {} + ::http::Connected $token $proto $phost $srvurl + } + return +} + +# http::Write +# +# Write POST query data to the socket +# +# Arguments +# token The token for the connection +# +# Side Effects +# Write the socket and handle callbacks. + +proc http::Write {token} { + variable $token + upvar 0 $token state + set sock $state(sock) + + # Output a block. Tcl will buffer this if the socket blocks + set done 0 + if {[catch { + # Catch I/O errors on dead sockets + + if {[info exists state(-query)]} { + # Chop up large query strings so queryprogress callback can give + # smooth feedback. + + puts -nonewline $sock \ + [string range $state(-query) $state(queryoffset) \ + [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] + incr state(queryoffset) $state(-queryblocksize) + if {$state(queryoffset) >= $state(querylength)} { + set state(queryoffset) $state(querylength) + set done 1 + } + } else { + # Copy blocks from the query channel + + set outStr [read $state(-querychannel) $state(-queryblocksize)] + puts -nonewline $sock $outStr + incr state(queryoffset) [string length $outStr] + if {[eof $state(-querychannel)]} { + set done 1 + } + } + } err]} { + # Do not call Finish here, but instead let the read half of the socket + # process whatever server reply there is to get. + + set state(posterror) $err + set done 1 + } + if {$done} { + catch {flush $sock} + fileevent $sock writable {} + fileevent $sock readable [list http::Event $sock $token] + } + + # Callback to the client after we've completely handled everything. + + if {[string length $state(-queryprogress)]} { + eval $state(-queryprogress) \ + [list $token $state(querylength) $state(queryoffset)] + } +} + +# http::Event +# +# Handle input on the socket +# +# Arguments +# sock The socket receiving input. +# token The token returned from http::geturl +# +# Side Effects +# Read the socket and handle callbacks. + +proc http::Event {sock token} { + variable $token + upvar 0 $token state + + if {![info exists state]} { + Log "Event $sock with invalid token '$token' - remote close?" + if {![eof $sock]} { + if {[set d [read $sock]] ne ""} { + Log "WARNING: additional data left on closed socket" + } + } + CloseSocket $sock return } - if {$state(state) == "header"} { - set n [gets $s line] - if {$n == 0} { + if {$state(state) eq "connecting"} { + if {[catch {gets $sock state(http)} n]} { + return [Finish $token $n] + } elseif {$n >= 0} { + set state(state) "header" + } + } elseif {$state(state) eq "header"} { + if {[catch {gets $sock line} n]} { + return [Finish $token $n] + } elseif {$n == 0} { + # We have now read all headers + # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 + if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} { + return + } + set state(state) body - if {![regexp -nocase ^text $state(type)]} { + + # If doing a HEAD, then we won't get any body + if {$state(-validate)} { + Eof $token + return + } + + # For non-chunked transfer we may have no body - in this case we + # may get no further file event if the connection doesn't close + # and no more data is sent. We can tell and must finish up now - + # not later. + if { + !(([info exists state(connection)] + && ($state(connection) eq "close")) + || [info exists state(transfer)]) + && ($state(totalsize) == 0) + } { + Log "body size is 0 and no events likely - complete." + Eof $token + return + } + + # We have to use binary translation to count bytes properly. + fconfigure $sock -translation binary + + if { + $state(-binary) || ![string match -nocase text* $state(type)] + } { # Turn off conversions for non-text data - fconfigure $s -translation binary - if {[info exists state(-channel)]} { + set state(binary) 1 + } + if {[info exists state(-channel)]} { + if {$state(binary) || [llength [ContentEncoding $token]]} { fconfigure $state(-channel) -translation binary } - } - if {[info exists state(-channel)] && - ![info exists state(-handler)]} { - # Initiate a sequence of background fcopies - fileevent $s readable {} - CopyStart $s $token + if {![info exists state(-handler)]} { + # Initiate a sequence of background fcopies + fileevent $sock readable {} + CopyStart $sock $token + return + } } } elseif {$n > 0} { - if {[regexp -nocase {^content-type:(.+)$} $line x type]} { - set state(type) [string trim $type] - } - if {[regexp -nocase {^content-length:(.+)$} $line x length]} { - set state(totalsize) [string trim $length] - } + # Process header lines if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { - lappend state(meta) $key $value - } elseif {[regexp ^HTTP $line]} { - set state(http) $line + switch -- [string tolower $key] { + content-type { + set state(type) [string trim [string tolower $value]] + # grab the optional charset information + if {[regexp -nocase \ + {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \ + $state(type) -> cs]} { + set state(charset) [string map {{\"} \"} $cs] + } else { + regexp -nocase {charset\s*=\s*(\S+?);?} \ + $state(type) -> state(charset) + } + } + content-length { + set state(totalsize) [string trim $value] + } + content-encoding { + set state(coding) [string trim $value] + } + transfer-encoding { + set state(transfer) \ + [string trim [string tolower $value]] + } + proxy-connection - + connection { + set state(connection) \ + [string trim [string tolower $value]] + } + } + lappend state(meta) $key [string trim $value] } } } else { + # Now reading body if {[catch { if {[info exists state(-handler)]} { - set n [eval $state(-handler) {$s $token}] + set n [eval $state(-handler) [list $sock $token]] + } elseif {[info exists state(transfer_final)]} { + set line [getTextLine $sock] + set n [string length $line] + if {$n > 0} { + Log "found $n bytes following final chunk" + append state(transfer_final) $line + } else { + Log "final chunk part" + Eof $token + } + } elseif { + [info exists state(transfer)] + && $state(transfer) eq "chunked" + } { + set size 0 + set chunk [getTextLine $sock] + set n [string length $chunk] + if {[string trim $chunk] ne ""} { + scan $chunk %x size + if {$size != 0} { + set bl [fconfigure $sock -blocking] + fconfigure $sock -blocking 1 + set chunk [read $sock $size] + fconfigure $sock -blocking $bl + set n [string length $chunk] + if {$n >= 0} { + append state(body) $chunk + } + if {$size != [string length $chunk]} { + Log "WARNING: mis-sized chunk:\ + was [string length $chunk], should be $size" + } + getTextLine $sock + } else { + set state(transfer_final) {} + } + } } else { - set block [read $s $state(-blocksize)] + #Log "read non-chunk $state(currentsize) of $state(totalsize)" + set block [read $sock $state(-blocksize)] set n [string length $block] if {$n >= 0} { append state(body) $block } } - if {$n >= 0} { - incr state(currentsize) $n + if {[info exists state]} { + if {$n >= 0} { + incr state(currentsize) $n + } + # If Content-Length - check for end of data. + if { + ($state(totalsize) > 0) + && ($state(currentsize) >= $state(totalsize)) + } { + Eof $token + } } } err]} { - Finish $token $err + return [Finish $token $err] } else { if {[info exists state(-progress)]} { - eval $state(-progress) {$token $state(totalsize) $state(currentsize)} + eval $state(-progress) \ + [list $token $state(totalsize) $state(currentsize)] } } } + + # catch as an Eof above may have closed the socket already + if {![catch {eof $sock} eof] && $eof} { + if {[info exists $token]} { + set state(connection) close + Eof $token + } else { + # open connection closed on a token that has been cleaned up. + CloseSocket $sock + } + return + } } - proc http::CopyStart {s token} { - variable $token + +# http::getTextLine -- +# +# Get one line with the stream in blocking crlf mode +# +# Arguments +# sock The socket receiving input. +# +# Results: +# The line of text, without trailing newline + +proc http::getTextLine {sock} { + set tr [fconfigure $sock -translation] + set bl [fconfigure $sock -blocking] + fconfigure $sock -translation crlf -blocking 1 + set r [gets $sock] + fconfigure $sock -translation $tr -blocking $bl + return $r +} + +# http::CopyStart +# +# Error handling wrapper around fcopy +# +# Arguments +# sock The socket to copy from +# token The token returned from http::geturl +# +# Side Effects +# This closes the connection upon error + +proc http::CopyStart {sock token {initial 1}} { + upvar #0 $token state + if {[info exists state(transfer)] && $state(transfer) eq "chunked"} { + foreach coding [ContentEncoding $token] { + lappend state(zlib) [zlib stream $coding] + } + make-transformation-chunked $sock [namespace code [list CopyChunk $token]] + } else { + if {$initial} { + foreach coding [ContentEncoding $token] { + zlib push $coding $sock + } + } + if {[catch { + fcopy $sock $state(-channel) -size $state(-blocksize) -command \ + [list http::CopyDone $token] + } err]} { + Finish $token $err + } + } +} + +proc http::CopyChunk {token chunk} { upvar 0 $token state - if {[catch { - fcopy $s $state(-channel) -size $state(-blocksize) -command \ - [list http::CopyDone $token] - } err]} { - Finish $token $err + if {[set count [string length $chunk]]} { + incr state(currentsize) $count + if {[info exists state(zlib)]} { + foreach stream $state(zlib) { + set chunk [$stream add $chunk] + } + } + puts -nonewline $state(-channel) $chunk + if {[info exists state(-progress)]} { + eval [linsert $state(-progress) end \ + $token $state(totalsize) $state(currentsize)] + } + } else { + Log "CopyChunk Finish $token" + if {[info exists state(zlib)]} { + set excess "" + foreach stream $state(zlib) { + catch {set excess [$stream add -finalize $excess]} + } + puts -nonewline $state(-channel) $excess + foreach stream $state(zlib) { $stream close } + unset state(zlib) + } + Eof $token ;# FIX ME: pipelining. } } - proc http::CopyDone {token count {error {}}} { + +# http::CopyDone +# +# fcopy completion callback +# +# Arguments +# token The token returned from http::geturl +# count The amount transfered +# +# Side Effects +# Invokes callbacks + +proc http::CopyDone {token count {error {}}} { variable $token upvar 0 $token state - set s $state(sock) + set sock $state(sock) incr state(currentsize) $count if {[info exists state(-progress)]} { - eval $state(-progress) {$token $state(totalsize) $state(currentsize)} + eval $state(-progress) \ + [list $token $state(totalsize) $state(currentsize)] } - if {([string length $error] != 0)} { + # At this point the token may have been reset + if {[string length $error]} { Finish $token $error - } elseif {[::eof $s]} { + } elseif {[catch {eof $sock} iseof] || $iseof} { Eof $token } else { - CopyStart $s $token + CopyStart $sock $token 0 } } - proc http::Eof {token} { + +# http::Eof +# +# Handle eof on the socket +# +# Arguments +# token The token returned from http::geturl +# +# Side Effects +# Clean up the socket + +proc http::Eof {token {force 0}} { variable $token upvar 0 $token state - if {$state(state) == "header"} { + if {$state(state) eq "header"} { # Premature eof set state(status) eof } else { set state(status) ok } - set state(state) eof + + if {[string length $state(body)] > 0} { + if {[catch { + foreach coding [ContentEncoding $token] { + set state(body) [zlib $coding $state(body)] + } + } err]} { + Log "error doing $coding '$state(body)'" + return [Finish $token $err] + } + + if {!$state(binary)} { + # If we are getting text, set the incoming channel's encoding + # correctly. iso8859-1 is the RFC default, but this could be any IANA + # charset. However, we only know how to convert what we have + # encodings for. + + set enc [CharsetToEncoding $state(charset)] + if {$enc ne "binary"} { + set state(body) [encoding convertfrom $enc $state(body)] + } + + # Translate text line endings. + set state(body) [string map {\r\n \n \r \n} $state(body)] + } + } Finish $token } # http::wait -- # -# See documentaion for details. +# See documentation for details. # # Arguments: # token Connection token. +# # Results: # The status after the wait. @@ -394,69 +1335,186 @@ proc http::wait {token} { variable $token upvar 0 $token state - if {![info exists state(status)] || [string length $state(status)] == 0} { - vwait $token\(status) - } - if {[info exists state(error)]} { - set errorlist $state(error) - unset state(error) - eval error $errorlist + if {![info exists state(status)] || $state(status) eq ""} { + # We must wait on the original variable name, not the upvar alias + vwait ${token}(status) } - return $state(status) + + return [status $token] } # http::formatQuery -- # -# See documentaion for details. -# Call http::formatQuery with an even number of arguments, where -# the first is a name, the second is a value, the third is another -# name, and so on. +# See documentation for details. Call http::formatQuery with an even +# number of arguments, where the first is a name, the second is a value, +# the third is another name, and so on. # # Arguments: # args A list of name-value pairs. +# # Results: -# TODO +# TODO proc http::formatQuery {args} { set result "" set sep "" foreach i $args { - append result $sep [mapReply $i] - if {$sep != "="} { - set sep = - } else { + append result $sep [mapReply $i] + if {$sep eq "="} { set sep & + } else { + set sep = } } return $result } -# do x-www-urlencoded character mapping -# The spec says: "non-alphanumeric characters are replaced by '%HH'" -# 1 leave alphanumerics characters alone -# 2 Convert every other character to an array lookup -# 3 Escape constructs that are "special" to the tcl parser -# 4 "subst" the result, doing all the array substitutions - - proc http::mapReply {string} { +# http::mapReply -- +# +# Do x-www-urlencoded character mapping +# +# Arguments: +# string The string the needs to be encoded +# +# Results: +# The encoded string + +proc http::mapReply {string} { + variable http variable formMap - set alphanumeric a-zA-Z0-9 - regsub -all \[^$alphanumeric\] $string {$formMap(&)} string - regsub -all \n $string {\\n} string - regsub -all \t $string {\\t} string - regsub -all {[][{})\\]\)} $string {\\&} string - return [subst $string] + + # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use + # a pre-computed map and [string map] to do the conversion (much faster + # than [regsub]/[subst]). [Bug 1020491] + + if {$http(-urlencoding) ne ""} { + set string [encoding convertto $http(-urlencoding) $string] + return [string map $formMap $string] + } + set converted [string map $formMap $string] + if {[string match "*\[\u0100-\uffff\]*" $converted]} { + regexp "\[\u0100-\uffff\]" $converted badChar + # Return this error message for maximum compatability... :^/ + return -code error \ + "can't read \"formMap($badChar)\": no such element in array" + } + return $converted } -# Default proxy filter. - proc http::ProxyRequired {host} { +# http::ProxyRequired -- +# Default proxy filter. +# +# Arguments: +# host The destination host +# +# Results: +# The current proxy settings + +proc http::ProxyRequired {host} { variable http if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { - if {![info exists http(-proxyport)] || ![string length $http(-proxyport)]} { + if { + ![info exists http(-proxyport)] || + ![string length $http(-proxyport)] + } { set http(-proxyport) 8080 } return [list $http(-proxyhost) $http(-proxyport)] + } +} + +# http::CharsetToEncoding -- +# +# Tries to map a given IANA charset to a tcl encoding. If no encoding +# can be found, returns binary. +# + +proc http::CharsetToEncoding {charset} { + variable encodings + + set charset [string tolower $charset] + if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} { + set encoding "iso8859-$num" + } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { + set encoding "iso2022-$ext" + } elseif {[regexp {shift[-_]?js} $charset]} { + set encoding "shiftjis" + } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { + set encoding "cp$num" + } elseif {$charset eq "us-ascii"} { + set encoding "ascii" + } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} { + switch -- $num { + 5 {set encoding "iso8859-9"} + 1 - 2 - 3 { + set encoding "iso8859-$num" + } + } } else { - return {} + # other charset, like euc-xx, utf-8,... may directly map to encoding + set encoding $charset + } + set idx [lsearch -exact $encodings $encoding] + if {$idx >= 0} { + return $encoding + } else { + return "binary" } } + +# Return the list of content-encoding transformations we need to do in order. +proc http::ContentEncoding {token} { + upvar 0 $token state + set r {} + if {[info exists state(coding)]} { + foreach coding [split $state(coding) ,] { + switch -exact -- $coding { + deflate { lappend r inflate } + gzip - x-gzip { lappend r gunzip } + compress - x-compress { lappend r decompress } + identity {} + default { + return -code error "unsupported content-encoding \"$coding\"" + } + } + } + } + return $r +} + +proc http::make-transformation-chunked {chan command} { + set lambda {{chan command} { + set data "" + set size -1 + yield + while {1} { + chan configure $chan -translation {crlf binary} + while {[gets $chan line] < 1} { yield } + chan configure $chan -translation {binary binary} + if {[scan $line %x size] != 1} { return -code error "invalid size: \"$line\"" } + set chunk "" + while {$size && ![chan eof $chan]} { + set part [chan read $chan $size] + incr size -[string length $part] + append chunk $part + } + if {[catch { + uplevel #0 [linsert $command end $chunk] + }]} { + http::Log "Error in callback: $::errorInfo" + } + if {[string length $chunk] == 0} { + # channel might have been closed in the callback + catch {chan event $chan readable {}} + return + } + } + }} + coroutine dechunk$chan ::apply $lambda $chan $command + chan event $chan readable [namespace origin dechunk$chan] + return +} + +# Local variables: +# indent-tabs-mode: t +# End: diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl index 01052f3..27ba795 100644 --- a/library/http/pkgIndex.tcl +++ b/library/http/pkgIndex.tcl @@ -1,11 +1,2 @@ -# Tcl package index file, version 1.0 -# This file is generated by the "pkg_mkIndex" command -# and sourced either when an application starts up or -# by a "package unknown" script. It invokes the -# "package ifneeded" command to set up package-related -# information so that packages will be loaded automatically -# in response to "package require" commands. When this -# script is sourced, the variable $dir must contain the -# full path name of this file's directory. - -package ifneeded http 2.0 [list tclPkgSetup $dir http 2.0 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait}}}] +if {![package vsatisfies [package provide Tcl] 8.6]} {return} +package ifneeded http 2.8.8 [list tclPkgSetup $dir http 2.8.8 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] |
