Diffstat (limited to 'library/http/http.tcl')
-rw-r--r-- | library/http/http.tcl | 633
1 file changed, 365 insertions, 268 deletions
diff --git a/library/http/http.tcl b/library/http/http.tcl
index 06829cd..a6b2bfd 100644
--- a/library/http/http.tcl
+++ b/library/http/http.tcl
@@ -1,19 +1,17 @@
 # http.tcl --
 #
 # Client-side HTTP for GET, POST, and HEAD commands. These routines can
-# be used in untrusted code that uses the Safesock security policy. These
-# procedures use a callback interface to avoid using vwait, which is not
-# defined in the safe base.
+# be used in untrusted code that uses the Safesock security policy.
+# These procedures use a callback interface to avoid using vwait, which
+# is not defined in the safe base.
 #
 # See the file "license.terms" for information on usage and redistribution of
 # this file, and for a DISCLAIMER OF ALL WARRANTIES.
-#
-# RCS: @(#) $Id: http.tcl,v 1.67.2.5 2008/10/23 23:34:32 patthoyts Exp $
 
-package require Tcl 8.4
-# Keep this in sync with pkgIndex.tcl and with the install directories
-# in Makefiles
-package provide http 2.7.2
+package require Tcl 8.6
+# Keep this in sync with pkgIndex.tcl and with the install directories in
+# Makefiles
+package provide http 2.8.8
 
 namespace eval http {
     # Allow resourcing to not clobber existing data
@@ -27,23 +25,29 @@ namespace eval http {
             -proxyfilter http::ProxyRequired
             -urlencoding utf-8
         }
-        set http(-useragent) "Tcl http client package [package provide http]"
+        # We need a useragent string of this style or various servers will refuse to
+        # send us compressed content even when we ask for it. This follows the
+        # de-facto layout of user-agent strings in current browsers.
+        set http(-useragent) "Mozilla/5.0\
+            ([string totitle $::tcl_platform(platform)]; U;\
+            $::tcl_platform(os) $::tcl_platform(osVersion))\
+            http/[package provide http] Tcl/[package provide Tcl]"
     }
 
     proc init {} {
        # Set up the map for quoting chars. RFC3986 Section 2.3 say percent
-       # encode all except: "... percent-encoded octets in the ranges of ALPHA
-       # (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
-       # underscore (%5F), or tilde (%7E) should not be created by URI
+       # encode all except: "... percent-encoded octets in the ranges of
+       # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period
+       # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI
        # producers ..."
        for {set i 0} {$i <= 256} {incr i} {
            set c [format %c $i]
            if {![string match {[-._~a-zA-Z0-9]} $c]} {
-               set map($c) %[format %.2x $i]
+               set map($c) %[format %.2X $i]
            }
        }
        # These are handled specially
-       set map(\n) %0d%0a
+       set map(\n) %0D%0A
        variable formMap [array get map]
 
        # Create a map for HTTP/1.1 open sockets
@@ -94,22 +98,22 @@ namespace eval http {
 # Arguments:
 # msg Message to output
 #
-proc http::Log {args} {}
+if {[info command http::Log] eq {}} {proc http::Log {args} {}}
 
 # http::register --
 #
 # See documentation for details.
 #
 # Arguments:
-# proto URL protocol prefix, e.g. https
-# port Default port for protocol
-# command Command to use to create socket
+# proto URL protocol prefix, e.g. https
+# port Default port for protocol
+# command Command to use to create socket
 # Results:
 # list of port and command that was registered.
 
 proc http::register {proto port command} {
     variable urlTypes
-    set urlTypes($proto) [list $port $command]
+    set urlTypes([string tolower $proto]) [list $port $command]
 }
 
 # http::unregister --
@@ -117,17 +121,18 @@ proc http::register {proto port command} {
 # Unregisters URL protocol handler
 #
 # Arguments:
-# proto URL protocol prefix, e.g. https
 # Results:
 # list of port and command that was unregistered.
 
 proc http::unregister {proto} {
     variable urlTypes
-    if {![info exists urlTypes($proto)]} {
+    set lower [string tolower $proto]
+    if {![info exists urlTypes($lower)]} {
        return -code error "unsupported url type \"$proto\""
     }
-    set old $urlTypes($proto)
-    unset urlTypes($proto)
+    set old $urlTypes($lower)
+    unset urlTypes($lower)
     return $old
 }
 
@@ -152,21 +157,19 @@ proc http::config {args} {
        return $result
     }
     set options [string map {- ""} $options]
-    set pat ^-([join $options |])$
+    set pat ^-(?:[join $options |])$
     if {[llength $args] == 1} {
        set flag [lindex $args 0]
-       if {[regexp -- $pat $flag]} {
-           return $http($flag)
-       } else {
+       if {![regexp -- $pat $flag]} {
            return -code error "Unknown option $flag, must be: $usage"
        }
+       return $http($flag)
     } else {
        foreach {flag value} $args {
-           if {[regexp -- $pat $flag]} {
-               set http($flag) $value
-           } else {
+           if {![regexp -- $pat $flag]} {
                return -code error "Unknown option $flag, must be: $usage"
            }
+           set http($flag) $value
        }
     }
 }
@@ -179,14 +182,14 @@ proc http::config {args} {
 # token Connection token.
 # errormsg (optional) If set, forces status to error.
 # skipCB (optional) If set, don't call the -command callback. This
-# is useful when geturl wants to throw an exception instead
-# of calling the callback. That way, the same error isn't
-# reported to two places.
+# is useful when geturl wants to throw an exception instead
+# of calling the callback. That way, the same error isn't
+# reported to two places.
 #
 # Side Effects:
 # Closes the socket
 
-proc http::Finish { token {errormsg ""} {skipCB 0}} {
+proc http::Finish {token {errormsg ""} {skipCB 0}} {
     variable $token
     upvar 0 $token state
     global errorInfo errorCode
@@ -194,30 +197,31 @@ proc http::Finish { token {errormsg ""} {skipCB 0}} {
        set state(error) [list $errormsg $errorInfo $errorCode]
        set state(status) "error"
     }
-    if {($state(status) eq "timeout") || ($state(status) eq "error")
-       || ([info exists state(connection)] && ($state(connection) eq "close"))
+    if {
+       ($state(status) eq "timeout") || ($state(status) eq "error") ||
+       ([info exists state(connection)] && ($state(connection) eq "close"))
     } {
        CloseSocket $state(sock) $token
     }
-    if {[info exists state(after)]} { after cancel $state(after) }
-    if {[info exists state(-command)] && !$skipCB} {
-       if {[catch {eval $state(-command) {$token}} err]} {
-           if {$errormsg eq ""} {
-               set state(error) [list $err $errorInfo $errorCode]
-               set state(status) error
-           }
+    if {[info exists state(after)]} {
+       after cancel $state(after)
+    }
+    if {[info exists state(-command)] && !$skipCB
+       && ![info exists state(done-command-cb)]} {
+       set state(done-command-cb) yes
+       if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} {
+           set state(error) [list $err $errorInfo $errorCode]
+           set state(status) error
        }
-       # Command callback may already have unset our state
-       unset -nocomplain state(-command)
     }
 }
 
 # http::CloseSocket -
 #
-# Close a socket and remove it from the persistent sockets table.
-# If possible an http token is included here but when we are called
-# from a fileevent on remote closure we need to find the correct
-# entry - hence the second section.
+# Close a socket and remove it from the persistent sockets table. If
+# possible an http token is included here but when we are called from a
+# fileevent on remote closure we need to find the correct entry - hence
+# the second section.
 
 proc ::http::CloseSocket {s {token {}}} {
     variable socketmap
@@ -227,23 +231,27 @@ proc ::http::CloseSocket {s {token {}}} {
        variable $token
        upvar 0 $token state
       if {[info exists state(socketinfo)]} {
-           set conn_id $state(socketinfo)
+           set conn_id $state(socketinfo)
       }
     } else {
       set map [array get socketmap]
       set ndx [lsearch -exact $map $s]
       if {$ndx != -1} {
-           incr ndx -1
-           set conn_id [lindex $map $ndx]
+           incr ndx -1
+           set conn_id [lindex $map $ndx]
       }
     }
     if {$conn_id eq {} || ![info exists socketmap($conn_id)]} {
       Log "Closing socket $s (no connection info)"
-       if {[catch {close $s} err]} { Log "Error: $err" }
+       if {[catch {close $s} err]} {
+           Log "Error: $err"
+       }
     } else {
       if {[info exists socketmap($conn_id)]} {
           Log "Closing connection $conn_id (sock $socketmap($conn_id))"
-           if {[catch {close $socketmap($conn_id)} err]} { Log "Error: $err" }
+           if {[catch {close $socketmap($conn_id)} err]} {
+               Log "Error: $err"
+           }
           unset socketmap($conn_id)
       } else {
           Log "Cannot close connection $conn_id - no socket in socket map"
@@ -262,7 +270,7 @@ proc ::http::CloseSocket {s {token {}}} {
 # Side Effects:
 # See Finish
 
-proc http::reset { token {why reset} } {
+proc http::reset {token {why reset}} {
     variable $token
     upvar 0 $token state
     set state(status) $why
@@ -285,10 +293,10 @@ proc http::reset { token {why reset} } {
 # args Option value pairs. Valid options include:
 # -blocksize, -validate, -headers, -timeout
 # Results:
-# Returns a token for this connection. This token is the name of an array
-# that the caller should unset to garbage collect the state.
+# Returns a token for this connection. This token is the name of an
+# array that the caller should unset to garbage collect the state.
 
-proc http::geturl { url args } {
+proc http::geturl {url args} {
     variable http
     variable urlTypes
     variable defaultCharset
@@ -351,14 +359,17 @@ proc http::geturl { url args } {
     }
     set usage [join [lsort $options] ", "]
     set options [string map {- ""} $options]
-    set pat ^-([join $options |])$
+    set pat ^-(?:[join $options |])$
     foreach {flag value} $args {
       if {[regexp -- $pat $flag]} {
           # Validate numbers
-           if {[info exists type($flag)] &&
-               ![string is $type($flag) -strict $value]} {
+           if {
+               [info exists type($flag)] &&
+               ![string is $type($flag) -strict $value]
+           } {
               unset $token
-               return -code error "Bad value for $flag ($value), must be $type($flag)"
+               return -code error \
+                   "Bad value for $flag ($value), must be $type($flag)"
           }
           set state($flag) $value
       } else {
@@ -384,20 +395,25 @@ proc http::geturl { url args } {
     # First, before the colon, is the protocol scheme (e.g. http)
     # Second, for HTTP-like protocols, is the authority
     # The authority is preceded by // and lasts up to (but not including)
-    # the following / and it identifies up to four parts, of which only one,
-    # the host, is required (if an authority is present at all). All other
-    # parts of the authority (user name, password, port number) are optional.
+    # the following / or ? and it identifies up to four parts, of which
+    # only one, the host, is required (if an authority is present at all).
+    # All other parts of the authority (user name, password, port number)
+    # are optional.
     # Third is the resource name, which is split into two parts at a ?
     # The first part (from the single "/" up to "?") is the path, and the
     # second part (from that "?" up to "#") is the query. *HOWEVER*, we do
     # not need to separate them; we send the whole lot to the server.
+    # Both, path and query are allowed to be missing, including their
+    # delimiting character.
     # Fourth is the fragment identifier, which is everything after the first
     # "#" in the URL. The fragment identifier MUST NOT be sent to the server
     # and indeed, we don't bother to validate it (it could be an error to
     # pass it in here, but it's cheap to strip).
     #
     # An example of a URL that has all the parts:
-    #     http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
+    #
+    #     http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
+    #
     # The "http" is the protocol, the user is "jschmoe", the password is
     # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
     # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
@@ -405,12 +421,10 @@ proc http::geturl { url args } {
     # Note that the RE actually combines the user and password parts, as
     # recommended in RFC 3986. Indeed, that RFC states that putting passwords
     # in URLs is a Really Bad Idea, something with which I would agree utterly.
-    # Also note that we do not currently support IPv6 addresses.
     #
     # From a validation perspective, we need to ensure that the parts of the
-    # URL that are going to the server are correctly encoded.
-    # This is only done if $state(-strict) is true (inherited from
-    # $::http::strict).
+    # URL that are going to the server are correctly encoded. This is only
+    # done if $state(-strict) is true (inherited from $::http::strict).
 
     set URLmatcher {(?x)                # this is _expanded_ syntax
       ^
@@ -421,10 +435,13 @@ proc http::geturl { url args } {
               [^@/\#?]+               # <userinfo part of authority>
           ) @
       )?
-       ( [^/:\#?]+ )                   # <host part of authority>
+       (                               # <host part of authority>
+           [^/:\#?]+ |                 # host name or IPv4 address
+           \[ [^/\#?]+ \]              # IPv6 address in square brackets
+       )
       (?: : (\d+) )?                  # <port part of authority>
     )?
-    ( / [^\#?]* (?: \? [^\#?]* )?)?    # <path> (including query)
+    ( [/\?] [^\#]*)?                   # <path> (including query)
     (?: \# (.*) )?                     # <fragment>
     $
     }
@@ -435,6 +452,7 @@
       return -code error "Unsupported URL: $url"
     }
     # Phase two: validate
+    set host [string trim $host {[]}]; # strip square brackets from IPv6 address
     if {$host eq ""} {
       # Caller has to provide a host name; we do not have a "default host"
       # that would enable us to handle relative URLs.
@@ -467,6 +485,12 @@
       }
     }
     if {$srvurl ne ""} {
+       # RFC 3986 allows empty paths (not even a /), but servers
+       # return 400 if the path in the HTTP request doesn't start
+       # with / , so add it here if needed.
+       if {[string index $srvurl 0] ne "/"} {
+           set srvurl /$srvurl
+       }
       # Check for validity according to RFC 3986, Appendix A
       set validityRE {(?xi)
           ^
@@ -481,7 +505,7 @@
           # Provide a better error message in this error case
           if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
               return -code error \
-                   "Illegal encoding character usage \"$bad\" in URL path"
+                   "Illegal encoding character usage \"$bad\" in URL path"
           }
           return -code error "Illegal characters in URL path"
       }
@@ -491,12 +515,13 @@
     if {$proto eq ""} {
       set proto http
     }
-    if {![info exists urlTypes($proto)]} {
+    set lower [string tolower $proto]
+    if {![info exists urlTypes($lower)]} {
       unset $token
       return -code error "Unsupported URL type \"$proto\""
     }
-    set defport [lindex $urlTypes($proto) 0]
-    set defcmd [lindex $urlTypes($proto) 1]
+    set defport [lindex $urlTypes($lower) 0]
+    set defcmd [lindex $urlTypes($lower) 1]
 
     if {$port eq ""} {
       set port $defport
@@ -523,11 +548,10 @@
     # If a timeout is specified we set up the after event and arrange for an
     # asynchronous socket connection.
 
-    set sockopts [list]
+    set sockopts [list -async]
     if {$state(-timeout) > 0} {
       set state(after) [after $state(-timeout) \
               [list http::reset $token timeout]]
-       lappend sockopts -async
     }
 
     # If we are using the proxy, we must pass in the full URL that includes
@@ -565,15 +589,15 @@
           lappend sockopts -myaddr $state(-myaddr)
       }
       if {[catch {eval $defcmd $sockopts $targetAddr} sock]} {
-           # something went wrong while trying to establish the
-           # connection. Clean up after events and such, but DON'T call the
-           # command callback (if available) because we're going to throw an
-           # exception from here instead.
+           # something went wrong while trying to establish the connection.
+           # Clean up after events and such, but DON'T call the command
+           # callback (if available) because we're going to throw an
+           # exception from here instead.
           set state(sock) $sock
-           Finish $token "" 1
-           cleanup $token
-           return -code error $sock
+           Finish $token "" 1
+           cleanup $token
+           return -code error $sock
       }
     }
     set state(sock) $sock
@@ -583,16 +607,21 @@
       set socketmap($state(socketinfo)) $sock
     }
 
-    # Wait for the connection to complete.
+    if {![info exists phost]} {
+       set phost ""
+    }
+    fileevent $sock writable [list http::Connect $token $proto $phost $srvurl]
 
-    if {$state(-timeout) > 0} {
-       fileevent $sock writable [list http::Connect $token]
+    # Wait for the connection to complete.
+    if {![info exists state(-command)]} {
+       # geturl does EVERYTHING asynchronously, so if the user
+       # calls it synchronously, we just do a wait here.
       http::wait $token
 
       if {![info exists state]} {
           # If we timed out then Finish has been called and the users
-           # command callback may have cleaned up the token. If so
-           # we end up here with nothing left to do.
+           # command callback may have cleaned up the token. If so we end up
+           # here with nothing left to do.
           return $token
       } elseif {$state(status) eq "error"} {
           # Something went wrong while trying to establish the connection.
@@ -602,13 +631,30 @@
           set err [lindex $state(error) 0]
          cleanup $token
          return -code error $err
-       } elseif {$state(status) ne "connect"} {
-           # Likely to be connection timeout
-           return $token
       }
-       set state(status) ""
     }
+    return $token
+}
+
+
+proc http::Connected { token proto phost srvurl} {
+    variable http
+    variable urlTypes
+
+    variable $token
+    upvar 0 $token state
+
+    # Set back the variables needed here
+    set sock $state(sock)
+    set isQueryChannel [info exists state(-querychannel)]
+    set isQuery [info exists state(-query)]
+    set host [lindex [split $state(socketinfo) :] 0]
+    set port [lindex [split $state(socketinfo) :] 1]
+
+    set lower [string tolower $proto]
+    set defport [lindex $urlTypes($lower) 0]
+
     # Send data in cr-lf format, but accept any line terminators
     fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize)
@@ -640,17 +686,21 @@
     if {[info exists state(-method)] && $state(-method) ne ""} {
       set how $state(-method)
     }
-
+    # We cannot handle chunked encodings with -handler, so force HTTP/1.0
+    # until we can manage this.
+    if {[info exists state(-handler)]} {
+       set state(-protocol) 1.0
+    }
     if {[catch {
       puts $sock "$how $srvurl HTTP/$state(-protocol)"
       puts $sock "Accept: $http(-accept)"
       array set hdrs $state(-headers)
       if {[info exists hdrs(Host)]} {
-           # Allow Host spoofing [Bug 928154]
+           # Allow Host spoofing. [Bug 928154]
           puts $sock "Host: $hdrs(Host)"
       } elseif {$port == $defport} {
-           # Don't add port in this case, to handle broken servers.
-           # [Bug #504508]
+           # Don't add port in this case, to handle broken servers. [Bug
+           # #504508]
           puts $sock "Host: $host"
       } else {
           puts $sock "Host: $host:$port"
@@ -658,20 +708,26 @@
       unset hdrs
       puts $sock "User-Agent: $http(-useragent)"
       if {$state(-protocol) == 1.0 && $state(-keepalive)} {
-           puts $sock "Connection: keep-alive"
+           puts $sock "Connection: keep-alive"
       }
       if {$state(-protocol) > 1.0 && !$state(-keepalive)} {
-           puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
+           puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
       }
       if {[info exists phost] && ($phost ne "") && $state(-keepalive)} {
-           puts $sock "Proxy-Connection: Keep-Alive"
+           puts $sock "Proxy-Connection: Keep-Alive"
       }
       set accept_encoding_seen 0
+       set content_type_seen 0
       foreach {key value} $state(-headers) {
-           if {[string equal -nocase $key "host"]} { continue }
-           if {[string equal -nocase $key "accept-encoding"]} {
-               set accept_encoding_seen 1
-           }
+           if {[string equal -nocase $key "host"]} {
+               continue
+           }
+           if {[string equal -nocase $key "accept-encoding"]} {
+               set accept_encoding_seen 1
+           }
+           if {[string equal -nocase $key "content-type"]} {
+               set content_type_seen 1
+           }
           set value [string map [list \n "" \r ""] $value]
           set key [string trim $key]
           if {[string equal -nocase $key "content-length"]} {
@@ -682,11 +738,8 @@
               puts $sock "$key: $value"
           }
       }
-       # Soft zlib dependency check - no package require
-       if {!$accept_encoding_seen && [llength [package provide zlib]]
-           && !([info exists state(-channel)] || [info exists state(-handler)])
-       } {
-           puts $sock "Accept-Encoding: gzip, identity, *;q=0.1"
+       if {!$accept_encoding_seen && ![info exists state(-handler)]} {
+           puts $sock "Accept-Encoding: deflate,gzip,compress"
       }
       if {$isQueryChannel && $state(querylength) == 0} {
           # Try to determine size of data in channel. If we cannot seek, the
@@ -707,16 +760,19 @@
       # It is possible to have both the read and write fileevents active at
       # this point. The only scenario it seems to affect is a server that
       # closes the connection without reading the POST data. (e.g., early
-       # versions TclHttpd in various error cases). Depending on the platform,
-       # the client may or may not be able to get the response from the server
-       # because of the error it will get trying to write the post data.
-       # Having both fileevents active changes the timing and the behavior,
-       # but no two platforms (among Solaris, Linux, and NT) behave the same,
-       # and none behave all that well in any case. Servers should always read
-       # their POST data if they expect the client to read their response.
+       # versions TclHttpd in various error cases). Depending on the
+       # platform, the client may or may not be able to get the response from
+       # the server because of the error it will get trying to write the post
+       # data. Having both fileevents active changes the timing and the
+       # behavior, but no two platforms (among Solaris, Linux, and NT) behave
+       # the same, and none behave all that well in any case. Servers should
+       # always read their POST data if they expect the client to read their
+       # response.
       if {$isQuery || $isQueryChannel} {
-           puts $sock "Content-Type: $state(-type)"
+           if {!$content_type_seen} {
+               puts $sock "Content-Type: $state(-type)"
+           }
           if {!$contDone} {
               puts $sock "Content-Length: $state(querylength)"
           }
@@ -729,35 +785,17 @@
           fileevent $sock readable [list http::Event $sock $token]
       }
 
-       if {! [info exists state(-command)]} {
-           # geturl does EVERYTHING asynchronously, so if the user calls it
-           # synchronously, we just do a wait here.
-
-           wait $token
-           if {$state(status) eq "error"} {
-               # Something went wrong, so throw the exception, and the
-               # enclosing catch will do cleanup.
-               return -code error [lindex $state(error) 0]
-           }
-       }
     } err]} {
       # The socket probably was never connected, or the connection dropped
       # later.
-       # Clean up after events and such, but DON'T call the command callback
-       # (if available) because we're going to throw an exception from here
-       # instead.
-
       # if state(status) is error, it means someone's already called Finish
       # to do the above-described clean up.
       if {$state(status) ne "error"} {
-           Finish $token $err 1
+           Finish $token $err
       }
-       cleanup $token
-       return -code error $err
     }
-    return $token
 }
 
 # Data access functions:
@@ -772,7 +810,9 @@
     return $state(body)
 }
 proc http::status {token} {
-    if {![info exists $token]} { return "error" }
+    if {![info exists $token]} {
+       return "error"
+    }
     variable $token
     upvar 0 $token state
     return $state(status)
@@ -839,16 +879,18 @@
 # Sets the status of the connection, which unblocks
 # the waiting geturl call
 
-proc http::Connect {token} {
+proc http::Connect {token proto phost srvurl} {
     variable $token
     upvar 0 $token state
-    global errorInfo errorCode
-    if {[eof $state(sock)] ||
-       [string length [fconfigure $state(sock) -error]]} {
-       Finish $token "connect failed [fconfigure $state(sock) -error]" 1
+    set err "due to unexpected EOF"
+    if {
+       [eof $state(sock)] ||
+       [set err [fconfigure $state(sock) -error]] ne ""
+    } {
+       Finish $token "connect failed $err"
     } else {
-       set state(status) connect
       fileevent $state(sock) writable {}
+       ::http::Connected $token $proto $phost $srvurl
     }
     return
 }
@@ -883,7 +925,6 @@
       incr state(queryoffset) $state(-queryblocksize)
       if {$state(queryoffset) >= $state(querylength)} {
           set state(queryoffset) $state(querylength)
-           puts $sock ""
           set done 1
       }
     } else {
@@ -934,8 +975,8 @@
     if {![info exists state]} {
       Log "Event $sock with invalid token '$token' - remote close?"
-       if {! [eof $sock]} {
-           if {[string length [set d [read $sock]]] != 0} {
+       if {![eof $sock]} {
+           if {[set d [read $sock]] ne ""} {
               Log "WARNING: additional data left on closed socket"
           }
       }
@@ -943,9 +984,10 @@
       return
     }
     if {$state(state) eq "connecting"} {
-       set state(state) "header"
       if {[catch {gets $sock state(http)} n]} {
           return [Finish $token $n]
+       } elseif {$n >= 0} {
+           set state(state) "header"
       }
     } elseif {$state(state) eq "header"} {
       if {[catch {gets $sock line} n]} {
@@ -953,7 +995,9 @@
       } elseif {$n == 0} {
           # We have now read all headers
           # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3
-           if {$state(http) == "" || [lindex $state(http) 1] == 100} { return }
+           if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} {
+               return
+           }
 
           set state(state) body
@@ -963,15 +1007,16 @@
               return
           }
 
-           # For non-chunked transfer we may have no body -- in this case we
-           # may get no further file event if the connection doesn't close and
-           # no more data is sent. We can tell and must finish up now - not
-           # later.
-           if {!(([info exists state(connection)]
-                 && ($state(connection) eq "close"))
-                || [info exists state(transfer)])
-               && $state(totalsize) == 0
-           } then {
+           # For non-chunked transfer we may have no body - in this case we
+           # may get no further file event if the connection doesn't close
+           # and no more data is sent. We can tell and must finish up now -
+           # not later.
+           if {
+               !(([info exists state(connection)]
+                  && ($state(connection) eq "close"))
+                 || [info exists state(transfer)])
+               && ($state(totalsize) == 0)
+           } {
               Log "body size is 0 and no events likely - complete."
               Eof $token
               return
@@ -980,22 +1025,22 @@
           # We have to use binary translation to count bytes properly.
           fconfigure $sock -translation binary
 
-           if {$state(-binary) || ![string match -nocase text* $state(type)]} {
+           if {
+               $state(-binary) || ![string match -nocase text* $state(type)]
+           } {
               # Turn off conversions for non-text data
              set state(binary) 1
           }
-           if {$state(binary) || [string match *gzip* $state(coding)]
-               || [string match *compress* $state(coding)]} {
-               if {[info exists state(-channel)]} {
+           if {[info exists state(-channel)]} {
+               if {$state(binary) || [llength [ContentEncoding $token]]} {
                   fconfigure $state(-channel) -translation binary
               }
-           }
-           if {[info exists state(-channel)] &&
-               ![info exists state(-handler)]} {
-               # Initiate a sequence of background fcopies
-               fileevent $sock readable {}
-               CopyStart $sock $token
-               return
+               if {![info exists state(-handler)]} {
+                   # Initiate a sequence of background fcopies
+                   fileevent $sock readable {}
+                   CopyStart $sock $token
+                   return
+               }
           }
       } elseif {$n > 0} {
           # Process header lines
@@ -1004,8 +1049,14 @@
               content-type {
                   set state(type) [string trim [string tolower $value]]
                   # grab the optional charset information
-                   regexp -nocase {charset\s*=\s*(\S+?);?} \
-                       $state(type) -> state(charset)
+                   if {[regexp -nocase \
+                           {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
+                           $state(type) -> cs]} {
+                       set state(charset) [string map {{\"} \"} $cs]
+                   } else {
+                       regexp -nocase {charset\s*=\s*(\S+?);?} \
+                           $state(type) -> state(charset)
+                   }
               }
               content-length {
                   set state(totalsize) [string trim $value]
@@ -1041,8 +1092,10 @@
               Log "final chunk part"
              Eof $token
           }
-       } elseif {[info exists state(transfer)]
-                 && $state(transfer) eq "chunked"} {
+       } elseif {
+           [info exists state(transfer)]
+           && $state(transfer) eq "chunked"
+       } {
           set size 0
           set chunk [getTextLine $sock]
           set n [string length $chunk]
@@ -1079,8 +1132,10 @@
               incr state(currentsize) $n
           }
           # If Content-Length - check for end of data.
-           if {($state(totalsize) > 0)
-               && ($state(currentsize) >= $state(totalsize))} {
+           if {
+               ($state(totalsize) > 0)
+               && ($state(currentsize) >= $state(totalsize))
+           } {
               Eof $token
           }
       }
@@ -1137,14 +1192,54 @@
 # Side Effects
 # This closes the connection upon error
 
-proc http::CopyStart {sock token} {
-    variable $token
+proc http::CopyStart {sock token {initial 1}} {
+    upvar #0 $token state
+    if {[info exists state(transfer)] && $state(transfer) eq "chunked"} {
+       foreach coding [ContentEncoding $token] {
+           lappend state(zlib) [zlib stream $coding]
+       }
+       make-transformation-chunked $sock [namespace code [list CopyChunk $token]]
+    } else {
+       if {$initial} {
+           foreach coding [ContentEncoding $token] {
+               zlib push $coding $sock
+           }
+       }
+       if {[catch {
+           fcopy $sock $state(-channel) -size $state(-blocksize) -command \
+               [list http::CopyDone $token]
+       } err]} {
+           Finish $token $err
+       }
+    }
+}
+
+proc http::CopyChunk {token chunk} {
     upvar 0 $token state
-    if {[catch {
-       fcopy $sock $state(-channel) -size $state(-blocksize) -command \
-           [list http::CopyDone $token]
-    } err]} {
-       Finish $token $err
+    if {[set count [string length $chunk]]} {
+       incr state(currentsize) $count
+       if {[info exists state(zlib)]} {
+           foreach stream $state(zlib) {
+               set chunk [$stream add $chunk]
+           }
+       }
+       puts -nonewline $state(-channel) $chunk
+       if {[info exists state(-progress)]} {
+           eval [linsert $state(-progress) end \
+               $token $state(totalsize) $state(currentsize)]
+       }
+    } else {
+       Log "CopyChunk Finish $token"
+       if {[info exists state(zlib)]} {
+           set excess ""
+           foreach stream $state(zlib) {
+               catch {set excess [$stream add -finalize $excess]}
+           }
+           puts -nonewline $state(-channel) $excess
+           foreach stream $state(zlib) { $stream close }
+           unset state(zlib)
+       }
+       Eof $token ;# FIX ME: pipelining.
     }
 }
 
@@ -1174,7 +1269,7 @@
     } elseif {[catch {eof $sock} iseof] || $iseof} {
       Eof $token
     } else {
-       CopyStart $sock $token
+       CopyStart $sock $token 0
     }
 }
 
@@ -1198,30 +1293,31 @@
       set state(status) ok
     }
 
-    if {($state(coding) eq "gzip") && [string length $state(body)] > 0} {
-       if {[catch {
-           set state(body) [Gunzip $state(body)]
-       } err]} {
-           return [Finish $token $err]
-       }
-    }
-
-    if {!$state(binary)} {
+    if {[string length $state(body)] > 0} {
+       if {[catch {
+           foreach coding [ContentEncoding $token] {
+               set state(body) [zlib $coding $state(body)]
+           }
+       } err]} {
+           Log "error doing $coding '$state(body)'"
+           return [Finish $token $err]
+       }
 
-       # If we are getting text, set the incoming channel's
-       # encoding correctly. iso8859-1 is the RFC default, but
-       # this could be any IANA charset. However, we only know
-       # how to convert what we have encodings for.
+       if {!$state(binary)} {
+           # If we are getting text, set the incoming channel's encoding
+           # correctly. iso8859-1 is the RFC default, but this could be any IANA
+           # charset. However, we only know how to convert what we have
+           # encodings for.
 
-       set enc [CharsetToEncoding $state(charset)]
-       if {$enc ne "binary"} {
-           set state(body) [encoding convertfrom $enc $state(body)]
-       }
+           set enc [CharsetToEncoding $state(charset)]
+           if {$enc ne "binary"} {
+               set state(body) [encoding convertfrom $enc $state(body)]
+           }
 
-       # Translate text line endings.
+           set state(body) [string map {\r\n \n \r \n} $state(body)]
+       }
     }
-
     Finish $token
 }
@@ -1297,7 +1393,7 @@
     }
     set converted [string map $formMap $string]
     if {[string match "*\[\u0100-\uffff\]*" $converted]} {
-       regexp {[\u0100-\uffff]} $converted badChar
+       regexp "\[\u0100-\uffff\]" $converted badChar
       # Return this error message for maximum compatability... :^/
       return -code error \
           "can't read \"formMap($badChar)\": no such element in array"
@@ -1317,8 +1413,10 @@
 proc http::ProxyRequired {host} {
     variable http
     if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
-       if {![info exists http(-proxyport)] || \
-           ![string length $http(-proxyport)]} {
+       if {
+           ![info exists http(-proxyport)] ||
+           ![string length $http(-proxyport)]
+       } {
           set http(-proxyport) 8080
       }
       return [list $http(-proxyhost) $http(-proxyport)]
@@ -1327,33 +1425,33 @@
 
 # http::CharsetToEncoding --
 #
-# Tries to map a given IANA charset to a tcl encoding.
-# If no encoding can be found, returns binary.
+# Tries to map a given IANA charset to a tcl encoding. If no encoding
+# can be found, returns binary.
 #
 proc http::CharsetToEncoding {charset} {
     variable encodings
     set charset [string tolower $charset]
-    if {[regexp {iso-?8859-([0-9]+)} $charset - num]} {
+    if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} {
       set encoding "iso8859-$num"
-    } elseif {[regexp {iso-?2022-(jp|kr)} $charset - ext]} {
+    } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} {
       set encoding "iso2022-$ext"
-    } elseif {[regexp {shift[-_]?js} $charset -]} {
+    } elseif {[regexp {shift[-_]?js} $charset]} {
       set encoding "shiftjis"
-    } elseif {[regexp {(windows|cp)-?([0-9]+)} $charset - - num]} {
+    } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} {
       set encoding "cp$num"
     } elseif {$charset eq "us-ascii"} {
       set encoding "ascii"
-    } elseif {[regexp {(iso-?)?lat(in)?-?([0-9]+)} $charset - - - num]} {
+    } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} {
       switch -- $num {
           5 {set encoding "iso8859-9"}
-           1 -
-           2 -
-           3 {set encoding "iso8859-$num"}
+           1 - 2 - 3 {
+               set encoding "iso8859-$num"
+           }
       }
     } else {
-       # other charset, like euc-xx, utf-8,... may directly maps to encoding
+       # other charset, like euc-xx, utf-8,... may directly map to encoding
       set encoding $charset
     }
     set idx [lsearch -exact $encodings $encoding]
@@ -1364,58 +1462,57 @@
     }
 }
 
-# http::Gunzip --
-#
-# Decompress data transmitted using the gzip transfer coding.
-#
-
-# FIX ME: redo using zlib sinflate
-proc http::Gunzip {data} {
-    binary scan $data Scb5icc magic method flags time xfl os
-    set pos 10
-    if {$magic != 0x1f8b} {
-       return -code error "invalid data: supplied data is not in gzip format"
-    }
-    if {$method != 8} {
-       return -code error "invalid compression method"
-    }
-
-    foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break
-    set extra ""
-    if { $f_extra } {
-       binary scan $data @${pos}S xlen
-       incr pos 2
-       set extra [string range $data $pos $xlen]
-       set pos [incr xlen]
-    }
-
-    set name ""
-    if { $f_name } {
-       set ndx [string first \0 $data $pos]
-       set name [string range $data $pos $ndx]
-       set pos [incr ndx]
-    }
-
-    set comment ""
-    if { $f_comment } {
-       set ndx [string first \0 $data $pos]
-       set comment [string range $data $pos $ndx]
-       set pos [incr ndx]
-    }
-
-    set fcrc ""
-    if { $f_crc } {
-       set fcrc [string range $data $pos [incr pos]]
-       incr pos
+# Return the list of content-encoding transformations we need to do in order.
+proc http::ContentEncoding {token} {
+    upvar 0 $token state
+    set r {}
+    if {[info exists state(coding)]} {
+       foreach coding [split $state(coding) ,] {
+           switch -exact -- $coding {
+               deflate { lappend r inflate }
+               gzip - x-gzip { lappend r gunzip }
+               compress - x-compress { lappend r decompress }
+               identity {}
+               default {
+                   return -code error "unsupported content-encoding \"$coding\""
+               }
+           }
+       }
     }
+    return $r
+}
 
-    binary scan [string range $data end-7 end] ii crc size
-    set inflated [zlib inflate [string range $data $pos end-8]]
-    set chk [zlib crc32 $inflated]
-    if { ($crc & 0xffffffff) != ($chk & 0xffffffff)} {
-       return -code error "invalid data: checksum mismatch $crc != $chk"
-    }
-    return $inflated
+proc http::make-transformation-chunked {chan command} {
+    set lambda {{chan command} {
+       set data ""
+       set size -1
+       yield
+       while {1} {
+           chan configure $chan -translation {crlf binary}
+           while {[gets $chan line] < 1} { yield }
+           chan configure $chan -translation {binary binary}
+           if {[scan $line %x size] != 1} { return -code error "invalid size: \"$line\"" }
+           set chunk ""
+           while {$size && ![chan eof $chan]} {
+               set part [chan read $chan $size]
+               incr size -[string length $part]
+               append chunk $part
+           }
+           if {[catch {
+               uplevel #0 [linsert $command end $chunk]
+           }]} {
+               http::Log "Error in callback: $::errorInfo"
+           }
+           if {[string length $chunk] == 0} {
+               # channel might have been closed in the callback
+               catch {chan event $chan readable {}}
+               return
+           }
+       }
+    }}
+    coroutine dechunk$chan ::apply $lambda $chan $command
+    chan event $chan readable [namespace origin dechunk$chan]
+    return
 }
 
 # Local variables:
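For orientation, here is a minimal, hedged sketch of how the package touched by this diff is typically driven from a script. The https registration via the tls package is an assumption (any command that returns a TLS-wrapped channel can be registered); the rest uses only commands visible in the diff or documented by the package (http::register, http::geturl, http::ncode, http::data, http::cleanup). With this version the request advertises Accept-Encoding and, per the new ContentEncoding/Eof code above, the body returned by http::data has already been decompressed and charset-decoded:

    package require http
    package require tls                          ;# assumption: the tls package is installed
    http::register https 443 [list ::tls::socket]

    # Synchronous fetch with a 10 second timeout; keep-alive is optional.
    set tok [http::geturl https://www.example.com/ -timeout 10000 -keepalive 1]
    if {[http::status $tok] eq "ok" && [http::ncode $tok] == 200} {
        puts "received [string length [http::data $tok]] characters"
    } else {
        puts stderr "request failed: [http::status $tok]"
    }
    http::cleanup $tok

For an asynchronous fetch, pass -command with a callback instead; as the comments in geturl note, the package only enters vwait (via http::wait) when no -command is supplied.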