Diffstat (limited to 'library/http/http.tcl')
-rw-r--r--   library/http/http.tcl   633
1 file changed, 365 insertions, 268 deletions
diff --git a/library/http/http.tcl b/library/http/http.tcl
index 06829cd..a6b2bfd 100644
--- a/library/http/http.tcl
+++ b/library/http/http.tcl
@@ -1,19 +1,17 @@
# http.tcl --
#
# Client-side HTTP for GET, POST, and HEAD commands. These routines can
-# be used in untrusted code that uses the Safesock security policy. These
-# procedures use a callback interface to avoid using vwait, which is not
-# defined in the safe base.
+# be used in untrusted code that uses the Safesock security policy.
+# These procedures use a callback interface to avoid using vwait, which
+# is not defined in the safe base.
#
# See the file "license.terms" for information on usage and redistribution of
# this file, and for a DISCLAIMER OF ALL WARRANTIES.
-#
-# RCS: @(#) $Id: http.tcl,v 1.67.2.5 2008/10/23 23:34:32 patthoyts Exp $
-package require Tcl 8.4
-# Keep this in sync with pkgIndex.tcl and with the install directories
-# in Makefiles
-package provide http 2.7.2
+package require Tcl 8.6
+# Keep this in sync with pkgIndex.tcl and with the install directories in
+# Makefiles
+package provide http 2.8.8
namespace eval http {
# Allow resourcing to not clobber existing data
@@ -27,23 +25,29 @@ namespace eval http {
-proxyfilter http::ProxyRequired
-urlencoding utf-8
}
- set http(-useragent) "Tcl http client package [package provide http]"
+ # We need a useragent string of this style or various servers will refuse to
+ # send us compressed content even when we ask for it. This follows the
+ # de-facto layout of user-agent strings in current browsers.
+ set http(-useragent) "Mozilla/5.0\
+ ([string totitle $::tcl_platform(platform)]; U;\
+ $::tcl_platform(os) $::tcl_platform(osVersion))\
+ http/[package provide http] Tcl/[package provide Tcl]"
}
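The browser-style string above is only the package default; callers can replace it through the public http::config interface. A minimal sketch (the value shown is purely illustrative):

    package require http
    # Override the default User-Agent for subsequent requests; the
    # string here is just an example, not a recommended value.
    ::http::config -useragent "MyApp/1.0 (Tcl [package provide Tcl])"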
proc init {} {
# Set up the map for quoting chars. RFC3986 Section 2.3 says percent
- # encode all except: "... percent-encoded octets in the ranges of ALPHA
- # (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
- # underscore (%5F), or tilde (%7E) should not be created by URI
+ # encode all except: "... percent-encoded octets in the ranges of
+ # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period
+ # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI
# producers ..."
for {set i 0} {$i <= 256} {incr i} {
set c [format %c $i]
if {![string match {[-._~a-zA-Z0-9]} $c]} {
- set map($c) %[format %.2x $i]
+ set map($c) %[format %.2X $i]
}
}
# These are handled specially
- set map(\n) %0d%0a
+ set map(\n) %0D%0A
variable formMap [array get map]
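The formMap built above also drives the public http::formatQuery helper; with the switch to %.2X its percent-escapes now use upper-case hex. A small illustrative sketch:

    package require http
    # Keys and values are percent-encoded with formMap and joined
    # with = and &; spaces become %20, ampersands %26.
    set query [::http::formatQuery name "Tcl & Tk" q "a b"]
    # => name=Tcl%20%26%20Tk&q=a%20b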
# Create a map for HTTP/1.1 open sockets
@@ -94,22 +98,22 @@ namespace eval http {
# Arguments:
# msg Message to output
#
-proc http::Log {args} {}
+if {[info command http::Log] eq {}} {proc http::Log {args} {}}
# http::register --
#
# See documentation for details.
#
# Arguments:
-# proto URL protocol prefix, e.g. https
-# port Default port for protocol
-# command Command to use to create socket
+# proto URL protocol prefix, e.g. https
+# port Default port for protocol
+# command Command to use to create socket
# Results:
# list of port and command that was registered.
proc http::register {proto port command} {
variable urlTypes
- set urlTypes($proto) [list $port $command]
+ set urlTypes([string tolower $proto]) [list $port $command]
}
# http::unregister --
@@ -117,17 +121,18 @@ proc http::register {proto port command} {
# Unregisters URL protocol handler
#
# Arguments:
-# proto URL protocol prefix, e.g. https
+# proto URL protocol prefix, e.g. https
# Results:
# list of port and command that was unregistered.
proc http::unregister {proto} {
variable urlTypes
- if {![info exists urlTypes($proto)]} {
+ set lower [string tolower $proto]
+ if {![info exists urlTypes($lower)]} {
return -code error "unsupported url type \"$proto\""
}
- set old $urlTypes($proto)
- unset urlTypes($proto)
+ set old $urlTypes($lower)
+ unset urlTypes($lower)
return $old
}
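The case-insensitive lookup matters mostly for https, which is the usual client of http::register via the tls package. A typical usage sketch:

    package require http
    package require tls
    # Register the https scheme so geturl can fetch https:// URLs.
    ::http::register https 443 ::tls::socket
    # ...and to remove the handler again:
    ::http::unregister https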
@@ -152,21 +157,19 @@ proc http::config {args} {
return $result
}
set options [string map {- ""} $options]
- set pat ^-([join $options |])$
+ set pat ^-(?:[join $options |])$
if {[llength $args] == 1} {
set flag [lindex $args 0]
- if {[regexp -- $pat $flag]} {
- return $http($flag)
- } else {
+ if {![regexp -- $pat $flag]} {
return -code error "Unknown option $flag, must be: $usage"
}
+ return $http($flag)
} else {
foreach {flag value} $args {
- if {[regexp -- $pat $flag]} {
- set http($flag) $value
- } else {
+ if {![regexp -- $pat $flag]} {
return -code error "Unknown option $flag, must be: $usage"
}
+ set http($flag) $value
}
}
}
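For reference, http::config with no arguments returns every option/value pair, with a single flag it queries that option, and with flag/value pairs it sets them; anything else hits the "Unknown option" error above. A brief sketch (values are illustrative):

    set ua [::http::config -useragent]                   ;# query one option
    ::http::config -accept text/html -urlencoding utf-8  ;# set several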
@@ -179,14 +182,14 @@ proc http::config {args} {
# token Connection token.
# errormsg (optional) If set, forces status to error.
# skipCB (optional) If set, don't call the -command callback. This
-# is useful when geturl wants to throw an exception instead
-# of calling the callback. That way, the same error isn't
-# reported to two places.
+# is useful when geturl wants to throw an exception instead
+# of calling the callback. That way, the same error isn't
+# reported to two places.
#
# Side Effects:
# Closes the socket
-proc http::Finish { token {errormsg ""} {skipCB 0}} {
+proc http::Finish {token {errormsg ""} {skipCB 0}} {
variable $token
upvar 0 $token state
global errorInfo errorCode
@@ -194,30 +197,31 @@ proc http::Finish { token {errormsg ""} {skipCB 0}} {
set state(error) [list $errormsg $errorInfo $errorCode]
set state(status) "error"
}
- if {($state(status) eq "timeout") || ($state(status) eq "error")
- || ([info exists state(connection)] && ($state(connection) eq "close"))
+ if {
+ ($state(status) eq "timeout") || ($state(status) eq "error") ||
+ ([info exists state(connection)] && ($state(connection) eq "close"))
} {
CloseSocket $state(sock) $token
}
- if {[info exists state(after)]} { after cancel $state(after) }
- if {[info exists state(-command)] && !$skipCB} {
- if {[catch {eval $state(-command) {$token}} err]} {
- if {$errormsg eq ""} {
- set state(error) [list $err $errorInfo $errorCode]
- set state(status) error
- }
+ if {[info exists state(after)]} {
+ after cancel $state(after)
+ }
+ if {[info exists state(-command)] && !$skipCB
+ && ![info exists state(done-command-cb)]} {
+ set state(done-command-cb) yes
+ if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} {
+ set state(error) [list $err $errorInfo $errorCode]
+ set state(status) error
}
- # Command callback may already have unset our state
- unset -nocomplain state(-command)
}
}
# http::CloseSocket -
#
-# Close a socket and remove it from the persistent sockets table.
-# If possible an http token is included here but when we are called
-# from a fileevent on remote closure we need to find the correct
-# entry - hence the second section.
+# Close a socket and remove it from the persistent sockets table. If
+# possible an http token is included here but when we are called from a
+# fileevent on remote closure we need to find the correct entry - hence
+# the second section.
proc ::http::CloseSocket {s {token {}}} {
variable socketmap
@@ -227,23 +231,27 @@ proc ::http::CloseSocket {s {token {}}} {
variable $token
upvar 0 $token state
if {[info exists state(socketinfo)]} {
- set conn_id $state(socketinfo)
+ set conn_id $state(socketinfo)
}
} else {
set map [array get socketmap]
set ndx [lsearch -exact $map $s]
if {$ndx != -1} {
- incr ndx -1
- set conn_id [lindex $map $ndx]
+ incr ndx -1
+ set conn_id [lindex $map $ndx]
}
}
if {$conn_id eq {} || ![info exists socketmap($conn_id)]} {
Log "Closing socket $s (no connection info)"
- if {[catch {close $s} err]} { Log "Error: $err" }
+ if {[catch {close $s} err]} {
+ Log "Error: $err"
+ }
} else {
if {[info exists socketmap($conn_id)]} {
Log "Closing connection $conn_id (sock $socketmap($conn_id))"
- if {[catch {close $socketmap($conn_id)} err]} { Log "Error: $err" }
+ if {[catch {close $socketmap($conn_id)} err]} {
+ Log "Error: $err"
+ }
unset socketmap($conn_id)
} else {
Log "Cannot close connection $conn_id - no socket in socket map"
@@ -262,7 +270,7 @@ proc ::http::CloseSocket {s {token {}}} {
# Side Effects:
# See Finish
-proc http::reset { token {why reset} } {
+proc http::reset {token {why reset}} {
variable $token
upvar 0 $token state
set state(status) $why
@@ -285,10 +293,10 @@ proc http::reset { token {why reset} } {
# args Option value pairs. Valid options include:
# -blocksize, -validate, -headers, -timeout
# Results:
-# Returns a token for this connection. This token is the name of an array
-# that the caller should unset to garbage collect the state.
+# Returns a token for this connection. This token is the name of an
+# array that the caller should unset to garbage collect the state.
-proc http::geturl { url args } {
+proc http::geturl {url args} {
variable http
variable urlTypes
variable defaultCharset
@@ -351,14 +359,17 @@ proc http::geturl { url args } {
}
set usage [join [lsort $options] ", "]
set options [string map {- ""} $options]
- set pat ^-([join $options |])$
+ set pat ^-(?:[join $options |])$
foreach {flag value} $args {
if {[regexp -- $pat $flag]} {
# Validate numbers
- if {[info exists type($flag)] &&
- ![string is $type($flag) -strict $value]} {
+ if {
+ [info exists type($flag)] &&
+ ![string is $type($flag) -strict $value]
+ } {
unset $token
- return -code error "Bad value for $flag ($value), must be $type($flag)"
+ return -code error \
+ "Bad value for $flag ($value), must be $type($flag)"
}
set state($flag) $value
} else {
@@ -384,20 +395,25 @@ proc http::geturl { url args } {
# First, before the colon, is the protocol scheme (e.g. http)
# Second, for HTTP-like protocols, is the authority
# The authority is preceded by // and lasts up to (but not including)
- # the following / and it identifies up to four parts, of which only one,
- # the host, is required (if an authority is present at all). All other
- # parts of the authority (user name, password, port number) are optional.
+ # the following / or ? and it identifies up to four parts, of which
+ # only one, the host, is required (if an authority is present at all).
+ # All other parts of the authority (user name, password, port number)
+ # are optional.
# Third is the resource name, which is split into two parts at a ?
# The first part (from the single "/" up to "?") is the path, and the
# second part (from that "?" up to "#") is the query. *HOWEVER*, we do
# not need to separate them; we send the whole lot to the server.
+ # Both the path and the query are allowed to be missing, including
+ # their delimiting characters.
# Fourth is the fragment identifier, which is everything after the first
# "#" in the URL. The fragment identifier MUST NOT be sent to the server
# and indeed, we don't bother to validate it (it could be an error to
# pass it in here, but it's cheap to strip).
#
# An example of a URL that has all the parts:
- # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
+ #
+ # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
+ #
# The "http" is the protocol, the user is "jschmoe", the password is
# "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
# "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
@@ -405,12 +421,10 @@ proc http::geturl { url args } {
# Note that the RE actually combines the user and password parts, as
# recommended in RFC 3986. Indeed, that RFC states that putting passwords
# in URLs is a Really Bad Idea, something with which I would agree utterly.
- # Also note that we do not currently support IPv6 addresses.
#
# From a validation perspective, we need to ensure that the parts of the
- # URL that are going to the server are correctly encoded.
- # This is only done if $state(-strict) is true (inherited from
- # $::http::strict).
+ # URL that are going to the server are correctly encoded. This is only
+ # done if $state(-strict) is true (inherited from $::http::strict).
set URLmatcher {(?x) # this is _expanded_ syntax
^
@@ -421,10 +435,13 @@ proc http::geturl { url args } {
[^@/\#?]+ # <userinfo part of authority>
) @
)?
- ( [^/:\#?]+ ) # <host part of authority>
+ ( # <host part of authority>
+ [^/:\#?]+ | # host name or IPv4 address
+ \[ [^/\#?]+ \] # IPv6 address in square brackets
+ )
(?: : (\d+) )? # <port part of authority>
)?
- ( / [^\#?]* (?: \? [^\#?]* )?)? # <path> (including query)
+ ( [/\?] [^\#]*)? # <path> (including query)
(?: \# (.*) )? # <fragment>
$
}
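To make the pieces concrete, here is a minimal usage sketch of geturl in both its synchronous and callback forms, reusing the illustrative host from the comment above (not a real server):

    package require http
    # Synchronous: geturl blocks (via http::wait) until the transfer ends.
    set tok [::http::geturl http://www.bogus.net:8000/foo/bar.tml?q=foo]
    puts "[::http::status $tok]: [string length [::http::data $tok]] bytes"
    ::http::cleanup $tok

    # Asynchronous: with -command, geturl returns at once and the
    # callback receives the token when the transfer finishes.
    ::http::geturl http://www.bogus.net:8000/foo/bar.tml?q=foo \
        -timeout 10000 -command ::http::cleanup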
@@ -435,6 +452,7 @@ proc http::geturl { url args } {
return -code error "Unsupported URL: $url"
}
# Phase two: validate
+ set host [string trim $host {[]}]; # strip square brackets from IPv6 address
if {$host eq ""} {
# Caller has to provide a host name; we do not have a "default host"
# that would enable us to handle relative URLs.
@@ -467,6 +485,12 @@ proc http::geturl { url args } {
}
}
if {$srvurl ne ""} {
+ # RFC 3986 allows empty paths (not even a /), but servers
+ # return 400 if the path in the HTTP request doesn't start
+ # with / , so add it here if needed.
+ if {[string index $srvurl 0] ne "/"} {
+ set srvurl /$srvurl
+ }
# Check for validity according to RFC 3986, Appendix A
set validityRE {(?xi)
^
@@ -481,7 +505,7 @@ proc http::geturl { url args } {
# Provide a better error message in this error case
if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
return -code error \
- "Illegal encoding character usage \"$bad\" in URL path"
+ "Illegal encoding character usage \"$bad\" in URL path"
}
return -code error "Illegal characters in URL path"
}
@@ -491,12 +515,13 @@ proc http::geturl { url args } {
if {$proto eq ""} {
set proto http
}
- if {![info exists urlTypes($proto)]} {
+ set lower [string tolower $proto]
+ if {![info exists urlTypes($lower)]} {
unset $token
return -code error "Unsupported URL type \"$proto\""
}
- set defport [lindex $urlTypes($proto) 0]
- set defcmd [lindex $urlTypes($proto) 1]
+ set defport [lindex $urlTypes($lower) 0]
+ set defcmd [lindex $urlTypes($lower) 1]
if {$port eq ""} {
set port $defport
@@ -523,11 +548,10 @@ proc http::geturl { url args } {
# If a timeout is specified we set up the after event and arrange for an
# asynchronous socket connection.
- set sockopts [list]
+ set sockopts [list -async]
if {$state(-timeout) > 0} {
set state(after) [after $state(-timeout) \
[list http::reset $token timeout]]
- lappend sockopts -async
}
# If we are using the proxy, we must pass in the full URL that includes
@@ -565,15 +589,15 @@ proc http::geturl { url args } {
lappend sockopts -myaddr $state(-myaddr)
}
if {[catch {eval $defcmd $sockopts $targetAddr} sock]} {
- # something went wrong while trying to establish the
- # connection. Clean up after events and such, but DON'T call the
- # command callback (if available) because we're going to throw an
- # exception from here instead.
+ # something went wrong while trying to establish the connection.
+ # Clean up after events and such, but DON'T call the command
+ # callback (if available) because we're going to throw an
+ # exception from here instead.
set state(sock) $sock
- Finish $token "" 1
- cleanup $token
- return -code error $sock
+ Finish $token "" 1
+ cleanup $token
+ return -code error $sock
}
}
set state(sock) $sock
@@ -583,16 +607,21 @@ proc http::geturl { url args } {
set socketmap($state(socketinfo)) $sock
}
- # Wait for the connection to complete.
+ if {![info exists phost]} {
+ set phost ""
+ }
+ fileevent $sock writable [list http::Connect $token $proto $phost $srvurl]
- if {$state(-timeout) > 0} {
- fileevent $sock writable [list http::Connect $token]
+ # Wait for the connection to complete.
+ if {![info exists state(-command)]} {
+ # geturl does EVERYTHING asynchronously, so if the user
+ # calls it synchronously, we just do a wait here.
http::wait $token
if {![info exists state]} {
# If we timed out then Finish has been called and the users
- # command callback may have cleaned up the token. If so
- # we end up here with nothing left to do.
+ # command callback may have cleaned up the token. If so we end up
+ # here with nothing left to do.
return $token
} elseif {$state(status) eq "error"} {
# Something went wrong while trying to establish the connection.
@@ -602,13 +631,30 @@ proc http::geturl { url args } {
set err [lindex $state(error) 0]
cleanup $token
return -code error $err
- } elseif {$state(status) ne "connect"} {
- # Likely to be connection timeout
- return $token
}
- set state(status) ""
}
+ return $token
+}
+
+
+proc http::Connected { token proto phost srvurl} {
+ variable http
+ variable urlTypes
+
+ variable $token
+ upvar 0 $token state
+
+ # Set back the variables needed here
+ set sock $state(sock)
+ set isQueryChannel [info exists state(-querychannel)]
+ set isQuery [info exists state(-query)]
+ set host [lindex [split $state(socketinfo) :] 0]
+ set port [lindex [split $state(socketinfo) :] 1]
+
+ set lower [string tolower $proto]
+ set defport [lindex $urlTypes($lower) 0]
+
# Send data in cr-lf format, but accept any line terminators
fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize)
@@ -640,17 +686,21 @@ proc http::geturl { url args } {
if {[info exists state(-method)] && $state(-method) ne ""} {
set how $state(-method)
}
-
+ # We cannot handle chunked encodings with -handler, so force HTTP/1.0
+ # until we can manage this.
+ if {[info exists state(-handler)]} {
+ set state(-protocol) 1.0
+ }
if {[catch {
puts $sock "$how $srvurl HTTP/$state(-protocol)"
puts $sock "Accept: $http(-accept)"
array set hdrs $state(-headers)
if {[info exists hdrs(Host)]} {
- # Allow Host spoofing [Bug 928154]
+ # Allow Host spoofing. [Bug 928154]
puts $sock "Host: $hdrs(Host)"
} elseif {$port == $defport} {
- # Don't add port in this case, to handle broken servers.
- # [Bug #504508]
+ # Don't add port in this case, to handle broken servers. [Bug
+ # #504508]
puts $sock "Host: $host"
} else {
puts $sock "Host: $host:$port"
@@ -658,20 +708,26 @@ proc http::geturl { url args } {
unset hdrs
puts $sock "User-Agent: $http(-useragent)"
if {$state(-protocol) == 1.0 && $state(-keepalive)} {
- puts $sock "Connection: keep-alive"
+ puts $sock "Connection: keep-alive"
}
if {$state(-protocol) > 1.0 && !$state(-keepalive)} {
- puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
+ puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
}
if {[info exists phost] && ($phost ne "") && $state(-keepalive)} {
- puts $sock "Proxy-Connection: Keep-Alive"
+ puts $sock "Proxy-Connection: Keep-Alive"
}
set accept_encoding_seen 0
+ set content_type_seen 0
foreach {key value} $state(-headers) {
- if {[string equal -nocase $key "host"]} { continue }
- if {[string equal -nocase $key "accept-encoding"]} {
- set accept_encoding_seen 1
- }
+ if {[string equal -nocase $key "host"]} {
+ continue
+ }
+ if {[string equal -nocase $key "accept-encoding"]} {
+ set accept_encoding_seen 1
+ }
+ if {[string equal -nocase $key "content-type"]} {
+ set content_type_seen 1
+ }
set value [string map [list \n "" \r ""] $value]
set key [string trim $key]
if {[string equal -nocase $key "content-length"]} {
@@ -682,11 +738,8 @@ proc http::geturl { url args } {
puts $sock "$key: $value"
}
}
- # Soft zlib dependency check - no package require
- if {!$accept_encoding_seen && [llength [package provide zlib]]
- && !([info exists state(-channel)] || [info exists state(-handler)])
- } {
- puts $sock "Accept-Encoding: gzip, identity, *;q=0.1"
+ if {!$accept_encoding_seen && ![info exists state(-handler)]} {
+ puts $sock "Accept-Encoding: deflate,gzip,compress"
}
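Since a caller-supplied Accept-Encoding header suppresses the automatic one, a client that cannot handle compressed bodies can opt out per request. A hedged sketch ($url is a placeholder):

    # Request an uncompressed response body only.
    set tok [::http::geturl $url -headers {Accept-Encoding identity}]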
if {$isQueryChannel && $state(querylength) == 0} {
# Try to determine size of data in channel. If we cannot seek, the
@@ -707,16 +760,19 @@ proc http::geturl { url args } {
# It is possible to have both the read and write fileevents active at
# this point. The only scenario it seems to affect is a server that
# closes the connection without reading the POST data. (e.g., early
- # versions TclHttpd in various error cases). Depending on the platform,
- # the client may or may not be able to get the response from the server
- # because of the error it will get trying to write the post data.
- # Having both fileevents active changes the timing and the behavior,
- # but no two platforms (among Solaris, Linux, and NT) behave the same,
- # and none behave all that well in any case. Servers should always read
- # their POST data if they expect the client to read their response.
+ # versions of TclHttpd in various error cases). Depending on the
+ # platform, the client may or may not be able to get the response from
+ # the server because of the error it will get trying to write the post
+ # data. Having both fileevents active changes the timing and the
+ # behavior, but no two platforms (among Solaris, Linux, and NT) behave
+ # the same, and none behave all that well in any case. Servers should
+ # always read their POST data if they expect the client to read their
+ # response.
if {$isQuery || $isQueryChannel} {
- puts $sock "Content-Type: $state(-type)"
+ if {!$content_type_seen} {
+ puts $sock "Content-Type: $state(-type)"
+ }
if {!$contDone} {
puts $sock "Content-Length: $state(querylength)"
}
@@ -729,35 +785,17 @@ proc http::geturl { url args } {
fileevent $sock readable [list http::Event $sock $token]
}
- if {! [info exists state(-command)]} {
- # geturl does EVERYTHING asynchronously, so if the user calls it
- # synchronously, we just do a wait here.
-
- wait $token
- if {$state(status) eq "error"} {
- # Something went wrong, so throw the exception, and the
- # enclosing catch will do cleanup.
- return -code error [lindex $state(error) 0]
- }
- }
} err]} {
# The socket probably was never connected, or the connection dropped
# later.
- # Clean up after events and such, but DON'T call the command callback
- # (if available) because we're going to throw an exception from here
- # instead.
-
# if state(status) is error, it means someone's already called Finish
# to do the above-described clean up.
if {$state(status) ne "error"} {
- Finish $token $err 1
+ Finish $token $err
}
- cleanup $token
- return -code error $err
}
- return $token
}
# Data access functions:
@@ -772,7 +810,9 @@ proc http::data {token} {
return $state(body)
}
proc http::status {token} {
- if {![info exists $token]} { return "error" }
+ if {![info exists $token]} {
+ return "error"
+ }
variable $token
upvar 0 $token state
return $state(status)
@@ -839,16 +879,18 @@ proc http::cleanup {token} {
# Sets the status of the connection, which unblocks
# the waiting geturl call
-proc http::Connect {token} {
+proc http::Connect {token proto phost srvurl} {
variable $token
upvar 0 $token state
- global errorInfo errorCode
- if {[eof $state(sock)] ||
- [string length [fconfigure $state(sock) -error]]} {
- Finish $token "connect failed [fconfigure $state(sock) -error]" 1
+ set err "due to unexpected EOF"
+ if {
+ [eof $state(sock)] ||
+ [set err [fconfigure $state(sock) -error]] ne ""
+ } {
+ Finish $token "connect failed $err"
} else {
- set state(status) connect
fileevent $state(sock) writable {}
+ ::http::Connected $token $proto $phost $srvurl
}
return
}
@@ -883,7 +925,6 @@ proc http::Write {token} {
incr state(queryoffset) $state(-queryblocksize)
if {$state(queryoffset) >= $state(querylength)} {
set state(queryoffset) $state(querylength)
- puts $sock ""
set done 1
}
} else {
@@ -934,8 +975,8 @@ proc http::Event {sock token} {
if {![info exists state]} {
Log "Event $sock with invalid token '$token' - remote close?"
- if {! [eof $sock]} {
- if {[string length [set d [read $sock]]] != 0} {
+ if {![eof $sock]} {
+ if {[set d [read $sock]] ne ""} {
Log "WARNING: additional data left on closed socket"
}
}
@@ -943,9 +984,10 @@ proc http::Event {sock token} {
return
}
if {$state(state) eq "connecting"} {
- set state(state) "header"
if {[catch {gets $sock state(http)} n]} {
return [Finish $token $n]
+ } elseif {$n >= 0} {
+ set state(state) "header"
}
} elseif {$state(state) eq "header"} {
if {[catch {gets $sock line} n]} {
@@ -953,7 +995,9 @@ proc http::Event {sock token} {
} elseif {$n == 0} {
# We have now read all headers
# We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3
- if {$state(http) == "" || [lindex $state(http) 1] == 100} { return }
+ if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} {
+ return
+ }
set state(state) body
@@ -963,15 +1007,16 @@ proc http::Event {sock token} {
return
}
- # For non-chunked transfer we may have no body -- in this case we
- # may get no further file event if the connection doesn't close and
- # no more data is sent. We can tell and must finish up now - not
- # later.
- if {!(([info exists state(connection)]
- && ($state(connection) eq "close"))
- || [info exists state(transfer)])
- && $state(totalsize) == 0
- } then {
+ # For non-chunked transfer we may have no body - in this case we
+ # may get no further file event if the connection doesn't close
+ # and no more data is sent. We can tell and must finish up now -
+ # not later.
+ if {
+ !(([info exists state(connection)]
+ && ($state(connection) eq "close"))
+ || [info exists state(transfer)])
+ && ($state(totalsize) == 0)
+ } {
Log "body size is 0 and no events likely - complete."
Eof $token
return
@@ -980,22 +1025,22 @@ proc http::Event {sock token} {
# We have to use binary translation to count bytes properly.
fconfigure $sock -translation binary
- if {$state(-binary) || ![string match -nocase text* $state(type)]} {
+ if {
+ $state(-binary) || ![string match -nocase text* $state(type)]
+ } {
# Turn off conversions for non-text data
set state(binary) 1
}
- if {$state(binary) || [string match *gzip* $state(coding)]
- || [string match *compress* $state(coding)]} {
- if {[info exists state(-channel)]} {
+ if {[info exists state(-channel)]} {
+ if {$state(binary) || [llength [ContentEncoding $token]]} {
fconfigure $state(-channel) -translation binary
}
- }
- if {[info exists state(-channel)] &&
- ![info exists state(-handler)]} {
- # Initiate a sequence of background fcopies
- fileevent $sock readable {}
- CopyStart $sock $token
- return
+ if {![info exists state(-handler)]} {
+ # Initiate a sequence of background fcopies
+ fileevent $sock readable {}
+ CopyStart $sock $token
+ return
+ }
}
} elseif {$n > 0} {
# Process header lines
@@ -1004,8 +1049,14 @@ proc http::Event {sock token} {
content-type {
set state(type) [string trim [string tolower $value]]
# grab the optional charset information
- regexp -nocase {charset\s*=\s*(\S+?);?} \
- $state(type) -> state(charset)
+ if {[regexp -nocase \
+ {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
+ $state(type) -> cs]} {
+ set state(charset) [string map {{\"} \"} $cs]
+ } else {
+ regexp -nocase {charset\s*=\s*(\S+?);?} \
+ $state(type) -> state(charset)
+ }
}
content-length {
set state(totalsize) [string trim $value]
@@ -1041,8 +1092,10 @@ proc http::Event {sock token} {
Log "final chunk part"
Eof $token
}
- } elseif {[info exists state(transfer)]
- && $state(transfer) eq "chunked"} {
+ } elseif {
+ [info exists state(transfer)]
+ && $state(transfer) eq "chunked"
+ } {
set size 0
set chunk [getTextLine $sock]
set n [string length $chunk]
@@ -1079,8 +1132,10 @@ proc http::Event {sock token} {
incr state(currentsize) $n
}
# If Content-Length - check for end of data.
- if {($state(totalsize) > 0)
- && ($state(currentsize) >= $state(totalsize))} {
+ if {
+ ($state(totalsize) > 0)
+ && ($state(currentsize) >= $state(totalsize))
+ } {
Eof $token
}
}
@@ -1137,14 +1192,54 @@ proc http::getTextLine {sock} {
# Side Effects
# This closes the connection upon error
-proc http::CopyStart {sock token} {
- variable $token
+proc http::CopyStart {sock token {initial 1}} {
+ upvar #0 $token state
+ if {[info exists state(transfer)] && $state(transfer) eq "chunked"} {
+ foreach coding [ContentEncoding $token] {
+ lappend state(zlib) [zlib stream $coding]
+ }
+ make-transformation-chunked $sock [namespace code [list CopyChunk $token]]
+ } else {
+ if {$initial} {
+ foreach coding [ContentEncoding $token] {
+ zlib push $coding $sock
+ }
+ }
+ if {[catch {
+ fcopy $sock $state(-channel) -size $state(-blocksize) -command \
+ [list http::CopyDone $token]
+ } err]} {
+ Finish $token $err
+ }
+ }
+}
+
+proc http::CopyChunk {token chunk} {
upvar 0 $token state
- if {[catch {
- fcopy $sock $state(-channel) -size $state(-blocksize) -command \
- [list http::CopyDone $token]
- } err]} {
- Finish $token $err
+ if {[set count [string length $chunk]]} {
+ incr state(currentsize) $count
+ if {[info exists state(zlib)]} {
+ foreach stream $state(zlib) {
+ set chunk [$stream add $chunk]
+ }
+ }
+ puts -nonewline $state(-channel) $chunk
+ if {[info exists state(-progress)]} {
+ eval [linsert $state(-progress) end \
+ $token $state(totalsize) $state(currentsize)]
+ }
+ } else {
+ Log "CopyChunk Finish $token"
+ if {[info exists state(zlib)]} {
+ set excess ""
+ foreach stream $state(zlib) {
+ catch {set excess [$stream add -finalize $excess]}
+ }
+ puts -nonewline $state(-channel) $excess
+ foreach stream $state(zlib) { $stream close }
+ unset state(zlib)
+ }
+ Eof $token ;# FIX ME: pipelining.
}
}
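The state(zlib) list holds Tcl 8.6 streaming (de)compressors, and each chunk is pushed through them as it arrives. A stand-alone sketch of the same zlib stream calls, outside the package:

    # Round-trip a string through a streaming gzip decompressor.
    set compressed [zlib gzip "hello, world"]
    set strm [zlib stream gunzip]
    set out [$strm add $compressed]
    append out [$strm add -finalize ""]
    $strm close
    # $out is now "hello, world"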
@@ -1174,7 +1269,7 @@ proc http::CopyDone {token count {error {}}} {
} elseif {[catch {eof $sock} iseof] || $iseof} {
Eof $token
} else {
- CopyStart $sock $token
+ CopyStart $sock $token 0
}
}
@@ -1198,30 +1293,31 @@ proc http::Eof {token {force 0}} {
set state(status) ok
}
- if {($state(coding) eq "gzip") && [string length $state(body)] > 0} {
- if {[catch {
- set state(body) [Gunzip $state(body)]
- } err]} {
- return [Finish $token $err]
- }
- }
-
- if {!$state(binary)} {
+ if {[string length $state(body)] > 0} {
+ if {[catch {
+ foreach coding [ContentEncoding $token] {
+ set state(body) [zlib $coding $state(body)]
+ }
+ } err]} {
+ Log "error doing $coding '$state(body)'"
+ return [Finish $token $err]
+ }
- # If we are getting text, set the incoming channel's
- # encoding correctly. iso8859-1 is the RFC default, but
- # this could be any IANA charset. However, we only know
- # how to convert what we have encodings for.
+ if {!$state(binary)} {
+ # If we are getting text, set the incoming channel's encoding
+ # correctly. iso8859-1 is the RFC default, but this could be any IANA
+ # charset. However, we only know how to convert what we have
+ # encodings for.
- set enc [CharsetToEncoding $state(charset)]
- if {$enc ne "binary"} {
- set state(body) [encoding convertfrom $enc $state(body)]
- }
+ set enc [CharsetToEncoding $state(charset)]
+ if {$enc ne "binary"} {
+ set state(body) [encoding convertfrom $enc $state(body)]
+ }
- # Translate text line endings.
- set state(body) [string map {\r\n \n \r \n} $state(body)]
+ # Translate text line endings.
+ set state(body) [string map {\r\n \n \r \n} $state(body)]
+ }
}
-
Finish $token
}
@@ -1297,7 +1393,7 @@ proc http::mapReply {string} {
}
set converted [string map $formMap $string]
if {[string match "*\[\u0100-\uffff\]*" $converted]} {
- regexp {[\u0100-\uffff]} $converted badChar
+ regexp "\[\u0100-\uffff\]" $converted badChar
# Return this error message for maximum compatibility... :^/
return -code error \
"can't read \"formMap($badChar)\": no such element in array"
@@ -1317,8 +1413,10 @@ proc http::mapReply {string} {
proc http::ProxyRequired {host} {
variable http
if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
- if {![info exists http(-proxyport)] || \
- ![string length $http(-proxyport)]} {
+ if {
+ ![info exists http(-proxyport)] ||
+ ![string length $http(-proxyport)]
+ } {
set http(-proxyport) 8080
}
return [list $http(-proxyhost) $http(-proxyport)]
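ProxyRequired is only the default -proxyfilter, so routing everything through a proxy is a matter of configuration. A small sketch (host and port are placeholders):

    ::http::config -proxyhost proxy.example.com -proxyport 3128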
@@ -1327,33 +1425,33 @@ proc http::ProxyRequired {host} {
# http::CharsetToEncoding --
#
-# Tries to map a given IANA charset to a tcl encoding.
-# If no encoding can be found, returns binary.
+# Tries to map a given IANA charset to a tcl encoding. If no encoding
+# can be found, returns binary.
#
proc http::CharsetToEncoding {charset} {
variable encodings
set charset [string tolower $charset]
- if {[regexp {iso-?8859-([0-9]+)} $charset - num]} {
+ if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} {
set encoding "iso8859-$num"
- } elseif {[regexp {iso-?2022-(jp|kr)} $charset - ext]} {
+ } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} {
set encoding "iso2022-$ext"
- } elseif {[regexp {shift[-_]?js} $charset -]} {
+ } elseif {[regexp {shift[-_]?js} $charset]} {
set encoding "shiftjis"
- } elseif {[regexp {(windows|cp)-?([0-9]+)} $charset - - num]} {
+ } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} {
set encoding "cp$num"
} elseif {$charset eq "us-ascii"} {
set encoding "ascii"
- } elseif {[regexp {(iso-?)?lat(in)?-?([0-9]+)} $charset - - - num]} {
+ } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} {
switch -- $num {
5 {set encoding "iso8859-9"}
- 1 -
- 2 -
- 3 {set encoding "iso8859-$num"}
+ 1 - 2 - 3 {
+ set encoding "iso8859-$num"
+ }
}
} else {
- # other charset, like euc-xx, utf-8,... may directly maps to encoding
+ # other charset, like euc-xx, utf-8,... may directly map to encoding
set encoding $charset
}
set idx [lsearch -exact $encodings $encoding]
@@ -1364,58 +1462,57 @@ proc http::CharsetToEncoding {charset} {
}
}
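A few sample mappings produced by the tightened expressions above (CharsetToEncoding is internal; the calls are purely illustrative):

    ::http::CharsetToEncoding iso-8859-2    ;# -> iso8859-2
    ::http::CharsetToEncoding windows-1252  ;# -> cp1252
    ::http::CharsetToEncoding latin-5       ;# -> iso8859-9
    ::http::CharsetToEncoding us-ascii      ;# -> ascii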
-# http::Gunzip --
-#
-# Decompress data transmitted using the gzip transfer coding.
-#
-
-# FIX ME: redo using zlib sinflate
-proc http::Gunzip {data} {
- binary scan $data Scb5icc magic method flags time xfl os
- set pos 10
- if {$magic != 0x1f8b} {
- return -code error "invalid data: supplied data is not in gzip format"
- }
- if {$method != 8} {
- return -code error "invalid compression method"
- }
-
- foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break
- set extra ""
- if { $f_extra } {
- binary scan $data @${pos}S xlen
- incr pos 2
- set extra [string range $data $pos $xlen]
- set pos [incr xlen]
- }
-
- set name ""
- if { $f_name } {
- set ndx [string first \0 $data $pos]
- set name [string range $data $pos $ndx]
- set pos [incr ndx]
- }
-
- set comment ""
- if { $f_comment } {
- set ndx [string first \0 $data $pos]
- set comment [string range $data $pos $ndx]
- set pos [incr ndx]
- }
-
- set fcrc ""
- if { $f_crc } {
- set fcrc [string range $data $pos [incr pos]]
- incr pos
+# Return the list of content-encoding transformations we need to do in order.
+proc http::ContentEncoding {token} {
+ upvar 0 $token state
+ set r {}
+ if {[info exists state(coding)]} {
+ foreach coding [split $state(coding) ,] {
+ switch -exact -- $coding {
+ deflate { lappend r inflate }
+ gzip - x-gzip { lappend r gunzip }
+ compress - x-compress { lappend r decompress }
+ identity {}
+ default {
+ return -code error "unsupported content-encoding \"$coding\""
+ }
+ }
+ }
}
+ return $r
+}
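So a plain "Content-Encoding: gzip" response yields the list {gunzip}, which Eof later applies to the buffered body in a single call; a sketch of that step:

    set body [zlib gunzip $body]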
- binary scan [string range $data end-7 end] ii crc size
- set inflated [zlib inflate [string range $data $pos end-8]]
- set chk [zlib crc32 $inflated]
- if { ($crc & 0xffffffff) != ($chk & 0xffffffff)} {
- return -code error "invalid data: checksum mismatch $crc != $chk"
- }
- return $inflated
+proc http::make-transformation-chunked {chan command} {
+ set lambda {{chan command} {
+ set data ""
+ set size -1
+ yield
+ while {1} {
+ chan configure $chan -translation {crlf binary}
+ while {[gets $chan line] < 1} { yield }
+ chan configure $chan -translation {binary binary}
+ if {[scan $line %x size] != 1} { return -code error "invalid size: \"$line\"" }
+ set chunk ""
+ while {$size && ![chan eof $chan]} {
+ set part [chan read $chan $size]
+ incr size -[string length $part]
+ append chunk $part
+ }
+ if {[catch {
+ uplevel #0 [linsert $command end $chunk]
+ }]} {
+ http::Log "Error in callback: $::errorInfo"
+ }
+ if {[string length $chunk] == 0} {
+ # channel might have been closed in the callback
+ catch {chan event $chan readable {}}
+ return
+ }
+ }
+ }}
+ coroutine dechunk$chan ::apply $lambda $chan $command
+ chan event $chan readable [namespace origin dechunk$chan]
+ return
}
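For reference, the wire format the coroutine decodes is a series of CRLF-delimited hex chunk sizes followed by chunk data, ended by a zero-sized chunk; a tiny illustrative stream:

    # The dechunker would hand the callback "Wiki", then "pedia",
    # then an empty (final) chunk.
    set wire "4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n"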
# Local variables: