From abb6e593c4fe93a99241275011aaa97b6a259485 Mon Sep 17 00:00:00 2001 From: hobbs Date: Wed, 12 Mar 2008 09:51:36 +0000 Subject: * unix/Makefile.in (install-libraries): Bump http to 2.7 * win/Makefile.in (install-libraries): Added -myaddr option to allow * library/http/http.tcl (http::geturl): control of selected socket * library/http/pkgIndex.tcl: interface. [Bug 559898] * doc/http.n, tests/http.test: Added -keepalive and -protocol 1.1 with chunked transfer encoding support. [Bug 1063703, 1470377, 219225] Added ability to override Host in -headers. [Bug 928154] Added -strict option to control URL validation on per-call basis. [Bug 1560506] --- ChangeLog | 13 + doc/http.n | 34 ++- library/http/http.tcl | 598 ++++++++++++++++++++++++++++++++++++---------- library/http/pkgIndex.tcl | 10 +- tests/http.test | 16 +- unix/Makefile.in | 6 +- win/Makefile.in | 6 +- 7 files changed, 530 insertions(+), 153 deletions(-) diff --git a/ChangeLog b/ChangeLog index dabeb47..25b941c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2008-03-12 Jeff Hobbs + + * unix/Makefile.in (install-libraries): Bump http to 2.7 + * win/Makefile.in (install-libraries): Added -myaddr option to allow + * library/http/http.tcl (http::geturl): control of selected socket + * library/http/pkgIndex.tcl: interface. [Bug 559898] + * doc/http.n, tests/http.test: Added -keepalive and + -protocol 1.1 with chunked transfer encoding support. [Bug + 1063703, 1470377, 219225] + Added ability to override Host in -headers. [Bug 928154] + Added -strict option to control URL validation on per-call basis. + [Bug 1560506] + 2008-03-11 Jeff Hobbs * library/http/http.tcl (http::geturl): add -method option to support diff --git a/doc/http.n b/doc/http.n index 69538f4..8c4434e 100644 --- a/doc/http.n +++ b/doc/http.n @@ -6,16 +6,16 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: http.n,v 1.34 2008/03/12 05:57:44 hobbs Exp $ +'\" RCS: @(#) $Id: http.n,v 1.35 2008/03/12 09:51:39 hobbs Exp $ '\" .so man.macros -.TH "http" n 2.5.5 http "Tcl Bundled Packages" +.TH "http" n 2.7 http "Tcl Bundled Packages" .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME -http \- Client-side implementation of the HTTP/1.0 protocol +http \- Client-side implementation of the HTTP/1.1 protocol .SH SYNOPSIS -\fBpackage require http ?2.5.5?\fR +\fBpackage require http ?2.7?\fR .\" See Also -useragent option documentation in body! .sp \fB::http::config \fI?options?\fR @@ -50,9 +50,9 @@ http \- Client-side implementation of the HTTP/1.0 protocol .BE .SH DESCRIPTION .PP -The \fBhttp\fR package provides the client side of the HTTP/1.0 +The \fBhttp\fR package provides the client side of the HTTP/1.1 protocol. The package implements the GET, POST, and HEAD operations -of HTTP/1.0. It allows configuration of a proxy host to get through +of HTTP/1.1. It allows configuration of a proxy host to get through firewalls. The package is compatible with the \fBSafesock\fR security policy, so it can be used by untrusted applets to do URL fetching from a restricted set of hosts. This package can be extended to support @@ -123,7 +123,7 @@ characters. .TP \fB\-useragent\fR \fIstring\fR The value of the User-Agent header in the HTTP request. The default is -.QW "\fBTcl http client package 2.5\fR" . +.QW "\fBTcl http client package 2.7\fR" . .RE .TP \fB::http::geturl\fR \fIurl\fR ?\fIoptions\fR? @@ -206,11 +206,19 @@ HTTP request: Pragma: no-cache .CE .TP +\fB\-keepalive\fR \fIboolean\fR +If true (the default), attempt to keep the connection open for servicing +multiple requests. +.TP \fB\-method\fR \fItype\fR Force the HTTP request method to \fItype\fR. \fB::http::geturl\fR will -auto-select GET, POST or HEAD based on other optiosn, but this option +auto-select GET, POST or HEAD based on other options, but this option enables choices like PUT and DELETE for webdav support. .TP +\fB\-myaddr\fR \fIaddress\fR +Pass an specific local address to the underlying \fBsocket\fR call in case +multiple interfaces are available. +.TP \fB\-progress\fR \fIcallback\fR The \fIcallback\fR is made after each transfer of data from the URL. The callback gets three additional arguments: the \fItoken\fR from @@ -227,6 +235,11 @@ proc httpProgress {token total current} { .CE .RE .TP +\fB\-protocol\fR \fIversion\fR +Select the HTTP protocol version to use. This should be 1.0 or 1.1 (the +default). Should only be necessary for servers that do not understand or +otherwise complain about HTTP/1.1. +.TP \fB\-query\fR \fIquery\fR This flag causes \fB::http::geturl\fR to do a POST request that passes the \fIquery\fR to the server. The \fIquery\fR must be an x-url-encoding @@ -256,6 +269,9 @@ The \fIcallback\fR is made after each transfer of data to the URL (i.e. POST) and acts exactly like the \fB\-progress\fR option (the callback format is the same). .TP +\fB\-strict\fR \fIboolean\fR +Whether to enforce RFC 3986 URL validation on the request. Default is 1. +.TP \fB\-timeout\fR \fImilliseconds\fR If \fImilliseconds\fR is non-zero, then \fB::http::geturl\fR sets up a timeout to occur after the specified number of milliseconds. @@ -458,7 +474,7 @@ The HTTP status reply from the server. This value is returned by the \fB::http::code\fR command. The format of this value is: .RS .CS -\fIHTTP/1.0 code string\fR +\fIHTTP/1.1 code string\fR .CE The \fIcode\fR is a three-digit number defined in the HTTP standard. A code of 200 is OK. Codes beginning with 4 or 5 indicate errors. diff --git a/library/http/http.tcl b/library/http/http.tcl index d508ba0..98b1a64 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -8,23 +8,12 @@ # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: http.tcl,v 1.65 2008/03/12 05:57:44 hobbs Exp $ - -# Rough version history: -# 1.0 Old http_get interface. -# 2.0 http:: namespace and http::geturl. -# 2.1 Added callbacks to handle arriving data, and timeouts. -# 2.2 Added ability to fetch into a channel. -# 2.3 Added SSL support, and ability to post from a channel. This version -# also cleans up error cases and eliminates the "ioerror" status in -# favor of raising an error -# 2.4 Added -binary option to http::geturl and charset element to the state -# array. +# RCS: @(#) $Id: http.tcl,v 1.66 2008/03/12 09:51:38 hobbs Exp $ package require Tcl 8.4 # Keep this in sync with pkgIndex.tcl and with the install directories # in Makefiles -package provide http 2.5.5 +package provide http 2.7 namespace eval http { # Allow resourcing to not clobber existing data @@ -56,6 +45,16 @@ namespace eval http { # These are handled specially set map(\n) %0d%0a variable formMap [array get map] + + # Create a map for HTTP/1.1 open sockets + variable socketmap + if {[info exists socketmap]} { + # Close but don't remove open sockets on re-init + foreach {url sock} [array get socketmap] { + catch {close $sock} + } + } + array set socketmap {} } init @@ -66,15 +65,37 @@ namespace eval http { variable encodings [string tolower [encoding names]] # This can be changed, but iso8859-1 is the RFC standard. - variable defaultCharset "iso8859-1" + variable defaultCharset + if {![info exists defaultCharset]} { + set defaultCharset "iso8859-1" + } # Force RFC 3986 strictness in geturl url verification? - variable strict 1 + variable strict + if {![info exists strict]} { + set strict 1 + } + + # Let user control default keepalive for compatibility + variable defaultKeepalive + if {![info exists defaultKeepalive]} { + set defaultKeepalive 1 + } namespace export geturl config reset wait formatQuery register unregister # Useful, but not exported: data size status code } +# http::Log -- +# +# Debugging output -- define this to observe HTTP/1.1 socket usage. +# Should echo any args received. +# +# Arguments: +# msg Message to output +# +proc http::Log {args} {} + # http::register -- # # See documentation for details. @@ -173,7 +194,11 @@ proc http::Finish { token {errormsg ""} {skipCB 0}} { set state(error) [list $errormsg $errorInfo $errorCode] set state(status) "error" } - catch {close $state(sock)} + if {($state(status) eq "timeout") || ($state(status) eq "error") + || ([info exists state(connection)] && ($state(connection) eq "close")) + } { + CloseSocket $state(sock) $token + } if {[info exists state(after)]} { after cancel $state(after) } if {[info exists state(-command)] && !$skipCB} { if {[catch {eval $state(-command) {$token}} err]} { @@ -187,6 +212,45 @@ proc http::Finish { token {errormsg ""} {skipCB 0}} { } } +# http::CloseSocket - +# +# Close a socket and remove it from the persistent sockets table. +# If possible an http token is included here but when we are called +# from a fileevent on remote closure we need to find the correct +# entry - hence the second section. + +proc ::http::CloseSocket {s {token {}}} { + variable socketmap + catch {fileevent $s readable {}} + set conn_id {} + if {$token ne ""} { + variable $token + upvar 0 $token state + if {[info exists state(socketinfo)]} { + set conn_id $state(socketinfo) + } + } else { + set map [array get socketmap] + set ndx [lsearch -exact $map $s] + if {$ndx != -1} { + incr ndx -1 + set conn_id [lindex $map $ndx] + } + } + if {$conn_id eq {} || ![info exists socketmap($conn_id)]} { + Log "Closing socket $s (no connection info)" + if {[catch {close $s} err]} { Log "Error: $err" } + } else { + if {[info exists socketmap($conn_id)]} { + Log "Closing connection $conn_id (sock $socketmap($conn_id))" + if {[catch {close $socketmap($conn_id)} err]} { Log "Error: $err" } + unset socketmap($conn_id) + } else { + Log "Cannot close connection $conn_id - no socket in socket map" + } + } +} + # http::reset -- # # See documentation for details. @@ -228,6 +292,7 @@ proc http::geturl { url args } { variable http variable urlTypes variable defaultCharset + variable defaultKeepalive variable strict # Initialize the state variable, an array. We'll return the name of this @@ -252,6 +317,8 @@ proc http::geturl { url args } { -timeout 0 -type application/x-www-form-urlencoded -queryprogress {} + -protocol 1.1 + binary 0 state header meta {} coding {} @@ -263,29 +330,33 @@ proc http::geturl { url args } { body {} status "" http "" + connection close } + set state(-keepalive) $defaultKeepalive + set state(-strict) $strict # These flags have their types verified [Bug 811170] array set type { -binary boolean -blocksize integer -queryblocksize integer - -validate boolean + -strict boolean -timeout integer + -validate boolean } set state(charset) $defaultCharset set options { - -binary -blocksize -channel -command -handler -headers - -method -progress -query -queryblocksize - -querychannel -queryprogress -validate -timeout -type + -binary -blocksize -channel -command -handler -headers -keepalive + -method -myaddr -progress -protocol -query -queryblocksize + -querychannel -queryprogress -strict -timeout -type -validate } - set usage [join $options ", "] + set usage [join [lsort $options] ", "] set options [string map {- ""} $options] set pat ^-([join $options |])$ foreach {flag value} $args { - if {[regexp $pat $flag]} { + if {[regexp -- $pat $flag]} { # Validate numbers - if {[info exists type($flag)] && \ - ![string is $type($flag) -strict $value]} { + if {[info exists type($flag)] && + ![string is $type($flag) -strict $value]} { unset $token return -code error "Bad value for $flag ($value), must be $type($flag)" } @@ -338,7 +409,8 @@ proc http::geturl { url args } { # # From a validation perspective, we need to ensure that the parts of the # URL that are going to the server are correctly encoded. - # This is only done if $::http::strict is true (default 0 for compat). + # This is only done if $state(-strict) is true (inherited from + # $::http::strict). set URLmatcher {(?x) # this is _expanded_ syntax ^ @@ -384,7 +456,7 @@ proc http::geturl { url args } { (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ $ } - if {$strict && ![regexp -- $validityRE $user]} { + if {$state(-strict) && ![regexp -- $validityRE $user]} { unset $token # Provide a better error message in this error case if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { @@ -404,7 +476,7 @@ proc http::geturl { url args } { (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? $ } - if {$strict && ![regexp -- $validityRE $srvurl]} { + if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { unset $token # Provide a better error message in this error case if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { @@ -416,7 +488,7 @@ proc http::geturl { url args } { } else { set srvurl / } - if {[string length $proto] == 0} { + if {$proto eq ""} { set proto http } if {![info exists urlTypes($proto)]} { @@ -426,7 +498,7 @@ proc http::geturl { url args } { set defport [lindex $urlTypes($proto) 0] set defcmd [lindex $urlTypes($proto) 1] - if {[string length $port] == 0} { + if {$port eq ""} { set port $defport } if {![catch {$http(-proxyfilter) $host} proxy]} { @@ -451,39 +523,69 @@ proc http::geturl { url args } { # If a timeout is specified we set up the after event and arrange for an # asynchronous socket connection. + set sockopts [list] if {$state(-timeout) > 0} { set state(after) [after $state(-timeout) \ [list http::reset $token timeout]] - set async -async - } else { - set async "" + lappend sockopts -async } # If we are using the proxy, we must pass in the full URL that includes # the server name. - if {[info exists phost] && [string length $phost]} { + if {[info exists phost] && ($phost ne "")} { set srvurl $url - set conStat [catch {eval $defcmd $async {$phost $pport}} s] + set targetAddr [list $phost $pport] } else { - set conStat [catch {eval $defcmd $async {$host $port}} s] + set targetAddr [list $host $port] } - - if {$conStat} { - # Something went wrong while trying to establish the connection. Clean - # up after events and such, but DON'T call the command callback (if - # available) because we're going to throw an exception from here - # instead. - Finish $token "" 1 - cleanup $token - return -code error $s + # Proxy connections aren't shared among different hosts. + set state(socketinfo) $host:$port + + # See if we are supposed to use a previously opened channel. + if {$state(-keepalive)} { + variable socketmap + if {[info exists socketmap($state(socketinfo))]} { + if {[catch {fconfigure $socketmap($state(socketinfo))}]} { + Log "WARNING: socket for $state(socketinfo) was closed" + unset socketmap($state(socketinfo)) + } else { + set sock $socketmap($state(socketinfo)) + Log "reusing socket $sock for $state(socketinfo)" + catch {fileevent $sock writable {}} + catch {fileevent $sock readable {}} + } + } + # don't automatically close this connection socket + set state(connection) {} + } + if {![info exists sock]} { + # Pass -myaddr directly to the socket command + if {[info exists state(-myaddr)]} { + lappend sockopts -myaddr $state(-myaddr) + } + if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { + # something went wrong while trying to establish the + # connection. Clean up after events and such, but DON'T call the + # command callback (if available) because we're going to throw an + # exception from here instead. + + Finish $token "" 1 + cleanup $token + return -code error $sock + } + } + set state(sock) $sock + Log "Using $sock for $state(socketinfo)" \ + [expr {$state(-keepalive)?"keepalive":""}] + if {$state(-keepalive)} { + set socketmap($state(socketinfo)) $sock } - set state(sock) $s # Wait for the connection to complete. if {$state(-timeout) > 0} { - fileevent $s writable [list http::Connect $token] + fileevent $sock writable [list http::Connect $token] http::wait $token if {![info exists state]} { @@ -508,12 +610,12 @@ proc http::geturl { url args } { # Send data in cr-lf format, but accept any line terminators - fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize) + fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize) # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. - catch {fconfigure $s -blocking off} + catch {fconfigure $sock -blocking off} set how GET if {$isQuery} { set state(querylength) [string length $state(-query)] @@ -539,27 +641,52 @@ proc http::geturl { url args } { } if {[catch { - puts $s "$how $srvurl HTTP/1.0" - puts $s "Accept: $http(-accept)" - if {$port == $defport} { - # Don't add port in this case, to handle broken servers. [Bug - # 504508] - puts $s "Host: $host" + puts $sock "$how $srvurl HTTP/$state(-protocol)" + puts $sock "Accept: $http(-accept)" + array set hdrs $state(-headers) + if {[info exists hdrs(Host)]} { + # Allow Host spoofing [Bug 928154] + puts $sock "Host: $hdrs(Host)" + } elseif {$port == $defport} { + # Don't add port in this case, to handle broken servers. + # [Bug #504508] + puts $sock "Host: $host" } else { - puts $s "Host: $host:$port" + puts $sock "Host: $host:$port" } - puts $s "User-Agent: $http(-useragent)" + unset hdrs + puts $sock "User-Agent: $http(-useragent)" + if {$state(-protocol) == 1.0 && $state(-keepalive)} { + puts $sock "Connection: keep-alive" + } + if {$state(-protocol) > 1.0 && !$state(-keepalive)} { + puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 + } + if {[info exists phost] && ($phost ne "") && $state(-keepalive)} { + puts $sock "Proxy-Connection: Keep-Alive" + } + set accept_encoding_seen 0 foreach {key value} $state(-headers) { + if {[string equal -nocase $key "host"]} { continue } + if {[string equal -nocase $key "accept-encoding"]} { + set accept_encoding_seen 1 + } set value [string map [list \n "" \r ""] $value] set key [string trim $key] - if {$key eq "Content-Length"} { + if {[string equal -nocase $key "content-length"]} { set contDone 1 set state(querylength) $value } if {[string length $key]} { - puts $s "$key: $value" + puts $sock "$key: $value" } } + # Soft zlib dependency check - no package require + if {!$accept_encoding_seen && [llength [package provide zlib]] + && !([info exists state(-channel)] || [info exists state(-handler)]) + } { + puts $sock "Accept-Encoding: gzip, identity, *;q=0.1" + } if {$isQueryChannel && $state(querylength) == 0} { # Try to determine size of data in channel. If we cannot seek, the # surrounding catch will trap us @@ -588,17 +715,17 @@ proc http::geturl { url args } { # their POST data if they expect the client to read their response. if {$isQuery || $isQueryChannel} { - puts $s "Content-Type: $state(-type)" + puts $sock "Content-Type: $state(-type)" if {!$contDone} { - puts $s "Content-Length: $state(querylength)" + puts $sock "Content-Length: $state(querylength)" } - puts $s "" - fconfigure $s -translation {auto binary} - fileevent $s writable [list http::Write $token] + puts $sock "" + fconfigure $sock -translation {auto binary} + fileevent $sock writable [list http::Write $token] } else { - puts $s "" - flush $s - fileevent $s readable [list http::Event $token] + puts $sock "" + flush $sock + fileevent $sock readable [list http::Event $sock $token] } if {! [info exists state(-command)]} { @@ -738,7 +865,7 @@ proc http::Connect {token} { proc http::Write {token} { variable $token upvar 0 $token state - set s $state(sock) + set sock $state(sock) # Output a block. Tcl will buffer this if the socket blocks set done 0 @@ -749,19 +876,20 @@ proc http::Write {token} { # Chop up large query strings so queryprogress callback can give # smooth feedback. - puts -nonewline $s \ + puts -nonewline $sock \ [string range $state(-query) $state(queryoffset) \ [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] incr state(queryoffset) $state(-queryblocksize) if {$state(queryoffset) >= $state(querylength)} { set state(queryoffset) $state(querylength) + puts $sock "" set done 1 } } else { # Copy blocks from the query channel set outStr [read $state(-querychannel) $state(-queryblocksize)] - puts -nonewline $s $outStr + puts -nonewline $sock $outStr incr state(queryoffset) [string length $outStr] if {[eof $state(-querychannel)]} { set done 1 @@ -775,9 +903,9 @@ proc http::Write {token} { set done 1 } if {$done} { - catch {flush $s} - fileevent $s writable {} - fileevent $s readable [list http::Event $token] + catch {flush $sock} + fileevent $sock writable {} + fileevent $sock readable [list http::Event $sock $token] } # Callback to the client after we've completely handled everything. @@ -793,79 +921,164 @@ proc http::Write {token} { # Handle input on the socket # # Arguments +# sock The socket receiving input. # token The token returned from http::geturl # # Side Effects # Read the socket and handle callbacks. -proc http::Event {token} { +proc http::Event {sock token} { variable $token upvar 0 $token state - set s $state(sock) + if {![info exists state]} { + Log "Event $sock with invalid token '$token' - remote close?" + if {! [eof $sock]} { + if {[string length [set d [read $sock]]] != 0} { + Log "WARNING: additional data left on closed socket" + } + } + CloseSocket $sock + return + } if {$state(state) eq "header"} { - if {[catch {gets $s line} n]} { + if {[catch {gets $sock line} n]} { return [Finish $token $n] } elseif {$n == 0} { - variable encodings + # We have now read all headers + # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 + if {$state(http) == "" || [lindex $state(http) 1] == 100} { return } + set state(state) body - if {$state(-binary) || ![string match -nocase text* $state(type)] - || [string match *gzip* $state(coding)] - || [string match *compress* $state(coding)]} { + + # If doing a HEAD, then we won't get any body + if {$state(-validate)} { + Eof $token + return + } + + # For non-chunked transfer we may have no body -- in this case we + # may get no further file event if the connection doesn't close and + # no more data is sent. We can tell and must finish up now - not + # later. + if {!(([info exists state(connection)] + && ($state(connection) eq "close")) + || [info exists state(transfer)]) + && $state(totalsize) == 0 + } then { + Log "body size is 0 and no events likely - complete." + Eof $token + return + } + + # We have to use binary translation to count bytes properly. + fconfigure $sock -translation binary + + if {$state(-binary) || ![string match -nocase text* $state(type)]} { # Turn off conversions for non-text data - fconfigure $s -translation binary + set state(binary) 1 + } + if {$state(binary) || [string match *gzip* $state(coding)] + || [string match *compress* $state(coding)]} { if {[info exists state(-channel)]} { fconfigure $state(-channel) -translation binary } - } else { - # If we are getting text, set the incoming channel's encoding - # correctly. iso8859-1 is the RFC default, but this could be - # any IANA charset. However, we only know how to convert what - # we have encodings for. - set idx [lsearch -exact $encodings \ - [string tolower $state(charset)]] - if {$idx >= 0} { - fconfigure $s -encoding [lindex $encodings $idx] - } } - if {[info exists state(-channel)] && \ - ![info exists state(-handler)]} { + if {[info exists state(-channel)] && + ![info exists state(-handler)]} { # Initiate a sequence of background fcopies - fileevent $s readable {} - CopyStart $s $token + fileevent $sock readable {} + CopyStart $sock $token return } } elseif {$n > 0} { - if {[regexp -nocase {^content-type:(.+)$} $line x type]} { - set state(type) [string trim $type] - # grab the optional charset information - regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset) - } - if {[regexp -nocase {^content-length:(.+)$} $line x length]} { - set state(totalsize) [string trim $length] - } - if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} { - set state(coding) [string trim $coding] - } + # Process header lines if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { + switch -- [string tolower $key] { + content-type { + set state(type) [string trim [string tolower $value]] + # grab the optional charset information + regexp -nocase {charset\s*=\s*(\S+?);?} \ + $state(type) -> state(charset) + } + content-length { + set state(totalsize) [string trim $value] + } + content-encoding { + set state(coding) [string trim $value] + } + transfer-encoding { + set state(transfer) \ + [string trim [string tolower $value]] + } + proxy-connection - + connection { + set state(connection) \ + [string trim [string tolower $value]] + } + } lappend state(meta) $key [string trim $value] } elseif {[string match HTTP* $line]} { set state(http) $line } } } else { + # Now reading body if {[catch { if {[info exists state(-handler)]} { - set n [eval $state(-handler) {$s $token}] + set n [eval $state(-handler) [list $sock $token]] + } elseif {[info exists state(transfer_final)]} { + set line [getTextLine $sock] + set n [string length $line] + if {$n > 0} { + Log "found $n bytes following final chunk" + append state(transfer_final) $line + } else { + Log "final chunk part" + Eof $token + } + } elseif {[info exists state(transfer)] + && $state(transfer) eq "chunked"} { + set size 0 + set chunk [getTextLine $sock] + set n [string length $chunk] + if {[string trim $chunk] ne ""} { + scan $chunk %x size + if {$size != 0} { + set bl [fconfigure $sock -blocking] + fconfigure $sock -blocking 1 + set chunk [read $sock $size] + fconfigure $sock -blocking $bl + set n [string length $chunk] + if {$n >= 0} { + append state(body) $chunk + } + if {$size != [string length $chunk]} { + Log "WARNING: mis-sized chunk:\ + was [string length $chunk], should be $size" + } + getTextLine $sock + } else { + set state(transfer_final) {} + } + } } else { - set block [read $s $state(-blocksize)] + #Log "read non-chunk $state(currentsize) of $state(totalsize)" + set block [read $sock $state(-blocksize)] set n [string length $block] if {$n >= 0} { append state(body) $block } } - if {$n >= 0} { - incr state(currentsize) $n + if {[info exists state]} { + if {$n >= 0} { + incr state(currentsize) $n + } + # If Content-Length - check for end of data. + if {($state(totalsize) > 0) + && ($state(currentsize) >= $state(totalsize))} { + Eof $token + } } } err]} { return [Finish $token $err] @@ -877,28 +1090,54 @@ proc http::Event {token} { } } - if {[eof $s]} { - Eof $token + # catch as an Eof above may have closed the socket already + if {![catch {eof $sock} eof] && $eof} { + if {[info exists $token]} { + set state(connection) close + Eof $token + } else { + # open connection closed on a token that has been cleaned up. + CloseSocket $sock + } return } } +# http::getTextLine -- +# +# Get one line with the stream in blocking crlf mode +# +# Arguments +# sock The socket receiving input. +# +# Results: +# The line of text, without trailing newline + +proc http::getTextLine {sock} { + set tr [fconfigure $sock -translation] + set bl [fconfigure $sock -blocking] + fconfigure $sock -translation crlf -blocking 1 + set r [gets $sock] + fconfigure $sock -translation $tr -blocking $bl + return $r +} + # http::CopyStart # # Error handling wrapper around fcopy # # Arguments -# s The socket to copy from +# sock The socket to copy from # token The token returned from http::geturl # # Side Effects # This closes the connection upon error -proc http::CopyStart {s token} { +proc http::CopyStart {sock token} { variable $token upvar 0 $token state if {[catch { - fcopy $s $state(-channel) -size $state(-blocksize) -command \ + fcopy $sock $state(-channel) -size $state(-blocksize) -command \ [list http::CopyDone $token] } err]} { Finish $token $err @@ -919,7 +1158,7 @@ proc http::CopyStart {s token} { proc http::CopyDone {token count {error {}}} { variable $token upvar 0 $token state - set s $state(sock) + set sock $state(sock) incr state(currentsize) $count if {[info exists state(-progress)]} { eval $state(-progress) \ @@ -928,10 +1167,10 @@ proc http::CopyDone {token count {error {}}} { # At this point the token may have been reset if {[string length $error]} { Finish $token $error - } elseif {[catch {eof $s} iseof] || $iseof} { + } elseif {[catch {eof $sock} iseof] || $iseof} { Eof $token } else { - CopyStart $s $token + CopyStart $sock $token } } @@ -945,7 +1184,7 @@ proc http::CopyDone {token count {error {}}} { # Side Effects # Clean up the socket -proc http::Eof {token} { +proc http::Eof {token {force 0}} { variable $token upvar 0 $token state if {$state(state) eq "header"} { @@ -954,7 +1193,31 @@ proc http::Eof {token} { } else { set state(status) ok } - set state(state) eof + + if {($state(coding) eq "gzip") && [string length $state(body)] > 0} { + if {[catch { + set state(body) [Gunzip $state(body)] + } err]} { + return [Finish $token $err] + } + } + + if {!$state(binary)} { + + # If we are getting text, set the incoming channel's + # encoding correctly. iso8859-1 is the RFC default, but + # this could be any IANA charset. However, we only know + # how to convert what we have encodings for. + + set enc [CharsetToEncoding $state(charset)] + if {$enc ne "binary"} { + set state(body) [encoding convertfrom $enc $state(body)] + } + + # Translate text line endings. + set state(body) [string map {\r\n \n \r \n} $state(body)] + } + Finish $token } @@ -972,7 +1235,7 @@ proc http::wait {token} { variable $token upvar 0 $token state - if {![info exists state(status)] || [string length $state(status)] == 0} { + if {![info exists state(status)] || $state(status) eq ""} { # We must wait on the original variable name, not the upvar alias vwait ${token}(status) } @@ -982,7 +1245,7 @@ proc http::wait {token} { # http::formatQuery -- # -# See documentation for details. Call http::formatQuery with an even +# See documentation for details. Call http::formatQuery with an even # number of arguments, where the first is a name, the second is a value, # the third is another name, and so on. # @@ -1058,6 +1321,99 @@ proc http::ProxyRequired {host} { } } +# http::CharsetToEncoding -- +# +# Tries to map a given IANA charset to a tcl encoding. +# If no encoding can be found, returns binary. +# + +proc http::CharsetToEncoding {charset} { + variable encodings + + set charset [string tolower $charset] + if {[regexp {iso-?8859-([0-9]+)} $charset - num]} { + set encoding "iso8859-$num" + } elseif {[regexp {iso-?2022-(jp|kr)} $charset - ext]} { + set encoding "iso2022-$ext" + } elseif {[regexp {shift[-_]?js} $charset -]} { + set encoding "shiftjis" + } elseif {[regexp {(windows|cp)-?([0-9]+)} $charset - - num]} { + set encoding "cp$num" + } elseif {$charset eq "us-ascii"} { + set encoding "ascii" + } elseif {[regexp {(iso-?)?lat(in)?-?([0-9]+)} $charset - - - num]} { + switch -- $num { + 5 {set encoding "iso8859-9"} + 1 - + 2 - + 3 {set encoding "iso8859-$num"} + } + } else { + # other charset, like euc-xx, utf-8,... may directly maps to encoding + set encoding $charset + } + set idx [lsearch -exact $encodings $encoding] + if {$idx >= 0} { + return $encoding + } else { + return "binary" + } +} + +# http::Gunzip -- +# +# Decompress data transmitted using the gzip transfer coding. +# + +# FIX ME: redo using zlib sinflate +proc http::Gunzip {data} { + binary scan $data Scb5icc magic method flags time xfl os + set pos 10 + if {$magic != 0x1f8b} { + return -code error "invalid data: supplied data is not in gzip format" + } + if {$method != 8} { + return -code error "invalid compression method" + } + + foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break + set extra "" + if { $f_extra } { + binary scan $data @${pos}S xlen + incr pos 2 + set extra [string range $data $pos $xlen] + set pos [incr xlen] + } + + set name "" + if { $f_name } { + set ndx [string first \0 $data $pos] + set name [string range $data $pos $ndx] + set pos [incr ndx] + } + + set comment "" + if { $f_comment } { + set ndx [string first \0 $data $pos] + set comment [string range $data $pos $ndx] + set pos [incr ndx] + } + + set fcrc "" + if { $f_crc } { + set fcrc [string range $data $pos [incr pos]] + incr pos + } + + binary scan [string range $data end-7 end] ii crc size + set inflated [zlib inflate [string range $data $pos end-8]] + + if { $crc != [set chk [zlib crc32 $inflated]] } { + return -code error "invalid data: checksum mismatch $crc != $chk" + } + return $inflated +} + # Local variables: # indent-tabs-mode: t # End: diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl index cf6a1ff..870019a 100644 --- a/library/http/pkgIndex.tcl +++ b/library/http/pkgIndex.tcl @@ -1,12 +1,4 @@ # Tcl package index file, version 1.1 -# This file is generated by the "pkg_mkIndex" command -# and sourced either when an application starts up or -# by a "package unknown" script. It invokes the -# "package ifneeded" command to set up package-related -# information so that packages will be loaded automatically -# in response to "package require" commands. When this -# script is sourced, the variable $dir must contain the -# full path name of this file's directory. if {![package vsatisfies [package provide Tcl] 8.4]} {return} -package ifneeded http 2.5.5 [list tclPkgSetup $dir http 2.5.5 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] +package ifneeded http 2.7 [list tclPkgSetup $dir http 2.7 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] diff --git a/tests/http.test b/tests/http.test index 235252e..18cf75a 100644 --- a/tests/http.test +++ b/tests/http.test @@ -12,7 +12,7 @@ # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # # -# RCS: @(#) $Id: http.test,v 1.47 2008/03/12 05:57:44 hobbs Exp $ +# RCS: @(#) $Id: http.test,v 1.48 2008/03/12 09:51:39 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest 2 @@ -117,7 +117,7 @@ test http-2.1 {http::reset} { test http-3.1 {http::geturl} { list [catch {http::geturl -bogus flag} msg] $msg -} {1 {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -method, -progress, -query, -queryblocksize, -querychannel, -queryprogress, -validate, -timeout, -type}} +} {1 {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -keepalive, -method, -myaddr, -progress, -protocol, -query, -queryblocksize, -querychannel, -queryprogress, -strict, -timeout, -type, -validate}} test http-3.2 {http::geturl} { catch {http::geturl http:junk} err set err @@ -205,7 +205,7 @@ test http-3.10 {http::geturl queryprogress} { lappend postProgress $y } set postProgress {} - set t [http::geturl $posturl -query $query \ + set t [http::geturl $posturl -keepalive 0 -query $query \ -queryprogress postProgress -queryblocksize 16384] http::wait $t list [http::status $t] [string length $query] $postProgress [http::data $t] @@ -332,7 +332,7 @@ test http-3.24 {http::geturl parse failures} -body { } -returnCodes error -result {Illegal encoding character usage "%qu" in URL path} test http-4.1 {http::Event} { - set token [http::geturl $url] + set token [http::geturl $url -keepalive 0] upvar #0 $token data array set meta $data(meta) expr ($data(totalsize) == $meta(Content-Length)) @@ -398,7 +398,7 @@ if 0 { } {111 111} } test http-4.7 {http::Event} { - set token [http::geturl $url -progress myProgress] + set token [http::geturl $url -keepalive 0 -progress myProgress] set progress } {111 111} test http-4.8 {http::Event} { @@ -417,20 +417,20 @@ test http-4.10 {http::Event} { # Short timeout to working server (the test server). This lets us try a # reset during the connection. test http-4.11 {http::Event} { - set token [http::geturl $url -timeout 1 -command {#}] + set token [http::geturl $url -timeout 1 -keepalive 0 -command {#}] http::reset $token http::status $token } {reset} # Longer timeout with reset. test http-4.12 {http::Event} { - set token [http::geturl $url/?timeout=10 -command {#}] + set token [http::geturl $url/?timeout=10 -keepalive 0 -command {#}] http::reset $token http::status $token } {reset} # Medium timeout to working server that waits even longer. The timeout # hits while waiting for a reply. test http-4.13 {http::Event} { - set token [http::geturl $url?timeout=30 -timeout 10 -command {#}] + set token [http::geturl $url?timeout=30 -keepalive 0 -timeout 10 -command {#}] http::wait $token http::status $token } {timeout} diff --git a/unix/Makefile.in b/unix/Makefile.in index 673d9a1..de79f1c 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -4,7 +4,7 @@ # "./configure", which is a configuration script generated by the "autoconf" # program (constructs like "@foo@" will get replaced in the actual Makefile. # -# RCS: @(#) $Id: Makefile.in,v 1.228 2008/03/11 22:28:33 das Exp $ +# RCS: @(#) $Id: Makefile.in,v 1.229 2008/03/12 09:51:39 hobbs Exp $ VERSION = @TCL_VERSION@ MAJOR_VERSION = @TCL_MAJOR_VERSION@ @@ -782,8 +782,8 @@ install-libraries: libraries $(INSTALL_TZDATA) install-msgs do \ $(INSTALL_DATA) $$i "$(SCRIPT_INSTALL_DIR)"/http1.0; \ done; - @echo "Installing package http 2.5.5 as a Tcl Module"; - @$(INSTALL_DATA) $(TOP_DIR)/library/http/http.tcl "$(SCRIPT_INSTALL_DIR)"/../tcl8/8.4/http-2.5.5.tm; + @echo "Installing package http 2.7 as a Tcl Module"; + @$(INSTALL_DATA) $(TOP_DIR)/library/http/http.tcl "$(SCRIPT_INSTALL_DIR)"/../tcl8/8.4/http-2.7.tm; @echo "Installing library opt0.4 directory"; @for i in $(TOP_DIR)/library/opt/*.tcl ; \ do \ diff --git a/win/Makefile.in b/win/Makefile.in index 5cace90..f453595 100644 --- a/win/Makefile.in +++ b/win/Makefile.in @@ -4,7 +4,7 @@ # "./configure", which is a configuration script generated by the "autoconf" # program (constructs like "@foo@" will get replaced in the actual Makefile. # -# RCS: @(#) $Id: Makefile.in,v 1.123 2008/02/26 19:52:54 patthoyts Exp $ +# RCS: @(#) $Id: Makefile.in,v 1.124 2008/03/12 09:51:39 hobbs Exp $ VERSION = @TCL_VERSION@ @@ -635,8 +635,8 @@ install-libraries: libraries install-tzdata install-msgs do \ $(COPY) "$$j" "$(SCRIPT_INSTALL_DIR)/http1.0"; \ done; - @echo "Installing package http 2.5.5 as a Tcl Module"; - @$(COPY) $(ROOT_DIR)/library/http/http.tcl $(SCRIPT_INSTALL_DIR)/../tcl8/8.4/http-2.5.5.tm; + @echo "Installing package http 2.7 as a Tcl Module"; + @$(COPY) $(ROOT_DIR)/library/http/http.tcl $(SCRIPT_INSTALL_DIR)/../tcl8/8.4/http-2.7.tm; @echo "Installing library opt0.4 directory"; @for j in $(ROOT_DIR)/library/opt/*.tcl; \ do \ -- cgit v0.12