# http.tcl -- # # Client-side HTTP for GET, POST, and HEAD commands. These routines can # be used in untrusted code that uses the Safesock security policy. # These procedures use a callback interface to avoid using vwait, which # is not defined in the safe base. # # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. package require Tcl 8.6 # Keep this in sync with pkgIndex.tcl and with the install directories in # Makefiles package provide http 2.8.8 namespace eval http { # Allow resourcing to not clobber existing data variable http if {![info exists http]} { array set http { -accept */* -proxyhost {} -proxyport {} -proxyfilter http::ProxyRequired -urlencoding utf-8 } # We need a useragent string of this style or various servers will refuse to # send us compressed content even when we ask for it. This follows the # de-facto layout of user-agent strings in current browsers. set http(-useragent) "Mozilla/5.0\ ([string totitle $::tcl_platform(platform)]; U;\ $::tcl_platform(os) $::tcl_platform(osVersion))\ http/[package provide http] Tcl/[package provide Tcl]" } proc init {} { # Set up the map for quoting chars. RFC3986 Section 2.3 say percent # encode all except: "... percent-encoded octets in the ranges of # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI # producers ..." for {set i 0} {$i <= 256} {incr i} { set c [format %c $i] if {![string match {[-._~a-zA-Z0-9]} $c]} { set map($c) %[format %.2X $i] } } # These are handled specially set map(\n) %0D%0A variable formMap [array get map] # Create a map for HTTP/1.1 open sockets variable socketmap if {[info exists socketmap]} { # Close but don't remove open sockets on re-init foreach {url sock} [array get socketmap] { catch {close $sock} } } array set socketmap {} } init variable urlTypes if {![info exists urlTypes]} { set urlTypes(http) [list 80 ::socket] } variable encodings [string tolower [encoding names]] # This can be changed, but iso8859-1 is the RFC standard. variable defaultCharset if {![info exists defaultCharset]} { set defaultCharset "iso8859-1" } # Force RFC 3986 strictness in geturl url verification? variable strict if {![info exists strict]} { set strict 1 } # Let user control default keepalive for compatibility variable defaultKeepalive if {![info exists defaultKeepalive]} { set defaultKeepalive 0 } namespace export geturl config reset wait formatQuery register unregister # Useful, but not exported: data size status code } # http::Log -- # # Debugging output -- define this to observe HTTP/1.1 socket usage. # Should echo any args received. # # Arguments: # msg Message to output # if {[info command http::Log] eq {}} {proc http::Log {args} {}} # http::register -- # # See documentation for details. # # Arguments: # proto URL protocol prefix, e.g. https # port Default port for protocol # command Command to use to create socket # Results: # list of port and command that was registered. proc http::register {proto port command} { variable urlTypes set urlTypes([string tolower $proto]) [list $port $command] } # http::unregister -- # # Unregisters URL protocol handler # # Arguments: # proto URL protocol prefix, e.g. https # Results: # list of port and command that was unregistered. proc http::unregister {proto} { variable urlTypes set lower [string tolower $proto] if {![info exists urlTypes($lower)]} { return -code error "unsupported url type \"$proto\"" } set old $urlTypes($lower) unset urlTypes($lower) return $old } # http::config -- # # See documentation for details. # # Arguments: # args Options parsed by the procedure. # Results: # TODO proc http::config {args} { variable http set options [lsort [array names http -*]] set usage [join $options ", "] if {[llength $args] == 0} { set result {} foreach name $options { lappend result $name $http($name) } return $result } set options [string map {- ""} $options] set pat ^-(?:[join $options |])$ if {[llength $args] == 1} { set flag [lindex $args 0] if {![regexp -- $pat $flag]} { return -code error "Unknown option $flag, must be: $usage" } return $http($flag) } else { foreach {flag value} $args { if {![regexp -- $pat $flag]} { return -code error "Unknown option $flag, must be: $usage" } set http($flag) $value } } } # http::Finish -- # # Clean up the socket and eval close time callbacks # # Arguments: # token Connection token. # errormsg (optional) If set, forces status to error. # skipCB (optional) If set, don't call the -command callback. This # is useful when geturl wants to throw an exception instead # of calling the callback. That way, the same error isn't # reported to two places. # # Side Effects: # Closes the socket proc http::Finish {token {errormsg ""} {skipCB 0}} { variable $token upvar 0 $token state global errorInfo errorCode if {$errormsg ne ""} { set state(error) [list $errormsg $errorInfo $errorCode] set state(status) "error" } if { ($state(status) eq "timeout") || ($state(status) eq "error") || ([info exists state(connection)] && ($state(connection) eq "close")) } { CloseSocket $state(sock) $token } if {[info exists state(after)]} { after cancel $state(after) } if {[info exists state(-command)] && !$skipCB && ![info exists state(done-command-cb)]} { set state(done-command-cb) yes if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} { set state(error) [list $err $errorInfo $errorCode] set state(status) error } } } # http::CloseSocket - # # Close a socket and remove it from the persistent sockets table. If # possible an http token is included here but when we are called from a # fileevent on remote closure we need to find the correct entry - hence # the second section. proc ::http::CloseSocket {s {token {}}} { variable socketmap catch {fileevent $s readable {}} set conn_id {} if {$token ne ""} { variable $token upvar 0 $token state if {[info exists state(socketinfo)]} { set conn_id $state(socketinfo) } } else { set map [array get socketmap] set ndx [lsearch -exact $map $s] if {$ndx != -1} { incr ndx -1 set conn_id [lindex $map $ndx] } } if {$conn_id eq {} || ![info exists socketmap($conn_id)]} { Log "Closing socket $s (no connection info)" if {[catch {close $s} err]} { Log "Error: $err" } } else { if {[info exists socketmap($conn_id)]} { Log "Closing connection $conn_id (sock $socketmap($conn_id))" if {[catch {close $socketmap($conn_id)} err]} { Log "Error: $err" } unset socketmap($conn_id) } else { Log "Cannot close connection $conn_id - no socket in socket map" } } } # http::reset -- # # See documentation for details. # # Arguments: # token Connection token. # why Status info. # # Side Effects: # See Finish proc http::reset {token {why reset}} { variable $token upvar 0 $token state set state(status) $why catch {fileevent $state(sock) readable {}} catch {fileevent $state(sock) writable {}} Finish $token if {[info exists state(error)]} { set errorlist $state(error) unset state eval ::error $errorlist } } # http::geturl -- # # Establishes a connection to a remote url via http. # # Arguments: # url The http URL to goget. # args Option value pairs. Valid options include: # -blocksize, -validate, -headers, -timeout # Results: # Returns a token for this connection. This token is the name of an # array that the caller should unset to garbage collect the state. proc http::geturl {url args} { variable http variable urlTypes variable defaultCharset variable defaultKeepalive variable strict # Initialize the state variable, an array. We'll return the name of this # array as the token for the transaction. if {![info exists http(uid)]} { set http(uid) 0 } set token [namespace current]::[incr http(uid)] variable $token upvar 0 $token state reset $token # Process command options. array set state { -binary false -blocksize 8192 -queryblocksize 8192 -validate 0 -headers {} -timeout 0 -type application/x-www-form-urlencoded -queryprogress {} -protocol 1.1 binary 0 state connecting meta {} coding {} currentsize 0 totalsize 0 querylength 0 queryoffset 0 type text/html body {} status "" http "" connection close } set state(-keepalive) $defaultKeepalive set state(-strict) $strict # These flags have their types verified [Bug 811170] array set type { -binary boolean -blocksize integer -queryblocksize integer -strict boolean -timeout integer -validate boolean } set state(charset) $defaultCharset set options { -binary -blocksize -channel -command -handler -headers -keepalive -method -myaddr -progress -protocol -query -queryblocksize -querychannel -queryprogress -strict -timeout -type -validate } set usage [join [lsort $options] ", "] set options [string map {- ""} $options] set pat ^-(?:[join $options |])$ foreach {flag value} $args { if {[regexp -- $pat $flag]} { # Validate numbers if { [info exists type($flag)] && ![string is $type($flag) -strict $value] } { unset $token return -code error \ "Bad value for $flag ($value), must be $type($flag)" } set state($flag) $value } else { unset $token return -code error "Unknown option $flag, can be: $usage" } } # Make sure -query and -querychannel aren't both specified set isQueryChannel [info exists state(-querychannel)] set isQuery [info exists state(-query)] if {$isQuery && $isQueryChannel} { unset $token return -code error "Can't combine -query and -querychannel options!" } # Validate URL, determine the server host and port, and check proxy case # Recognize user:pass@host URLs also, although we do not do anything with # that info yet. # URLs have basically four parts. # First, before the colon, is the protocol scheme (e.g. http) # Second, for HTTP-like protocols, is the authority # The authority is preceded by // and lasts up to (but not including) # the following / or ? and it identifies up to four parts, of which # only one, the host, is required (if an authority is present at all). # All other parts of the authority (user name, password, port number) # are optional. # Third is the resource name, which is split into two parts at a ? # The first part (from the single "/" up to "?") is the path, and the # second part (from that "?" up to "#") is the query. *HOWEVER*, we do # not need to separate them; we send the whole lot to the server. # Both, path and query are allowed to be missing, including their # delimiting character. # Fourth is the fragment identifier, which is everything after the first # "#" in the URL. The fragment identifier MUST NOT be sent to the server # and indeed, we don't bother to validate it (it could be an error to # pass it in here, but it's cheap to strip). # # An example of a URL that has all the parts: # # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes # # The "http" is the protocol, the user is "jschmoe", the password is # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". # # Note that the RE actually combines the user and password parts, as # recommended in RFC 3986. Indeed, that RFC states that putting passwords # in URLs is a Really Bad Idea, something with which I would agree utterly. # # From a validation perspective, we need to ensure that the parts of the # URL that are going to the server are correctly encoded. This is only # done if $state(-strict) is true (inherited from $::http::strict). set URLmatcher {(?x) # this is _expanded_ syntax ^ (?: (\w+) : ) ? # (?: // (?: ( [^@/\#?]+ # ) @ )? ( # [^/:\#?]+ | # host name or IPv4 address \[ [^/\#?]+ \] # IPv6 address in square brackets ) (?: : (\d+) )? # )? ( [/\?] [^\#]*)? # (including query) (?: \# (.*) )? # $ } # Phase one: parse if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { unset $token return -code error "Unsupported URL: $url" } # Phase two: validate set host [string trim $host {[]}]; # strip square brackets from IPv6 address if {$host eq ""} { # Caller has to provide a host name; we do not have a "default host" # that would enable us to handle relative URLs. unset $token return -code error "Missing host part: $url" # Note that we don't check the hostname for validity here; if it's # invalid, we'll simply fail to resolve it later on. } if {$port ne "" && $port > 65535} { unset $token return -code error "Invalid port number: $port" } # The user identification and resource identification parts of the URL can # have encoded characters in them; take care! if {$user ne ""} { # Check for validity according to RFC 3986, Appendix A set validityRE {(?xi) ^ (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ $ } if {$state(-strict) && ![regexp -- $validityRE $user]} { unset $token # Provide a better error message in this error case if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { return -code error \ "Illegal encoding character usage \"$bad\" in URL user" } return -code error "Illegal characters in URL user" } } if {$srvurl ne ""} { # RFC 3986 allows empty paths (not even a /), but servers # return 400 if the path in the HTTP request doesn't start # with / , so add it here if needed. if {[string index $srvurl 0] ne "/"} { set srvurl /$srvurl } # Check for validity according to RFC 3986, Appendix A set validityRE {(?xi) ^ # Path part (already must start with / character) (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* # Query part (optional, permits ? characters) (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? $ } if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { unset $token # Provide a better error message in this error case if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { return -code error \ "Illegal encoding character usage \"$bad\" in URL path" } return -code error "Illegal characters in URL path" } } else { set srvurl / } if {$proto eq ""} { set proto http } set lower [string tolower $proto] if {![info exists urlTypes($lower)]} { unset $token return -code error "Unsupported URL type \"$proto\"" } set defport [lindex $urlTypes($lower) 0] set defcmd [lindex $urlTypes($lower) 1] if {$port eq ""} { set port $defport } if {![catch {$http(-proxyfilter) $host} proxy]} { set phost [lindex $proxy 0] set pport [lindex $proxy 1] } # OK, now reassemble into a full URL set url ${proto}:// if {$user ne ""} { append url $user append url @ } append url $host if {$port != $defport} { append url : $port } append url $srvurl # Don't append the fragment! set state(url) $url # If a timeout is specified we set up the after event and arrange for an # asynchronous socket connection. set sockopts [list -async] if {$state(-timeout) > 0} { set state(after) [after $state(-timeout) \ [list http::reset $token timeout]] } # If we are using the proxy, we must pass in the full URL that includes # the server name. if {[info exists phost] && ($phost ne "")} { set srvurl $url set targetAddr [list $phost $pport] } else { set targetAddr [list $host $port] } # Proxy connections aren't shared among different hosts. set state(socketinfo) $host:$port # See if we are supposed to use a previously opened channel. if {$state(-keepalive)} { variable socketmap if {[info exists socketmap($state(socketinfo))]} { if {[catch {fconfigure $socketmap($state(socketinfo))}]} { Log "WARNING: socket for $state(socketinfo) was closed" unset socketmap($state(socketinfo)) } else { set sock $socketmap($state(socketinfo)) Log "reusing socket $sock for $state(socketinfo)" catch {fileevent $sock writable {}} catch {fileevent $sock readable {}} } } # don't automatically close this connection socket set state(connection) {} } if {![info exists sock]} { # Pass -myaddr directly to the socket command if {[info exists state(-myaddr)]} { lappend sockopts -myaddr $state(-myaddr) } if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { # something went wrong while trying to establish the connection. # Clean up after events and such, but DON'T call the command # callback (if available) because we're going to throw an # exception from here instead. set state(sock) $sock Finish $token "" 1 cleanup $token return -code error $sock } } set state(sock) $sock Log "Using $sock for $state(socketinfo)" \ [expr {$state(-keepalive)?"keepalive":""}] if {$state(-keepalive)} { set socketmap($state(socketinfo)) $sock } if {![info exists phost]} { set phost "" } fileevent $sock writable [list http::Connect $token $proto $phost $srvurl] # Wait for the connection to complete. if {![info exists state(-command)]} { # geturl does EVERYTHING asynchronously, so if the user # calls it synchronously, we just do a wait here. http::wait $token if {![info exists state]} { # If we timed out then Finish has been called and the users # command callback may have cleaned up the token. If so we end up # here with nothing left to do. return $token } elseif {$state(status) eq "error"} { # Something went wrong while trying to establish the connection. # Clean up after events and such, but DON'T call the command # callback (if available) because we're going to throw an # exception from here instead. set err [lindex $state(error) 0] cleanup $token return -code error $err } } return $token } proc http::Connected { token proto phost srvurl} { variable http variable urlTypes variable $token upvar 0 $token state # Set back the variables needed here set sock $state(sock) set isQueryChannel [info exists state(-querychannel)] set isQuery [info exists state(-query)] set host [lindex [split $state(socketinfo) :] 0] set port [lindex [split $state(socketinfo) :] 1] set lower [string tolower $proto] set defport [lindex $urlTypes($lower) 0] # Send data in cr-lf format, but accept any line terminators fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize) # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. catch {fconfigure $sock -blocking off} set how GET if {$isQuery} { set state(querylength) [string length $state(-query)] if {$state(querylength) > 0} { set how POST set contDone 0 } else { # There's no query data. unset state(-query) set isQuery 0 } } elseif {$state(-validate)} { set how HEAD } elseif {$isQueryChannel} { set how POST # The query channel must be blocking for the async Write to # work properly. fconfigure $state(-querychannel) -blocking 1 -translation binary set contDone 0 } if {[info exists state(-method)] && $state(-method) ne ""} { set how $state(-method) } # We cannot handle chunked encodings with -handler, so force HTTP/1.0 # until we can manage this. if {[info exists state(-handler)]} { set state(-protocol) 1.0 } if {[catch { puts $sock "$how $srvurl HTTP/$state(-protocol)" puts $sock "Accept: $http(-accept)" array set hdrs $state(-headers) if {[info exists hdrs(Host)]} { # Allow Host spoofing. [Bug 928154] puts $sock "Host: $hdrs(Host)" } elseif {$port == $defport} { # Don't add port in this case, to handle broken servers. [Bug # #504508] puts $sock "Host: $host" } else { puts $sock "Host: $host:$port" } unset hdrs puts $sock "User-Agent: $http(-useragent)" if {$state(-protocol) == 1.0 && $state(-keepalive)} { puts $sock "Connection: keep-alive" } if {$state(-protocol) > 1.0 && !$state(-keepalive)} { puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 } if {[info exists phost] && ($phost ne "") && $state(-keepalive)} { puts $sock "Proxy-Connection: Keep-Alive" } set accept_encoding_seen 0 set content_type_seen 0 foreach {key value} $state(-headers) { if {[string equal -nocase $key "host"]} { continue } if {[string equal -nocase $key "accept-encoding"]} { set accept_encoding_seen 1 } if {[string equal -nocase $key "content-type"]} { set content_type_seen 1 } set value [string map [list \n "" \r ""] $value] set key [string trim $key] if {[string equal -nocase $key "content-length"]} { set contDone 1 set state(querylength) $value } if {[string length $key]} { puts $sock "$key: $value" } } if {!$accept_encoding_seen && ![info exists state(-handler)]} { puts $sock "Accept-Encoding: deflate,gzip,compress" } if {$isQueryChannel && $state(querylength) == 0} { # Try to determine size of data in channel. If we cannot seek, the # surrounding catch will trap us set start [tell $state(-querychannel)] seek $state(-querychannel) 0 end set state(querylength) \ [expr {[tell $state(-querychannel)] - $start}] seek $state(-querychannel) $start } # Flush the request header and set up the fileevent that will either # push the POST data or read the response. # # fileevent note: # # It is possible to have both the read and write fileevents active at # this point. The only scenario it seems to affect is a server that # closes the connection without reading the POST data. (e.g., early # versions TclHttpd in various error cases). Depending on the # platform, the client may or may not be able to get the response from # the server because of the error it will get trying to write the post # data. Having both fileevents active changes the timing and the # behavior, but no two platforms (among Solaris, Linux, and NT) behave # the same, and none behave all that well in any case. Servers should # always read their POST data if they expect the client to read their # response. if {$isQuery || $isQueryChannel} { if {!$content_type_seen} { puts $sock "Content-Type: $state(-type)" } if {!$contDone} { puts $sock "Content-Length: $state(querylength)" } puts $sock "" fconfigure $sock -translation {auto binary} fileevent $sock writable [list http::Write $token] } else { puts $sock "" flush $sock fileevent $sock readable [list http::Event $sock $token] } } err]} { # The socket probably was never connected, or the connection dropped # later. # if state(status) is error, it means someone's already called Finish # to do the above-described clean up. if {$state(status) ne "error"} { Finish $token $err } } } # Data access functions: # Data - the URL data # Status - the transaction status: ok, reset, eof, timeout # Code - the HTTP transaction code, e.g., 200 # Size - the size of the URL data proc http::data {token} { variable $token upvar 0 $token state return $state(body) } proc http::status {token} { if {![info exists $token]} { return "error" } variable $token upvar 0 $token state return $state(status) } proc http::code {token} { variable $token upvar 0 $token state return $state(http) } proc http::ncode {token} { variable $token upvar 0 $token state if {[regexp {[0-9]{3}} $state(http) numeric_code]} { return $numeric_code } else { return $state(http) } } proc http::size {token} { variable $token upvar 0 $token state return $state(currentsize) } proc http::meta {token} { variable $token upvar 0 $token state return $state(meta) } proc http::error {token} { variable $token upvar 0 $token state if {[info exists state(error)]} { return $state(error) } return "" } # http::cleanup # # Garbage collect the state associated with a transaction # # Arguments # token The token returned from http::geturl # # Side Effects # unsets the state array proc http::cleanup {token} { variable $token upvar 0 $token state if {[info exists state]} { unset state } } # http::Connect # # This callback is made when an asyncronous connection completes. # # Arguments # token The token returned from http::geturl # # Side Effects # Sets the status of the connection, which unblocks # the waiting geturl call proc http::Connect {token proto phost srvurl} { variable $token upvar 0 $token state set err "due to unexpected EOF" if { [eof $state(sock)] || [set err [fconfigure $state(sock) -error]] ne "" } { Finish $token "connect failed $err" } else { fileevent $state(sock) writable {} ::http::Connected $token $proto $phost $srvurl } return } # http::Write # # Write POST query data to the socket # # Arguments # token The token for the connection # # Side Effects # Write the socket and handle callbacks. proc http::Write {token} { variable $token upvar 0 $token state set sock $state(sock) # Output a block. Tcl will buffer this if the socket blocks set done 0 if {[catch { # Catch I/O errors on dead sockets if {[info exists state(-query)]} { # Chop up large query strings so queryprogress callback can give # smooth feedback. puts -nonewline $sock \ [string range $state(-query) $state(queryoffset) \ [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] incr state(queryoffset) $state(-queryblocksize) if {$state(queryoffset) >= $state(querylength)} { set state(queryoffset) $state(querylength) set done 1 } } else { # Copy blocks from the query channel set outStr [read $state(-querychannel) $state(-queryblocksize)] puts -nonewline $sock $outStr incr state(queryoffset) [string length $outStr] if {[eof $state(-querychannel)]} { set done 1 } } } err]} { # Do not call Finish here, but instead let the read half of the socket # process whatever server reply there is to get. set state(posterror) $err set done 1 } if {$done} { catch {flush $sock} fileevent $sock writable {} fileevent $sock readable [list http::Event $sock $token] } # Callback to the client after we've completely handled everything. if {[string length $state(-queryprogress)]} { eval $state(-queryprogress) \ [list $token $state(querylength) $state(queryoffset)] } } # http::Event # # Handle input on the socket # # Arguments # sock The socket receiving input. # token The token returned from http::geturl # # Side Effects # Read the socket and handle callbacks. proc http::Event {sock token} { variable $token upvar 0 $token state if {![info exists state]} { Log "Event $sock with invalid token '$token' - remote close?" if {![eof $sock]} { if {[set d [read $sock]] ne ""} { Log "WARNING: additional data left on closed socket" } } CloseSocket $sock return } if {$state(state) eq "connecting"} { if {[catch {gets $sock state(http)} n]} { return [Finish $token $n] } elseif {$n >= 0} { set state(state) "header" } } elseif {$state(state) eq "header"} { if {[catch {gets $sock line} n]} { return [Finish $token $n] } elseif {$n == 0} { # We have now read all headers # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} { return } set state(state) body # If doing a HEAD, then we won't get any body if {$state(-validate)} { Eof $token return } # For non-chunked transfer we may have no body - in this case we # may get no further file event if the connection doesn't close # and no more data is sent. We can tell and must finish up now - # not later. if { !(([info exists state(connection)] && ($state(connection) eq "close")) || [info exists state(transfer)]) && ($state(totalsize) == 0) } { Log "body size is 0 and no events likely - complete." Eof $token return } # We have to use binary translation to count bytes properly. fconfigure $sock -translation binary if { $state(-binary) || ![string match -nocase text* $state(type)] } { # Turn off conversions for non-text data set state(binary) 1 } if {[info exists state(-channel)]} { if {$state(binary) || [llength [ContentEncoding $token]]} { fconfigure $state(-channel) -translation binary } if {![info exists state(-handler)]} { # Initiate a sequence of background fcopies fileevent $sock readable {} CopyStart $sock $token return } } } elseif {$n > 0} { # Process header lines if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { switch -- [string tolower $key] { content-type { set state(type) [string trim [string tolower $value]] # grab the optional charset information if {[regexp -nocase \ {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \ $state(type) -> cs]} { set state(charset) [string map {{\"} \"} $cs] } else { regexp -nocase {charset\s*=\s*(\S+?);?} \ $state(type) -> state(charset) } } content-length { set state(totalsize) [string trim $value] } content-encoding { set state(coding) [string trim $value] } transfer-encoding { set state(transfer) \ [string trim [string tolower $value]] } proxy-connection - connection { set state(connection) \ [string trim [string tolower $value]] } } lappend state(meta) $key [string trim $value] } } } else { # Now reading body if {[catch { if {[info exists state(-handler)]} { set n [eval $state(-handler) [list $sock $token]] } elseif {[info exists state(transfer_final)]} { set line [getTextLine $sock] set n [string length $line] if {$n > 0} { Log "found $n bytes following final chunk" append state(transfer_final) $line } else { Log "final chunk part" Eof $token } } elseif { [info exists state(transfer)] && $state(transfer) eq "chunked" } { set size 0 set chunk [getTextLine $sock] set n [string length $chunk] if {[string trim $chunk] ne ""} { scan $chunk %x size if {$size != 0} { set bl [fconfigure $sock -blocking] fconfigure $sock -blocking 1 set chunk [read $sock $size] fconfigure $sock -blocking $bl set n [string length $chunk] if {$n >= 0} { append state(body) $chunk } if {$size != [string length $chunk]} { Log "WARNING: mis-sized chunk:\ was [string length $chunk], should be $size" } getTextLine $sock } else { set state(transfer_final) {} } } } else { #Log "read non-chunk $state(currentsize) of $state(totalsize)" set block [read $sock $state(-blocksize)] set n [string length $block] if {$n >= 0} { append state(body) $block } } if {[info exists state]} { if {$n >= 0} { incr state(currentsize) $n } # If Content-Length - check for end of data. if { ($state(totalsize) > 0) && ($state(currentsize) >= $state(totalsize)) } { Eof $token } } } err]} { return [Finish $token $err] } else { if {[info exists state(-progress)]} { eval $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] } } } # catch as an Eof above may have closed the socket already if {![catch {eof $sock} eof] && $eof} { if {[info exists $token]} { set state(connection) close Eof $token } else { # open connection closed on a token that has been cleaned up. CloseSocket $sock } return } } # http::getTextLine -- # # Get one line with the stream in blocking crlf mode # # Arguments # sock The socket receiving input. # # Results: # The line of text, without trailing newline proc http::getTextLine {sock} { set tr [fconfigure $sock -translation] set bl [fconfigure $sock -blocking] fconfigure $sock -translation crlf -blocking 1 set r [gets $sock] fconfigure $sock -translation $tr -blocking $bl return $r } # http::CopyStart # # Error handling wrapper around fcopy # # Arguments # sock The socket to copy from # token The token returned from http::geturl # # Side Effects # This closes the connection upon error proc http::CopyStart {sock token {initial 1}} { upvar #0 $token state if {[info exists state(transfer)] && $state(transfer) eq "chunked"} { foreach coding [ContentEncoding $token] { lappend state(zlib) [zlib stream $coding] } make-transformation-chunked $sock [namespace code [list CopyChunk $token]] } else { if {$initial} { foreach coding [ContentEncoding $token] { zlib push $coding $sock } } if {[catch { fcopy $sock $state(-channel) -size $state(-blocksize) -command \ [list http::CopyDone $token] } err]} { Finish $token $err } } } proc http::CopyChunk {token chunk} { upvar 0 $token state if {[set count [string length $chunk]]} { incr state(currentsize) $count if {[info exists state(zlib)]} { foreach stream $state(zlib) { set chunk [$stream add $chunk] } } puts -nonewline $state(-channel) $chunk if {[info exists state(-progress)]} { eval [linsert $state(-progress) end \ $token $state(totalsize) $state(currentsize)] } } else { Log "CopyChunk Finish $token" if {[info exists state(zlib)]} { set excess "" foreach stream $state(zlib) { catch {set excess [$stream add -finalize $excess]} } puts -nonewline $state(-channel) $excess foreach stream $state(zlib) { $stream close } unset state(zlib) } Eof $token ;# FIX ME: pipelining. } } # http::CopyDone # # fcopy completion callback # # Arguments # token The token returned from http::geturl # count The amount transfered # # Side Effects # Invokes callbacks proc http::CopyDone {token count {error {}}} { variable $token upvar 0 $token state set sock $state(sock) incr state(currentsize) $count if {[info exists state(-progress)]} { eval $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] } # At this point the token may have been reset if {[string length $error]} { Finish $token $error } elseif {[catch {eof $sock} iseof] || $iseof} { Eof $token } else { CopyStart $sock $token 0 } } # http::Eof # # Handle eof on the socket # # Arguments # token The token returned from http::geturl # # Side Effects # Clean up the socket proc http::Eof {token {force 0}} { variable $token upvar 0 $token state if {$state(state) eq "header"} { # Premature eof set state(status) eof } else { set state(status) ok } if {[string length $state(body)] > 0} { if {[catch { foreach coding [ContentEncoding $token] { set state(body) [zlib $coding $state(body)] } } err]} { Log "error doing $coding '$state(body)'" return [Finish $token $err] } if {!$state(binary)} { # If we are getting text, set the incoming channel's encoding # correctly. iso8859-1 is the RFC default, but this could be any IANA # charset. However, we only know how to convert what we have # encodings for. set enc [CharsetToEncoding $state(charset)] if {$enc ne "binary"} { set state(body) [encoding convertfrom $enc $state(body)] } # Translate text line endings. set state(body) [string map {\r\n \n \r \n} $state(body)] } } Finish $token } # http::wait -- # # See documentation for details. # # Arguments: # token Connection token. # # Results: # The status after the wait. proc http::wait {token} { variable $token upvar 0 $token state if {![info exists state(status)] || $state(status) eq ""} { # We must wait on the original variable name, not the upvar alias vwait ${token}(status) } return [status $token] } # http::formatQuery -- # # See documentation for details. Call http::formatQuery with an even # number of arguments, where the first is a name, the second is a value, # the third is another name, and so on. # # Arguments: # args A list of name-value pairs. # # Results: # TODO proc http::formatQuery {args} { set result "" set sep "" foreach i $args { append result $sep [mapReply $i] if {$sep eq "="} { set sep & } else { set sep = } } return $result } # http::mapReply -- # # Do x-www-urlencoded character mapping # # Arguments: # string The string the needs to be encoded # # Results: # The encoded string proc http::mapReply {string} { variable http variable formMap # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] if {$http(-urlencoding) ne ""} { set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } set converted [string map $formMap $string] if {[string match "*\[\u0100-\uffff\]*" $converted]} { regexp "\[\u0100-\uffff\]" $converted badChar # Return this error message for maximum compatability... :^/ return -code error \ "can't read \"formMap($badChar)\": no such element in array" } return $converted } # http::ProxyRequired -- # Default proxy filter. # # Arguments: # host The destination host # # Results: # The current proxy settings proc http::ProxyRequired {host} { variable http if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { if { ![info exists http(-proxyport)] || ![string length $http(-proxyport)] } { set http(-proxyport) 8080 } return [list $http(-proxyhost) $http(-proxyport)] } } # http::CharsetToEncoding -- # # Tries to map a given IANA charset to a tcl encoding. If no encoding # can be found, returns binary. # proc http::CharsetToEncoding {charset} { variable encodings set charset [string tolower $charset] if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} { set encoding "iso8859-$num" } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { set encoding "iso2022-$ext" } elseif {[regexp {shift[-_]?js} $charset]} { set encoding "shiftjis" } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { set encoding "cp$num" } elseif {$charset eq "us-ascii"} { set encoding "ascii" } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} { switch -- $num { 5 {set encoding "iso8859-9"} 1 - 2 - 3 { set encoding "iso8859-$num" } } } else { # other charset, like euc-xx, utf-8,... may directly map to encoding set encoding $charset } set idx [lsearch -exact $encodings $encoding] if {$idx >= 0} { return $encoding } else { return "binary" } } # Return the list of content-encoding transformations we need to do in order. proc http::ContentEncoding {token} { upvar 0 $token state set r {} if {[info exists state(coding)]} { foreach coding [split $state(coding) ,] { switch -exact -- $coding { deflate { lappend r inflate } gzip - x-gzip { lappend r gunzip } compress - x-compress { lappend r decompress } identity {} default { return -code error "unsupported content-encoding \"$coding\"" } } } } return $r } proc http::make-transformation-chunked {chan command} { set lambda {{chan command} { set data "" set size -1 yield while {1} { chan configure $chan -translation {crlf binary} while {[gets $chan line] < 1} { yield } chan configure $chan -translation {binary binary} if {[scan $line %x size] != 1} { return -code error "invalid size: \"$line\"" } set chunk "" while {$size && ![chan eof $chan]} { set part [chan read $chan $size] incr size -[string length $part] append chunk $part } if {[catch { uplevel #0 [linsert $command end $chunk] }]} { http::Log "Error in callback: $::errorInfo" } if {[string length $chunk] == 0} { # channel might have been closed in the callback catch {chan event $chan readable {}} return } } }} coroutine dechunk$chan ::apply $lambda $chan $command chan event $chan readable [namespace origin dechunk$chan] return } # Local variables: # indent-tabs-mode: t # End: