diff options
author | hobbs <hobbs> | 2001-09-07 00:03:08 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2001-09-07 00:03:08 (GMT) |
commit | 5fc06a7332a17e21c98691fdd8a3c9eaf82354ee (patch) | |
tree | 47368b22e97863dbd5d532cb02c96ce84b1a8df3 /library/http/http.tcl | |
parent | 56b85d53e89fe89f7ea4416c45677e8f4630bc4e (diff) | |
download | tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.zip tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.tar.gz tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.tar.bz2 |
* doc/http.n: noted -binary, charset and coding state keys.
* tests/http.test:
* library/http/pkgIndex.tcl:
* library/http/http.tcl (geturl): correctly get charset parameter
and convert text according to specified encoding (if known). RFC
iso8859-1 is used by default. Also recognize Content-encoding to
see if we should do binary translation. Added a CYA -binary
switch for the cases that were missed. [Bug #219211 #219399]
Diffstat (limited to 'library/http/http.tcl')
-rw-r--r-- | library/http/http.tcl | 87 |
1 files changed, 56 insertions, 31 deletions
diff --git a/library/http/http.tcl b/library/http/http.tcl index fa5bd8f..314afd1 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -9,7 +9,7 @@ # See the file "license.terms" for information on usage and # redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: http.tcl,v 1.37 2001/08/21 01:09:13 hobbs Exp $ +# RCS: @(#) $Id: http.tcl,v 1.38 2001/09/07 00:03:08 hobbs Exp $ # Rough version history: # 1.0 Old http_get interface @@ -21,7 +21,8 @@ # "ioerror" status in favor of raising an error package require Tcl 8.2 -package provide http 2.3.2 +# keep this in sink with pkgIndex.tcl +package provide http 2.3.3 namespace eval http { variable http @@ -33,26 +34,29 @@ namespace eval http { } set http(-useragent) "Tcl http client package [package provide http]" - variable formMap - variable alphanumeric a-zA-Z0-9 - variable c - variable i 0 - for {} {$i <= 256} {incr i} { - set c [format %c $i] - if {![string match \[$alphanumeric\] $c]} { - set formMap($c) %[format %.2x $i] + proc init {} { + variable formMap + variable alphanumeric a-zA-Z0-9 + for {set i 0} {$i <= 256} {incr i} { + set c [format %c $i] + if {![string match \[$alphanumeric\] $c]} { + set formMap($c) %[format %.2x $i] + } } + # These are handled specially + array set formMap { " " + \n %0d%0a } } - # These are handled specially - array set formMap { - " " + \n %0d%0a - } + init variable urlTypes array set urlTypes { http {80 ::socket} } + variable encodings [string tolower [encoding names]] + # This can be changed, but iso8859-1 is the RFC standard. + variable defaultCharset "iso8859-1" + namespace export geturl config reset wait formatQuery register unregister # Useful, but not exported: data size status code } @@ -212,6 +216,7 @@ proc http::reset { token {why reset} } { proc http::geturl { url args } { variable http variable urlTypes + variable defaultCharset # Initialize the state variable, an array. We'll return the # name of this array as the token for the transaction. @@ -227,6 +232,7 @@ proc http::geturl { url args } { # Process command options. array set state { + -binary false -blocksize 8192 -queryblocksize 8192 -validate 0 @@ -236,6 +242,7 @@ proc http::geturl { url args } { -queryprogress {} state header meta {} + coding {} currentsize 0 totalsize 0 querylength 0 @@ -245,7 +252,8 @@ proc http::geturl { url args } { status "" http "" } - set options {-blocksize -channel -command -handler -headers \ + set state(charset) $defaultCharset + set options {-binary -blocksize -channel -command -handler -headers \ -progress -query -queryblocksize -querychannel -queryprogress\ -validate -timeout -type} set usage [join $options ", "] @@ -646,7 +654,7 @@ proc http::Write {token} { # Side Effects # Read the socket and handle callbacks. - proc http::Event {token} { +proc http::Event {token} { variable $token upvar 0 $token state set s $state(sock) @@ -659,15 +667,27 @@ proc http::Write {token} { if {[catch {gets $s line} n]} { Finish $token $n } elseif {$n == 0} { + variable encodings set state(state) body - if {![regexp -nocase ^text $state(type)]} { + if {$state(-binary) || ![regexp -nocase ^text $state(type)] || \ + [regexp gzip|compress $state(coding)]} { # Turn off conversions for non-text data fconfigure $s -translation binary if {[info exists state(-channel)]} { fconfigure $state(-channel) -translation binary } + } else { + # If we are getting text, set the incoming channel's + # encoding correctly. iso8859-1 is the RFC default, but + # this could be any IANA charset. However, we only know + # how to convert what we have encodings for. + set idx [lsearch -exact $encodings \ + [string tolower $state(charset)]] + if {$idx >= 0} { + fconfigure $s -encoding [lindex $encodings $idx] + } } - if {[info exists state(-channel)] && + if {[info exists state(-channel)] && \ ![info exists state(-handler)]} { # Initiate a sequence of background fcopies fileevent $s readable {} @@ -676,10 +696,15 @@ proc http::Write {token} { } elseif {$n > 0} { if {[regexp -nocase {^content-type:(.+)$} $line x type]} { set state(type) [string trim $type] + # grab the optional charset information + regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset) } if {[regexp -nocase {^content-length:(.+)$} $line x length]} { set state(totalsize) [string trim $length] } + if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} { + set state(coding) [string trim $coding] + } if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { lappend state(meta) $key [string trim $value] } elseif {[regexp ^HTTP $line]} { @@ -704,7 +729,8 @@ proc http::Write {token} { Finish $token $err } else { if {[info exists state(-progress)]} { - eval $state(-progress) {$token $state(totalsize) $state(currentsize)} + eval $state(-progress) \ + {$token $state(totalsize) $state(currentsize)} } } } @@ -721,7 +747,7 @@ proc http::Write {token} { # Side Effects # This closes the connection upon error - proc http::CopyStart {s token} { +proc http::CopyStart {s token} { variable $token upvar 0 $token state if {[catch { @@ -743,7 +769,7 @@ proc http::Write {token} { # Side Effects # Invokes callbacks - proc http::CopyDone {token count {error {}}} { +proc http::CopyDone {token count {error {}}} { variable $token upvar 0 $token state set s $state(sock) @@ -771,7 +797,7 @@ proc http::Write {token} { # Side Effects # Clean up the socket - proc http::Eof {token} { +proc http::Eof {token} { variable $token upvar 0 $token state if {[string equal $state(state) "header"]} { @@ -824,10 +850,10 @@ proc http::formatQuery {args} { set sep "" foreach i $args { append result $sep [mapReply $i] - if {[string compare $sep "="]} { - set sep = - } else { + if {[string equal $sep "="]} { set sep & + } else { + set sep = } } return $result @@ -843,8 +869,9 @@ proc http::formatQuery {args} { # Results: # The encoded string - proc http::mapReply {string} { +proc http::mapReply {string} { variable formMap + variable alphanumeric # The spec says: "non-alphanumeric characters are replaced by '%HH'" # 1 leave alphanumerics characters alone @@ -852,7 +879,6 @@ proc http::formatQuery {args} { # 3 Escape constructs that are "special" to the tcl parser # 4 "subst" the result, doing all the array substitutions - set alphanumeric a-zA-Z0-9 regsub -all \[^$alphanumeric\] $string {$formMap(&)} string regsub -all {[][{})\\]\)} $string {\\&} string return [subst -nocommand $string] @@ -867,14 +893,13 @@ proc http::formatQuery {args} { # Results: # The current proxy settings - proc http::ProxyRequired {host} { +proc http::ProxyRequired {host} { variable http if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { - if {![info exists http(-proxyport)] || ![string length $http(-proxyport)]} { + if {![info exists http(-proxyport)] || \ + ![string length $http(-proxyport)]} { set http(-proxyport) 8080 } return [list $http(-proxyhost) $http(-proxyport)] - } else { - return {} } } |