From 5fc06a7332a17e21c98691fdd8a3c9eaf82354ee Mon Sep 17 00:00:00 2001 From: hobbs Date: Fri, 7 Sep 2001 00:03:08 +0000 Subject: * doc/http.n: noted -binary, charset and coding state keys. * tests/http.test: * library/http/pkgIndex.tcl: * library/http/http.tcl (geturl): correctly get charset parameter and convert text according to specified encoding (if known). RFC iso8859-1 is used by default. Also recognize Content-encoding to see if we should do binary translation. Added a CYA -binary switch for the cases that were missed. [Bug #219211 #219399] --- doc/http.n | 28 ++++++++++----- library/http/http.tcl | 87 ++++++++++++++++++++++++++++++----------------- library/http/pkgIndex.tcl | 2 +- tests/http.test | 16 ++++++--- 4 files changed, 88 insertions(+), 45 deletions(-) diff --git a/doc/http.n b/doc/http.n index 4f3e8ab..07f3e46 100644 --- a/doc/http.n +++ b/doc/http.n @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: http.n,v 1.12 2000/09/07 14:27:48 poenitz Exp $ +'\" RCS: @(#) $Id: http.n,v 1.13 2001/09/07 00:03:08 hobbs Exp $ '\" .so man.macros .TH "Http" n 8.3 Tcl "Tcl Built-In Commands" @@ -14,7 +14,7 @@ .SH NAME Http \- Client-side implementation of the HTTP/1.0 protocol. .SH SYNOPSIS -\fBpackage require http ?2.3?\fP +\fBpackage require http ?2.3.3?\fP .sp \fB::http::config \fI?options?\fR .sp @@ -114,7 +114,7 @@ is \fB"Tcl http client package 2.2."\fR .RE .TP \fB::http::geturl\fP \fIurl\fP ?\fIoptions\fP? -The \fB::http::geturl \fR command is the main procedure in the package. +The \fB::http::geturl\fR command is the main procedure in the package. The \fB\-query\fR option causes a POST operation and the \fB\-validate\fR option causes a HEAD operation; otherwise, a GET operation is performed. The \fB::http::geturl\fR command @@ -126,13 +126,16 @@ that is invoked when the HTTP transaction completes. \fB::http::geturl\fR takes several options: .RS .TP +\fB\-binary\fP \fIboolean\fP +Specifies whether to force interpreting the url data as binary. Normally +this is auto-detected (anything not beginning with a \fBtext\fR content +type or whose content encoding is \fBgzip\fR or \fBcompress\fR is +considered binary data). +.TP \fB\-blocksize\fP \fIsize\fP The blocksize used when reading the URL. -At most -\fIsize\fR -bytes are read at once. After each block, a call to the -\fB\-progress\fR -callback is made (if that option is specified). +At most \fIsize\fR bytes are read at once. After each block, a call to the +\fB\-progress\fR callback is made (if that option is specified). .TP \fB\-channel\fP \fIname\fP Copy the URL contents to channel \fIname\fR instead of saving it in @@ -407,6 +410,15 @@ the array are supported: The contents of the URL. This will be empty if the \fB\-channel\fR option has been specified. This value is returned by the \fB::http::data\fP command. .TP +\fBcharset\fR +The value of the charset attribute from the \fBContent-Type\fR meta-data +value. If none was specified, this defaults to the RFC standard +\fBiso8859-1\fR, or the value of \fB$::http::defaultCharset\fR. Incoming +text data will be automatically converted from this charset to utf-8. +.TP +\fBcoding\fR +A copy of the \fBContent-Encoding\fR meta-data value. +.TP \fBcurrentsize\fR The current number of bytes fetched from the URL. This value is returned by the \fB::http::size\fP command. diff --git a/library/http/http.tcl b/library/http/http.tcl index fa5bd8f..314afd1 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -9,7 +9,7 @@ # See the file "license.terms" for information on usage and # redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: http.tcl,v 1.37 2001/08/21 01:09:13 hobbs Exp $ +# RCS: @(#) $Id: http.tcl,v 1.38 2001/09/07 00:03:08 hobbs Exp $ # Rough version history: # 1.0 Old http_get interface @@ -21,7 +21,8 @@ # "ioerror" status in favor of raising an error package require Tcl 8.2 -package provide http 2.3.2 +# keep this in sink with pkgIndex.tcl +package provide http 2.3.3 namespace eval http { variable http @@ -33,26 +34,29 @@ namespace eval http { } set http(-useragent) "Tcl http client package [package provide http]" - variable formMap - variable alphanumeric a-zA-Z0-9 - variable c - variable i 0 - for {} {$i <= 256} {incr i} { - set c [format %c $i] - if {![string match \[$alphanumeric\] $c]} { - set formMap($c) %[format %.2x $i] + proc init {} { + variable formMap + variable alphanumeric a-zA-Z0-9 + for {set i 0} {$i <= 256} {incr i} { + set c [format %c $i] + if {![string match \[$alphanumeric\] $c]} { + set formMap($c) %[format %.2x $i] + } } + # These are handled specially + array set formMap { " " + \n %0d%0a } } - # These are handled specially - array set formMap { - " " + \n %0d%0a - } + init variable urlTypes array set urlTypes { http {80 ::socket} } + variable encodings [string tolower [encoding names]] + # This can be changed, but iso8859-1 is the RFC standard. + variable defaultCharset "iso8859-1" + namespace export geturl config reset wait formatQuery register unregister # Useful, but not exported: data size status code } @@ -212,6 +216,7 @@ proc http::reset { token {why reset} } { proc http::geturl { url args } { variable http variable urlTypes + variable defaultCharset # Initialize the state variable, an array. We'll return the # name of this array as the token for the transaction. @@ -227,6 +232,7 @@ proc http::geturl { url args } { # Process command options. array set state { + -binary false -blocksize 8192 -queryblocksize 8192 -validate 0 @@ -236,6 +242,7 @@ proc http::geturl { url args } { -queryprogress {} state header meta {} + coding {} currentsize 0 totalsize 0 querylength 0 @@ -245,7 +252,8 @@ proc http::geturl { url args } { status "" http "" } - set options {-blocksize -channel -command -handler -headers \ + set state(charset) $defaultCharset + set options {-binary -blocksize -channel -command -handler -headers \ -progress -query -queryblocksize -querychannel -queryprogress\ -validate -timeout -type} set usage [join $options ", "] @@ -646,7 +654,7 @@ proc http::Write {token} { # Side Effects # Read the socket and handle callbacks. - proc http::Event {token} { +proc http::Event {token} { variable $token upvar 0 $token state set s $state(sock) @@ -659,15 +667,27 @@ proc http::Write {token} { if {[catch {gets $s line} n]} { Finish $token $n } elseif {$n == 0} { + variable encodings set state(state) body - if {![regexp -nocase ^text $state(type)]} { + if {$state(-binary) || ![regexp -nocase ^text $state(type)] || \ + [regexp gzip|compress $state(coding)]} { # Turn off conversions for non-text data fconfigure $s -translation binary if {[info exists state(-channel)]} { fconfigure $state(-channel) -translation binary } + } else { + # If we are getting text, set the incoming channel's + # encoding correctly. iso8859-1 is the RFC default, but + # this could be any IANA charset. However, we only know + # how to convert what we have encodings for. + set idx [lsearch -exact $encodings \ + [string tolower $state(charset)]] + if {$idx >= 0} { + fconfigure $s -encoding [lindex $encodings $idx] + } } - if {[info exists state(-channel)] && + if {[info exists state(-channel)] && \ ![info exists state(-handler)]} { # Initiate a sequence of background fcopies fileevent $s readable {} @@ -676,10 +696,15 @@ proc http::Write {token} { } elseif {$n > 0} { if {[regexp -nocase {^content-type:(.+)$} $line x type]} { set state(type) [string trim $type] + # grab the optional charset information + regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset) } if {[regexp -nocase {^content-length:(.+)$} $line x length]} { set state(totalsize) [string trim $length] } + if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} { + set state(coding) [string trim $coding] + } if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { lappend state(meta) $key [string trim $value] } elseif {[regexp ^HTTP $line]} { @@ -704,7 +729,8 @@ proc http::Write {token} { Finish $token $err } else { if {[info exists state(-progress)]} { - eval $state(-progress) {$token $state(totalsize) $state(currentsize)} + eval $state(-progress) \ + {$token $state(totalsize) $state(currentsize)} } } } @@ -721,7 +747,7 @@ proc http::Write {token} { # Side Effects # This closes the connection upon error - proc http::CopyStart {s token} { +proc http::CopyStart {s token} { variable $token upvar 0 $token state if {[catch { @@ -743,7 +769,7 @@ proc http::Write {token} { # Side Effects # Invokes callbacks - proc http::CopyDone {token count {error {}}} { +proc http::CopyDone {token count {error {}}} { variable $token upvar 0 $token state set s $state(sock) @@ -771,7 +797,7 @@ proc http::Write {token} { # Side Effects # Clean up the socket - proc http::Eof {token} { +proc http::Eof {token} { variable $token upvar 0 $token state if {[string equal $state(state) "header"]} { @@ -824,10 +850,10 @@ proc http::formatQuery {args} { set sep "" foreach i $args { append result $sep [mapReply $i] - if {[string compare $sep "="]} { - set sep = - } else { + if {[string equal $sep "="]} { set sep & + } else { + set sep = } } return $result @@ -843,8 +869,9 @@ proc http::formatQuery {args} { # Results: # The encoded string - proc http::mapReply {string} { +proc http::mapReply {string} { variable formMap + variable alphanumeric # The spec says: "non-alphanumeric characters are replaced by '%HH'" # 1 leave alphanumerics characters alone @@ -852,7 +879,6 @@ proc http::formatQuery {args} { # 3 Escape constructs that are "special" to the tcl parser # 4 "subst" the result, doing all the array substitutions - set alphanumeric a-zA-Z0-9 regsub -all \[^$alphanumeric\] $string {$formMap(&)} string regsub -all {[][{})\\]\)} $string {\\&} string return [subst -nocommand $string] @@ -867,14 +893,13 @@ proc http::formatQuery {args} { # Results: # The current proxy settings - proc http::ProxyRequired {host} { +proc http::ProxyRequired {host} { variable http if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { - if {![info exists http(-proxyport)] || ![string length $http(-proxyport)]} { + if {![info exists http(-proxyport)] || \ + ![string length $http(-proxyport)]} { set http(-proxyport) 8080 } return [list $http(-proxyhost) $http(-proxyport)] - } else { - return {} } } diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl index 4186a43..3c5d514 100644 --- a/library/http/pkgIndex.tcl +++ b/library/http/pkgIndex.tcl @@ -9,4 +9,4 @@ # full path name of this file's directory. if {![package vsatisfies [package provide Tcl] 8.2]} {return} -package ifneeded http 2.3.2 [list tclPkgSetup $dir http 2.3.2 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister}}}] +package ifneeded http 2.3.3 [list tclPkgSetup $dir http 2.3.3 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister}}}] diff --git a/tests/http.test b/tests/http.test index 70ca2f6..c0f7821 100644 --- a/tests/http.test +++ b/tests/http.test @@ -12,7 +12,7 @@ # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # # -# RCS: @(#) $Id: http.test,v 1.23 2001/08/07 00:42:30 hobbs Exp $ +# RCS: @(#) $Id: http.test,v 1.24 2001/09/07 00:03:08 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -99,16 +99,18 @@ test http-1.4 {http::config} { } {-accept */* -proxyfilter myFilter -proxyhost nowhere.come -proxyport 8080 -useragent {Tcl Test Suite}} test http-1.5 {http::config} { - catch {http::config -proxyhost {} -junk 8080} -} 1 + list [catch {http::config -proxyhost {} -junk 8080} msg] $msg +} {1 {Unknown option -junk, must be: -accept, -proxyfilter, -proxyhost, -proxyport, -useragent}} + test http-2.1 {http::reset} { catch {http::reset http#1} } 0 test http-3.1 {http::geturl} { - catch {http::geturl -bogus flag} -} 1 + list [catch {http::geturl -bogus flag} msg] $msg +} {1 {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -progress, -query, -queryblocksize, -querychannel, -queryprogress, -validate, -timeout, -type}} + test http-3.2 {http::geturl} { catch {http::geturl http:junk} err set err @@ -464,6 +466,10 @@ test http-6.1 {http::ProxyRequired} {

GET http://$url

" +test http-7.1 {http::mapReply} { + http::mapReply "abc\$\[\]\"\\()\}\{" +} {abc%24%5b%5d%22%5c%28%29%7d%7b} + # cleanup catch {unset url} catch {unset badurl} -- cgit v0.12