summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhobbs <hobbs>2001-09-07 00:03:08 (GMT)
committerhobbs <hobbs>2001-09-07 00:03:08 (GMT)
commit5fc06a7332a17e21c98691fdd8a3c9eaf82354ee (patch)
tree47368b22e97863dbd5d532cb02c96ce84b1a8df3
parent56b85d53e89fe89f7ea4416c45677e8f4630bc4e (diff)
downloadtcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.zip
tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.tar.gz
tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.tar.bz2
* doc/http.n: noted -binary, charset and coding state keys.
* tests/http.test: * library/http/pkgIndex.tcl: * library/http/http.tcl (geturl): correctly get charset parameter and convert text according to specified encoding (if known). RFC iso8859-1 is used by default. Also recognize Content-encoding to see if we should do binary translation. Added a CYA -binary switch for the cases that were missed. [Bug #219211 #219399]
-rw-r--r--doc/http.n28
-rw-r--r--library/http/http.tcl87
-rw-r--r--library/http/pkgIndex.tcl2
-rw-r--r--tests/http.test16
4 files changed, 88 insertions, 45 deletions
diff --git a/doc/http.n b/doc/http.n
index 4f3e8ab..07f3e46 100644
--- a/doc/http.n
+++ b/doc/http.n
@@ -5,7 +5,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: http.n,v 1.12 2000/09/07 14:27:48 poenitz Exp $
+'\" RCS: @(#) $Id: http.n,v 1.13 2001/09/07 00:03:08 hobbs Exp $
'\"
.so man.macros
.TH "Http" n 8.3 Tcl "Tcl Built-In Commands"
@@ -14,7 +14,7 @@
.SH NAME
Http \- Client-side implementation of the HTTP/1.0 protocol.
.SH SYNOPSIS
-\fBpackage require http ?2.3?\fP
+\fBpackage require http ?2.3.3?\fP
.sp
\fB::http::config \fI?options?\fR
.sp
@@ -114,7 +114,7 @@ is \fB"Tcl http client package 2.2."\fR
.RE
.TP
\fB::http::geturl\fP \fIurl\fP ?\fIoptions\fP?
-The \fB::http::geturl \fR command is the main procedure in the package.
+The \fB::http::geturl\fR command is the main procedure in the package.
The \fB\-query\fR option causes a POST operation and
the \fB\-validate\fR option causes a HEAD operation;
otherwise, a GET operation is performed. The \fB::http::geturl\fR command
@@ -126,13 +126,16 @@ that is invoked when the HTTP transaction completes.
\fB::http::geturl\fR takes several options:
.RS
.TP
+\fB\-binary\fP \fIboolean\fP
+Specifies whether to force interpreting the url data as binary. Normally
+this is auto-detected (anything not beginning with a \fBtext\fR content
+type or whose content encoding is \fBgzip\fR or \fBcompress\fR is
+considered binary data).
+.TP
\fB\-blocksize\fP \fIsize\fP
The blocksize used when reading the URL.
-At most
-\fIsize\fR
-bytes are read at once. After each block, a call to the
-\fB\-progress\fR
-callback is made (if that option is specified).
+At most \fIsize\fR bytes are read at once. After each block, a call to the
+\fB\-progress\fR callback is made (if that option is specified).
.TP
\fB\-channel\fP \fIname\fP
Copy the URL contents to channel \fIname\fR instead of saving it in
@@ -407,6 +410,15 @@ the array are supported:
The contents of the URL. This will be empty if the \fB\-channel\fR
option has been specified. This value is returned by the \fB::http::data\fP command.
.TP
+\fBcharset\fR
+The value of the charset attribute from the \fBContent-Type\fR meta-data
+value. If none was specified, this defaults to the RFC standard
+\fBiso8859-1\fR, or the value of \fB$::http::defaultCharset\fR. Incoming
+text data will be automatically converted from this charset to utf-8.
+.TP
+\fBcoding\fR
+A copy of the \fBContent-Encoding\fR meta-data value.
+.TP
\fBcurrentsize\fR
The current number of bytes fetched from the URL.
This value is returned by the \fB::http::size\fP command.
diff --git a/library/http/http.tcl b/library/http/http.tcl
index fa5bd8f..314afd1 100644
--- a/library/http/http.tcl
+++ b/library/http/http.tcl
@@ -9,7 +9,7 @@
# See the file "license.terms" for information on usage and
# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: http.tcl,v 1.37 2001/08/21 01:09:13 hobbs Exp $
+# RCS: @(#) $Id: http.tcl,v 1.38 2001/09/07 00:03:08 hobbs Exp $
# Rough version history:
# 1.0 Old http_get interface
@@ -21,7 +21,8 @@
# "ioerror" status in favor of raising an error
package require Tcl 8.2
-package provide http 2.3.2
+# keep this in sink with pkgIndex.tcl
+package provide http 2.3.3
namespace eval http {
variable http
@@ -33,26 +34,29 @@ namespace eval http {
}
set http(-useragent) "Tcl http client package [package provide http]"
- variable formMap
- variable alphanumeric a-zA-Z0-9
- variable c
- variable i 0
- for {} {$i <= 256} {incr i} {
- set c [format %c $i]
- if {![string match \[$alphanumeric\] $c]} {
- set formMap($c) %[format %.2x $i]
+ proc init {} {
+ variable formMap
+ variable alphanumeric a-zA-Z0-9
+ for {set i 0} {$i <= 256} {incr i} {
+ set c [format %c $i]
+ if {![string match \[$alphanumeric\] $c]} {
+ set formMap($c) %[format %.2x $i]
+ }
}
+ # These are handled specially
+ array set formMap { " " + \n %0d%0a }
}
- # These are handled specially
- array set formMap {
- " " + \n %0d%0a
- }
+ init
variable urlTypes
array set urlTypes {
http {80 ::socket}
}
+ variable encodings [string tolower [encoding names]]
+ # This can be changed, but iso8859-1 is the RFC standard.
+ variable defaultCharset "iso8859-1"
+
namespace export geturl config reset wait formatQuery register unregister
# Useful, but not exported: data size status code
}
@@ -212,6 +216,7 @@ proc http::reset { token {why reset} } {
proc http::geturl { url args } {
variable http
variable urlTypes
+ variable defaultCharset
# Initialize the state variable, an array. We'll return the
# name of this array as the token for the transaction.
@@ -227,6 +232,7 @@ proc http::geturl { url args } {
# Process command options.
array set state {
+ -binary false
-blocksize 8192
-queryblocksize 8192
-validate 0
@@ -236,6 +242,7 @@ proc http::geturl { url args } {
-queryprogress {}
state header
meta {}
+ coding {}
currentsize 0
totalsize 0
querylength 0
@@ -245,7 +252,8 @@ proc http::geturl { url args } {
status ""
http ""
}
- set options {-blocksize -channel -command -handler -headers \
+ set state(charset) $defaultCharset
+ set options {-binary -blocksize -channel -command -handler -headers \
-progress -query -queryblocksize -querychannel -queryprogress\
-validate -timeout -type}
set usage [join $options ", "]
@@ -646,7 +654,7 @@ proc http::Write {token} {
# Side Effects
# Read the socket and handle callbacks.
- proc http::Event {token} {
+proc http::Event {token} {
variable $token
upvar 0 $token state
set s $state(sock)
@@ -659,15 +667,27 @@ proc http::Write {token} {
if {[catch {gets $s line} n]} {
Finish $token $n
} elseif {$n == 0} {
+ variable encodings
set state(state) body
- if {![regexp -nocase ^text $state(type)]} {
+ if {$state(-binary) || ![regexp -nocase ^text $state(type)] || \
+ [regexp gzip|compress $state(coding)]} {
# Turn off conversions for non-text data
fconfigure $s -translation binary
if {[info exists state(-channel)]} {
fconfigure $state(-channel) -translation binary
}
+ } else {
+ # If we are getting text, set the incoming channel's
+ # encoding correctly. iso8859-1 is the RFC default, but
+ # this could be any IANA charset. However, we only know
+ # how to convert what we have encodings for.
+ set idx [lsearch -exact $encodings \
+ [string tolower $state(charset)]]
+ if {$idx >= 0} {
+ fconfigure $s -encoding [lindex $encodings $idx]
+ }
}
- if {[info exists state(-channel)] &&
+ if {[info exists state(-channel)] && \
![info exists state(-handler)]} {
# Initiate a sequence of background fcopies
fileevent $s readable {}
@@ -676,10 +696,15 @@ proc http::Write {token} {
} elseif {$n > 0} {
if {[regexp -nocase {^content-type:(.+)$} $line x type]} {
set state(type) [string trim $type]
+ # grab the optional charset information
+ regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset)
}
if {[regexp -nocase {^content-length:(.+)$} $line x length]} {
set state(totalsize) [string trim $length]
}
+ if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} {
+ set state(coding) [string trim $coding]
+ }
if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
lappend state(meta) $key [string trim $value]
} elseif {[regexp ^HTTP $line]} {
@@ -704,7 +729,8 @@ proc http::Write {token} {
Finish $token $err
} else {
if {[info exists state(-progress)]} {
- eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
+ eval $state(-progress) \
+ {$token $state(totalsize) $state(currentsize)}
}
}
}
@@ -721,7 +747,7 @@ proc http::Write {token} {
# Side Effects
# This closes the connection upon error
- proc http::CopyStart {s token} {
+proc http::CopyStart {s token} {
variable $token
upvar 0 $token state
if {[catch {
@@ -743,7 +769,7 @@ proc http::Write {token} {
# Side Effects
# Invokes callbacks
- proc http::CopyDone {token count {error {}}} {
+proc http::CopyDone {token count {error {}}} {
variable $token
upvar 0 $token state
set s $state(sock)
@@ -771,7 +797,7 @@ proc http::Write {token} {
# Side Effects
# Clean up the socket
- proc http::Eof {token} {
+proc http::Eof {token} {
variable $token
upvar 0 $token state
if {[string equal $state(state) "header"]} {
@@ -824,10 +850,10 @@ proc http::formatQuery {args} {
set sep ""
foreach i $args {
append result $sep [mapReply $i]
- if {[string compare $sep "="]} {
- set sep =
- } else {
+ if {[string equal $sep "="]} {
set sep &
+ } else {
+ set sep =
}
}
return $result
@@ -843,8 +869,9 @@ proc http::formatQuery {args} {
# Results:
# The encoded string
- proc http::mapReply {string} {
+proc http::mapReply {string} {
variable formMap
+ variable alphanumeric
# The spec says: "non-alphanumeric characters are replaced by '%HH'"
# 1 leave alphanumerics characters alone
@@ -852,7 +879,6 @@ proc http::formatQuery {args} {
# 3 Escape constructs that are "special" to the tcl parser
# 4 "subst" the result, doing all the array substitutions
- set alphanumeric a-zA-Z0-9
regsub -all \[^$alphanumeric\] $string {$formMap(&)} string
regsub -all {[][{})\\]\)} $string {\\&} string
return [subst -nocommand $string]
@@ -867,14 +893,13 @@ proc http::formatQuery {args} {
# Results:
# The current proxy settings
- proc http::ProxyRequired {host} {
+proc http::ProxyRequired {host} {
variable http
if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
- if {![info exists http(-proxyport)] || ![string length $http(-proxyport)]} {
+ if {![info exists http(-proxyport)] || \
+ ![string length $http(-proxyport)]} {
set http(-proxyport) 8080
}
return [list $http(-proxyhost) $http(-proxyport)]
- } else {
- return {}
}
}
diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl
index 4186a43..3c5d514 100644
--- a/library/http/pkgIndex.tcl
+++ b/library/http/pkgIndex.tcl
@@ -9,4 +9,4 @@
# full path name of this file's directory.
if {![package vsatisfies [package provide Tcl] 8.2]} {return}
-package ifneeded http 2.3.2 [list tclPkgSetup $dir http 2.3.2 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister}}}]
+package ifneeded http 2.3.3 [list tclPkgSetup $dir http 2.3.3 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister}}}]
diff --git a/tests/http.test b/tests/http.test
index 70ca2f6..c0f7821 100644
--- a/tests/http.test
+++ b/tests/http.test
@@ -12,7 +12,7 @@
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
#
-# RCS: @(#) $Id: http.test,v 1.23 2001/08/07 00:42:30 hobbs Exp $
+# RCS: @(#) $Id: http.test,v 1.24 2001/09/07 00:03:08 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -99,16 +99,18 @@ test http-1.4 {http::config} {
} {-accept */* -proxyfilter myFilter -proxyhost nowhere.come -proxyport 8080 -useragent {Tcl Test Suite}}
test http-1.5 {http::config} {
- catch {http::config -proxyhost {} -junk 8080}
-} 1
+ list [catch {http::config -proxyhost {} -junk 8080} msg] $msg
+} {1 {Unknown option -junk, must be: -accept, -proxyfilter, -proxyhost, -proxyport, -useragent}}
+
test http-2.1 {http::reset} {
catch {http::reset http#1}
} 0
test http-3.1 {http::geturl} {
- catch {http::geturl -bogus flag}
-} 1
+ list [catch {http::geturl -bogus flag} msg] $msg
+} {1 {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -progress, -query, -queryblocksize, -querychannel, -queryprogress, -validate, -timeout, -type}}
+
test http-3.2 {http::geturl} {
catch {http::geturl http:junk} err
set err
@@ -464,6 +466,10 @@ test http-6.1 {http::ProxyRequired} {
<h2>GET http://$url</h2>
</body></html>"
+test http-7.1 {http::mapReply} {
+ http::mapReply "abc\$\[\]\"\\()\}\{"
+} {abc%24%5b%5d%22%5c%28%29%7d%7b}
+
# cleanup
catch {unset url}
catch {unset badurl}