summaryrefslogtreecommitdiffstats
path: root/library/http/http.tcl
diff options
context:
space:
mode:
authorhobbs <hobbs>2001-09-07 00:03:08 (GMT)
committerhobbs <hobbs>2001-09-07 00:03:08 (GMT)
commit5fc06a7332a17e21c98691fdd8a3c9eaf82354ee (patch)
tree47368b22e97863dbd5d532cb02c96ce84b1a8df3 /library/http/http.tcl
parent56b85d53e89fe89f7ea4416c45677e8f4630bc4e (diff)
downloadtcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.zip
tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.tar.gz
tcl-5fc06a7332a17e21c98691fdd8a3c9eaf82354ee.tar.bz2
* doc/http.n: noted -binary, charset and coding state keys.
* tests/http.test: * library/http/pkgIndex.tcl: * library/http/http.tcl (geturl): correctly get charset parameter and convert text according to specified encoding (if known). RFC iso8859-1 is used by default. Also recognize Content-encoding to see if we should do binary translation. Added a CYA -binary switch for the cases that were missed. [Bug #219211 #219399]
Diffstat (limited to 'library/http/http.tcl')
-rw-r--r--library/http/http.tcl87
1 files changed, 56 insertions, 31 deletions
diff --git a/library/http/http.tcl b/library/http/http.tcl
index fa5bd8f..314afd1 100644
--- a/library/http/http.tcl
+++ b/library/http/http.tcl
@@ -9,7 +9,7 @@
# See the file "license.terms" for information on usage and
# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: http.tcl,v 1.37 2001/08/21 01:09:13 hobbs Exp $
+# RCS: @(#) $Id: http.tcl,v 1.38 2001/09/07 00:03:08 hobbs Exp $
# Rough version history:
# 1.0 Old http_get interface
@@ -21,7 +21,8 @@
# "ioerror" status in favor of raising an error
package require Tcl 8.2
-package provide http 2.3.2
+# keep this in sink with pkgIndex.tcl
+package provide http 2.3.3
namespace eval http {
variable http
@@ -33,26 +34,29 @@ namespace eval http {
}
set http(-useragent) "Tcl http client package [package provide http]"
- variable formMap
- variable alphanumeric a-zA-Z0-9
- variable c
- variable i 0
- for {} {$i <= 256} {incr i} {
- set c [format %c $i]
- if {![string match \[$alphanumeric\] $c]} {
- set formMap($c) %[format %.2x $i]
+ proc init {} {
+ variable formMap
+ variable alphanumeric a-zA-Z0-9
+ for {set i 0} {$i <= 256} {incr i} {
+ set c [format %c $i]
+ if {![string match \[$alphanumeric\] $c]} {
+ set formMap($c) %[format %.2x $i]
+ }
}
+ # These are handled specially
+ array set formMap { " " + \n %0d%0a }
}
- # These are handled specially
- array set formMap {
- " " + \n %0d%0a
- }
+ init
variable urlTypes
array set urlTypes {
http {80 ::socket}
}
+ variable encodings [string tolower [encoding names]]
+ # This can be changed, but iso8859-1 is the RFC standard.
+ variable defaultCharset "iso8859-1"
+
namespace export geturl config reset wait formatQuery register unregister
# Useful, but not exported: data size status code
}
@@ -212,6 +216,7 @@ proc http::reset { token {why reset} } {
proc http::geturl { url args } {
variable http
variable urlTypes
+ variable defaultCharset
# Initialize the state variable, an array. We'll return the
# name of this array as the token for the transaction.
@@ -227,6 +232,7 @@ proc http::geturl { url args } {
# Process command options.
array set state {
+ -binary false
-blocksize 8192
-queryblocksize 8192
-validate 0
@@ -236,6 +242,7 @@ proc http::geturl { url args } {
-queryprogress {}
state header
meta {}
+ coding {}
currentsize 0
totalsize 0
querylength 0
@@ -245,7 +252,8 @@ proc http::geturl { url args } {
status ""
http ""
}
- set options {-blocksize -channel -command -handler -headers \
+ set state(charset) $defaultCharset
+ set options {-binary -blocksize -channel -command -handler -headers \
-progress -query -queryblocksize -querychannel -queryprogress\
-validate -timeout -type}
set usage [join $options ", "]
@@ -646,7 +654,7 @@ proc http::Write {token} {
# Side Effects
# Read the socket and handle callbacks.
- proc http::Event {token} {
+proc http::Event {token} {
variable $token
upvar 0 $token state
set s $state(sock)
@@ -659,15 +667,27 @@ proc http::Write {token} {
if {[catch {gets $s line} n]} {
Finish $token $n
} elseif {$n == 0} {
+ variable encodings
set state(state) body
- if {![regexp -nocase ^text $state(type)]} {
+ if {$state(-binary) || ![regexp -nocase ^text $state(type)] || \
+ [regexp gzip|compress $state(coding)]} {
# Turn off conversions for non-text data
fconfigure $s -translation binary
if {[info exists state(-channel)]} {
fconfigure $state(-channel) -translation binary
}
+ } else {
+ # If we are getting text, set the incoming channel's
+ # encoding correctly. iso8859-1 is the RFC default, but
+ # this could be any IANA charset. However, we only know
+ # how to convert what we have encodings for.
+ set idx [lsearch -exact $encodings \
+ [string tolower $state(charset)]]
+ if {$idx >= 0} {
+ fconfigure $s -encoding [lindex $encodings $idx]
+ }
}
- if {[info exists state(-channel)] &&
+ if {[info exists state(-channel)] && \
![info exists state(-handler)]} {
# Initiate a sequence of background fcopies
fileevent $s readable {}
@@ -676,10 +696,15 @@ proc http::Write {token} {
} elseif {$n > 0} {
if {[regexp -nocase {^content-type:(.+)$} $line x type]} {
set state(type) [string trim $type]
+ # grab the optional charset information
+ regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset)
}
if {[regexp -nocase {^content-length:(.+)$} $line x length]} {
set state(totalsize) [string trim $length]
}
+ if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} {
+ set state(coding) [string trim $coding]
+ }
if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
lappend state(meta) $key [string trim $value]
} elseif {[regexp ^HTTP $line]} {
@@ -704,7 +729,8 @@ proc http::Write {token} {
Finish $token $err
} else {
if {[info exists state(-progress)]} {
- eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
+ eval $state(-progress) \
+ {$token $state(totalsize) $state(currentsize)}
}
}
}
@@ -721,7 +747,7 @@ proc http::Write {token} {
# Side Effects
# This closes the connection upon error
- proc http::CopyStart {s token} {
+proc http::CopyStart {s token} {
variable $token
upvar 0 $token state
if {[catch {
@@ -743,7 +769,7 @@ proc http::Write {token} {
# Side Effects
# Invokes callbacks
- proc http::CopyDone {token count {error {}}} {
+proc http::CopyDone {token count {error {}}} {
variable $token
upvar 0 $token state
set s $state(sock)
@@ -771,7 +797,7 @@ proc http::Write {token} {
# Side Effects
# Clean up the socket
- proc http::Eof {token} {
+proc http::Eof {token} {
variable $token
upvar 0 $token state
if {[string equal $state(state) "header"]} {
@@ -824,10 +850,10 @@ proc http::formatQuery {args} {
set sep ""
foreach i $args {
append result $sep [mapReply $i]
- if {[string compare $sep "="]} {
- set sep =
- } else {
+ if {[string equal $sep "="]} {
set sep &
+ } else {
+ set sep =
}
}
return $result
@@ -843,8 +869,9 @@ proc http::formatQuery {args} {
# Results:
# The encoded string
- proc http::mapReply {string} {
+proc http::mapReply {string} {
variable formMap
+ variable alphanumeric
# The spec says: "non-alphanumeric characters are replaced by '%HH'"
# 1 leave alphanumerics characters alone
@@ -852,7 +879,6 @@ proc http::formatQuery {args} {
# 3 Escape constructs that are "special" to the tcl parser
# 4 "subst" the result, doing all the array substitutions
- set alphanumeric a-zA-Z0-9
regsub -all \[^$alphanumeric\] $string {$formMap(&)} string
regsub -all {[][{})\\]\)} $string {\\&} string
return [subst -nocommand $string]
@@ -867,14 +893,13 @@ proc http::formatQuery {args} {
# Results:
# The current proxy settings
- proc http::ProxyRequired {host} {
+proc http::ProxyRequired {host} {
variable http
if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
- if {![info exists http(-proxyport)] || ![string length $http(-proxyport)]} {
+ if {![info exists http(-proxyport)] || \
+ ![string length $http(-proxyport)]} {
set http(-proxyport) 8080
}
return [list $http(-proxyhost) $http(-proxyport)]
- } else {
- return {}
}
}