From 05c97af25931eb72c7feb658c8b1d50d5056361a Mon Sep 17 00:00:00 2001 From: kjnash Date: Thu, 8 Sep 2022 17:51:56 +0000 Subject: Add command http::reason to return the recommended reason phrase for a HTTP server status code (bug [1214322]). --- doc/http.n | 49 ++++++++++++++++++++++++--- library/http/http.tcl | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 4 deletions(-) diff --git a/doc/http.n b/doc/http.n index 2c9f809..f0018e3 100644 --- a/doc/http.n +++ b/doc/http.n @@ -36,6 +36,8 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::ncode \fItoken\fR .sp +\fB::http::reason \fIcode\fR +.sp \fB::http::meta \fItoken\fR .sp \fB::http::data \fItoken\fR @@ -492,13 +494,52 @@ the state array. .TP \fB::http::code\fR \fItoken\fR . -This is a convenience procedure that returns the \fBhttp\fR element of the -state array. +This command returns the "status line" of the server response (which is stored +as element \fBhttp\fR of the state array). +The "status line" is the first line of a HTTP server response, and has three +elements separated by spaces: the HTTP version, a three-digit numerical +"status code", and a "reason phrase". Only the reason phrase may contain +spaces. Examples: +.PP +.DS +.RS +HTTP/1.1 200 OK +HTTP/1.0 404 Not Found +.RE +.DE +.PP +.RS +The "reason phrase" for a given status code may vary from server to server, +and can be changed without affecting the HTTP protocol. The recommended +values (RFC 7231 and IANA assignments) for each code are provided by the +command \fB::http::reason\fR. +.RE .TP \fB::http::ncode\fR \fItoken\fR . -This is a convenience procedure that returns just the numeric return -code (200, 404, etc.) from the \fBhttp\fR element of the state array. +This command returns the "status code" (200, 404, etc.) of the server response. +The full status line can be obtained with command \fB::http::code\fR. +.TP +\fB::http::reason\fR \fIcode\fR +. 
+This command returns the IANA-recommended "reason phrase" for a particular +"status code" returned by a HTTP server. The argument \fIcode\fR is a valid +status code, and therefore is an integer in the range 100 to 599 inclusive. +For numbers in this range with no assigned meaning, the command returns the +value "Unassigned". Several status codes are used only in response to the +methods defined by HTTP extensions such as WebDAV, and not in response to a +HEAD, GET, or POST request method. +.PP +.RS +The "reason phrase" returned by a HTTP server may differ from the recommended +value, without affecting the HTTP protocol. The values actually sent by the +server in response to \fB::http::geturl\fR can be obtained by calling command +\fB::http::code\fR (the full status line, including the server's own reason +phrase) or command \fB::http::ncode\fR (for the status code only). +.PP +A registry of valid status codes is maintained at +https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml +.RE .TP \fB::http::size\fR \fItoken\fR . 
diff --git a/library/http/http.tcl b/library/http/http.tcl index 38e07cc..fe4d302 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -151,6 +151,72 @@ namespace eval http { variable TmpSockCounter 0 variable ThreadCounter 0 + variable reasonDict [dict create {*}{ + 100 Continue + 101 {Switching Protocols} + 102 Processing + 103 {Early Hints} + 200 OK + 201 Created + 202 Accepted + 203 {Non-Authoritative Information} + 204 {No Content} + 205 {Reset Content} + 206 {Partial Content} + 207 Multi-Status + 208 {Already Reported} + 226 {IM Used} + 300 {Multiple Choices} + 301 {Moved Permanently} + 302 Found + 303 {See Other} + 304 {Not Modified} + 305 {Use Proxy} + 306 (Unused) + 307 {Temporary Redirect} + 308 {Permanent Redirect} + 400 {Bad Request} + 401 Unauthorized + 402 {Payment Required} + 403 Forbidden + 404 {Not Found} + 405 {Method Not Allowed} + 406 {Not Acceptable} + 407 {Proxy Authentication Required} + 408 {Request Timeout} + 409 Conflict + 410 Gone + 411 {Length Required} + 412 {Precondition Failed} + 413 {Content Too Large} + 414 {URI Too Long} + 415 {Unsupported Media Type} + 416 {Range Not Satisfiable} + 417 {Expectation Failed} + 418 (Unused) + 421 {Misdirected Request} + 422 {Unprocessable Content} + 423 Locked + 424 {Failed Dependency} + 425 {Too Early} + 426 {Upgrade Required} + 428 {Precondition Required} + 429 {Too Many Requests} + 431 {Request Header Fields Too Large} + 451 {Unavailable For Legal Reasons} + 500 {Internal Server Error} + 501 {Not Implemented} + 502 {Bad Gateway} + 503 {Service Unavailable} + 504 {Gateway Timeout} + 505 {HTTP Version Not Supported} + 506 {Variant Also Negotiates} + 507 {Insufficient Storage} + 508 {Loop Detected} + 510 {Not Extended (OBSOLETED)} + 511 {Network Authentication Required} + }] + namespace export geturl config reset wait formatQuery quoteString namespace export register unregister registerError # - Useful, but not exported: data, size, status, code, cleanup, error, @@ -251,6 +317,33 @@ proc 
http::config {args} { } } +# ------------------------------------------------------------------------------ +# Proc http::reason +# ------------------------------------------------------------------------------ +# Command to return the IANA-recommended "reason phrase" for a HTTP Status Code. +# Information obtained from: +# https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml +# +# Arguments: +# code - A valid HTTP Status Code (integer from 100 to 599), else an error +# +# Return Value: the reason phrase, or "Unassigned" if reasonDict has no entry +# ------------------------------------------------------------------------------ + +proc http::reason {code} { + variable reasonDict + if {![regexp -- {^[1-5][0-9][0-9]$} $code]} { + set msg {argument must be a three-digit integer from 100 to 599} + return -code error $msg + } + if {[dict exists $reasonDict $code]} { + set reason [dict get $reasonDict $code] + } else { + set reason Unassigned + } + return $reason +} + # http::Finish -- # # Clean up the socket and eval close time callbacks -- cgit v0.12 From 1a8f9e37f48db662f43a0d1169c729f2c6ce3eb9 Mon Sep 17 00:00:00 2001 From: kjnash Date: Thu, 8 Sep 2022 21:16:57 +0000 Subject: Fix bug [2841176]. Evaluate all callbacks (and urlTypes commands) in global namespace. 
--- library/http/http.tcl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index fe4d302..67f0309 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -461,7 +461,7 @@ proc http::Finish {token {errormsg ""} {skipCB 0}} { if {[info exists state(-command)] && (!$skipCB) && (![info exists state(done-command-cb)])} { set state(done-command-cb) yes - if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} { + if {[catch {namespace eval :: $state(-command) $token} err] && $errormsg eq ""} { set state(error) [list $err $errorInfo $errorCode] set state(status) error } @@ -1642,7 +1642,7 @@ proc http::OpenSocket {token DoLater} { set pre [clock milliseconds] ##Log pre socket opened, - token $token ##Log $state(openCmd) - token $token - if {[catch {eval $state(openCmd)} sock errdict]} { + if {[catch {namespace eval :: $state(openCmd)} sock errdict]} { # ERROR CASE # Something went wrong while trying to establish the connection. # Tidy up after events and such, but DON'T call the command @@ -3159,7 +3159,7 @@ proc http::Write {token} { # Callback to the client after we've completely handled everything. if {[string length $state(-queryprogress)]} { - eval $state(-queryprogress) \ + namespace eval :: $state(-queryprogress) \ [list $token $state(querylength) $state(queryoffset)] } return @@ -3493,7 +3493,7 @@ proc http::Event {sock token} { ##Log body - token $token if {[catch { if {[info exists state(-handler)]} { - set n [eval $state(-handler) [list $sock $token]] + set n [namespace eval :: $state(-handler) [list $sock $token]] ##Log handler $n - token $token # N.B. the protocol has been set to 1.0 because the -handler # logic is not expected to handle chunked encoding. 
@@ -3658,7 +3658,7 @@ proc http::Event {sock token} { return } else { if {[info exists state(-progress)]} { - eval $state(-progress) \ + namespace eval :: $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] } } @@ -3977,7 +3977,7 @@ proc http::CopyChunk {token chunk} { } puts -nonewline $state(-channel) $chunk if {[info exists state(-progress)]} { - eval [linsert $state(-progress) end \ + namespace eval :: [linsert $state(-progress) end \ $token $state(totalsize) $state(currentsize)] } } else { @@ -4013,7 +4013,7 @@ proc http::CopyDone {token count {error {}}} { set sock $state(sock) incr state(currentsize) $count if {[info exists state(-progress)]} { - eval $state(-progress) \ + namespace eval :: $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] } # At this point the token may have been reset. -- cgit v0.12 From d28f51fb1ea027faf72a3dd2ad64d76209d57e89 Mon Sep 17 00:00:00 2001 From: kjnash Date: Fri, 9 Sep 2022 08:04:55 +0000 Subject: Fix bug [2927221] - revised http::meta, new http::metaValue, header names are case-insensitive so convert to lower case. --- doc/http.n | 71 ++++++++++++++++++++++++++++++++++----------------- library/http/http.tcl | 41 ++++++++++++++++++++++++++--- tests/http.test | 8 +++--- tests/http11.test | 22 ++++++++-------- 4 files changed, 99 insertions(+), 43 deletions(-) diff --git a/doc/http.n b/doc/http.n index f0018e3..135774d 100644 --- a/doc/http.n +++ b/doc/http.n @@ -38,7 +38,9 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::reason \fIcode\fR .sp -\fB::http::meta \fItoken\fR +\fB::http::meta \fItoken\fR ?\fIheaderName\fR? +.sp +\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR .sp \fB::http::data \fItoken\fR .sp @@ -58,7 +60,8 @@ Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. 
.PP It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, -\fBerror\fR, \fBmeta\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. +\fBerror\fR, \fBmeta\fR, \fBmetaValue\fR, \fBncode\fR, \fBreason\fR, +\fBsize\fR, or \fBstatus\fR. .BE .SH DESCRIPTION .PP @@ -357,7 +360,7 @@ multiple interfaces are available. The \fIcallback\fR is made after each transfer of data from the URL. The callback gets three additional arguments: the \fItoken\fR from \fB::http::geturl\fR, the expected total size of the contents from the -\fBContent-Length\fR meta-data, and the current number of bytes +\fBContent-Length\fR metadata, and the current number of bytes transferred so far. The expected total size may be unknown, in which case zero is passed to the callback. Here is a template for the progress callback: @@ -547,11 +550,31 @@ This is a convenience procedure that returns the \fBcurrentsize\fR element of the state array, which represents the number of bytes received from the URL in the \fB::http::geturl\fR call. .TP -\fB::http::meta\fR \fItoken\fR -. -This is a convenience procedure that returns the \fBmeta\fR -element of the state array which contains the HTTP response -headers. See below for an explanation of this element. +\fB::http::meta\fR \fItoken\fR ?\fIheaderName\fR? +. +This command returns a list of HTTP response header names and values, in the +order that they were received from the server: a Tcl list of the form +?name value ...? Header names are case-insensitive and are converted to lower +case. The return value is not a \fBdict\fR because some header names may occur +more than once, notably \fIset-cookie\fR. If one argument is supplied, all +response headers are returned: the value is that of the \fBmeta\fR element +of the state array (described below). If two arguments are supplied, the +second provides the value of a header name. Only headers with the requested +name (converted to lower case) are returned. 
If no such headers are found, +an empty list is returned. +.TP +\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR +. +This command returns the value of the HTTP response header named +\fIheaderName\fR. Header names are case-insensitive and are converted to +lower case. If no such header exists, the return value is the empty string. +If there are multiple headers named \fIheaderName\fR, the result is obtained +by joining the individual values with the string ", " (comma and space), +preserving their order. Multiple headers with the same name may be processed +in this manner, except \fIset-cookie\fR which does not conform to the +comma-separated-list syntax and cannot be combined into a single value. +Each \fIset-cookie\fR header must be treated individually, e.g. by processing +the return value of \fB::http::meta\fR \fIset-cookie\fR. .TP \fB::http::cleanup\fR \fItoken\fR . @@ -713,14 +736,14 @@ command. .TP \fBcharset\fR . -The value of the charset attribute from the \fBContent-Type\fR meta-data +The value of the charset attribute from the \fBContent-Type\fR metadata value. If none was specified, this defaults to the RFC standard \fBiso8859-1\fR, or the value of \fB$::http::defaultCharset\fR. Incoming text data will be automatically converted from this charset to utf-8. .TP \fBcoding\fR . -A copy of the \fBContent-Encoding\fR meta-data value. +A copy of the \fBContent-Encoding\fR metadata value. .TP \fBcurrentsize\fR . @@ -745,23 +768,23 @@ is returned by the \fB::http::code\fR command. The format of this value is: The \fIcode\fR is a three-digit number defined in the HTTP standard. A code of 200 is OK. Codes beginning with 4 or 5 indicate errors. Codes beginning with 3 are redirection errors. In this case the -\fBLocation\fR meta-data specifies a new URL that contains the +\fBLocation\fR metadata specifies a new URL that contains the requested information. .RE .TP \fBmeta\fR . -The HTTP protocol returns meta-data that describes the URL contents. 
-The \fBmeta\fR element of the state array is a list of the keys and -values of the meta-data. This is in a format useful for initializing -an array that just contains the meta-data: -.RS +The response from a HTTP server includes metadata headers that describe the +response body and the message from the server. The \fBmeta\fR element of the +state array is a list of the keys (header names) and values (header values) of +the metadata. Header names are case-insensitive and are converted to lower +case. The value of meta is not a \fBdict\fR because some header names may +occur more than once, notably "set-cookie". If the value \fBmeta\fR is read +into a dict or into an array (using array set), only the last header with each +name will be preserved. .PP -.CS -array set meta $state(meta) -.CE -.PP -Some of the meta-data keys are listed below, but the HTTP standard defines +.RS +Some of the metadata keys are listed below, but the HTTP standard defines more, and servers are free to add their own. .TP \fBContent-Type\fR @@ -793,11 +816,11 @@ During the transaction this value is the empty string. .TP \fBtotalsize\fR . -A copy of the \fBContent-Length\fR meta-data value. +A copy of the \fBContent-Length\fR metadata value. .TP \fBtype\fR . -A copy of the \fBContent-Type\fR meta-data value. +A copy of the \fBContent-Type\fR metadata value. .TP \fBurl\fR . @@ -1057,7 +1080,7 @@ The peer thread can transfer the socket only to the main interpreter of the scri .SH EXAMPLE .PP This example creates a procedure to copy a URL to a file while printing a -progress meter, and prints the meta-data associated with the URL. +progress meter, and prints the metadata associated with the URL. 
.PP .CS proc httpcopy { url file {chunk 4096} } { diff --git a/library/http/http.tcl b/library/http/http.tcl index 67f0309..ba8e1ab 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -2962,11 +2962,45 @@ proc http::size {token} { upvar 0 $token state return $state(currentsize) } -proc http::meta {token} { +proc http::meta {token args} { + set lenny [llength $args] + if {$lenny > 1} { + return -code error {usage: ::http::meta token ?headerName?} + } else { + return [Meta $token {*}$args] + } +} +proc http::metaValue {token header} { + Meta $token $header VALUE +} +proc http::Meta {token args} { variable $token upvar 0 $token state - return $state(meta) + + set header [string tolower [lindex $args 0]] + set how [string tolower [lindex $args 1]] + set lenny [llength $args] + if {$lenny == 0} { + return $state(meta) + } elseif {($lenny > 2) || (($lenny == 2) && ($how ne {value}))} { + return -code error {usage: ::http::Meta token ?headerName ?VALUE??} + } else { + set result {} + set combined {} + foreach {key value} $state(meta) { + if {$key eq $header} { + lappend result $key $value + append combined $value {, } + } + } + if {$lenny == 1} { + return $result + } else { + return [string range $combined 0 end-2] + } + } } + proc http::error {token} { variable $token upvar 0 $token state @@ -3445,7 +3479,8 @@ proc http::Event {sock token} { # Process header lines. ##Log header - token $token - $line if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { - switch -- [string tolower $key] { + set key [string tolower $key] + switch -- $key { content-type { set state(type) [string trim [string tolower $value]] # Grab the optional charset information. 
diff --git a/tests/http.test b/tests/http.test index 26ba710..08f6311 100644 --- a/tests/http.test +++ b/tests/http.test @@ -390,7 +390,7 @@ test http-3.25 {http::meta} -setup { } -cleanup { http::cleanup $token unset -nocomplain m token -} -result {Content-Length Content-Type Date} +} -result {content-length content-type date} test http-3.26 {http::meta} -setup { unset -nocomplain m token } -body { @@ -400,7 +400,7 @@ test http-3.26 {http::meta} -setup { } -cleanup { http::cleanup $token unset -nocomplain m token -} -result {Content-Length Content-Type Date X-Check} +} -result {content-length content-type date x-check} test http-3.27 {http::geturl: -headers override -type} -body { set token [http::geturl $url/headers -type "text/plain" -query dummy \ -headers [list "Content-Type" "text/plain;charset=utf-8"]] @@ -485,7 +485,7 @@ test http-4.1 {http::Event} -body { set token [http::geturl $url -keepalive 0] upvar #0 $token data array set meta $data(meta) - expr {($data(totalsize) == $meta(Content-Length))} + expr {($data(totalsize) == $meta(content-length))} } -cleanup { http::cleanup $token } -result 1 @@ -493,7 +493,7 @@ test http-4.2 {http::Event} -body { set token [http::geturl $url] upvar #0 $token data array set meta $data(meta) - string compare $data(type) [string trim $meta(Content-Type)] + string compare $data(type) [string trim $meta(content-type)] } -cleanup { http::cleanup $token } -result 0 diff --git a/tests/http11.test b/tests/http11.test index 346e334..912e069 100644 --- a/tests/http11.test +++ b/tests/http11.test @@ -51,15 +51,11 @@ proc halt_httpd {} { } proc meta {tok {key ""}} { - set meta [http::meta $tok] - if {$key ne ""} { - if {[dict exists $meta $key]} { - return [dict get $meta $key] - } else { - return "" - } + if {$key eq ""} { + return [http::meta $tok] + } else { + return [http::metaValue $tok $key] } - return $meta } proc state {tok {key ""}} { @@ -128,11 +124,12 @@ test http11-1.1 "normal,gzip,non-chunked" -setup { -timeout 10000 
-headers {accept-encoding gzip}] http::wait $tok list [http::status $tok] [http::code $tok] [check_crc $tok] \ - [meta $tok content-encoding] [meta $tok transfer-encoding] + [meta $tok content-encoding] [meta $tok transfer-encoding] \ + [http::meta $tok content-encoding] [http::meta $tok transfer-encoding] } -cleanup { http::cleanup $tok halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok gzip {}} +} -result {ok {HTTP/1.1 200 OK} ok gzip {} {content-encoding gzip} {}} test http11-1.2 "normal,deflated,non-chunked" -setup { variable httpd [create_httpd] @@ -193,11 +190,12 @@ test http11-1.6 "normal, specify 1.1 " -setup { -protocol 1.1 -timeout 10000] http::wait $tok list [http::status $tok] [http::code $tok] [check_crc $tok] \ - [meta $tok connection] [meta $tok transfer-encoding] + [meta $tok connection] [meta $tok transfer-encoding] \ + [http::meta $tok connection] [http::meta $tok transfer-encoding] } -cleanup { http::cleanup $tok halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok close chunked} +} -result {ok {HTTP/1.1 200 OK} ok close chunked {connection close} {transfer-encoding chunked}} test http11-1.7 "normal, 1.1 and keepalive " -setup { variable httpd [create_httpd] -- cgit v0.12 From 34d4a98d7cf24421a7b65fbff7ff06a7d535579c Mon Sep 17 00:00:00 2001 From: kjnash Date: Fri, 9 Sep 2022 08:28:16 +0000 Subject: Add missing "else" clauses to "if" commands in http::Event. 
--- library/http/http.tcl | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index ba8e1ab..be991fc 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -3237,11 +3237,14 @@ proc http::Event {sock token} { if {[set d [read $sock]] ne ""} { Log "WARNING: additional data left on closed socket\ - token $token" + } else { } + } else { } Log ^X$tk end of response (token error) - token $token CloseSocket $sock return + } else { } if {$state(state) eq "connecting"} { ##Log - connecting - token $token @@ -3252,6 +3255,7 @@ proc http::Event {sock token} { } { set state(after) [after $state(-timeout) \ [list http::reset $token timeout]] + } else { } if {[catch {gets $sock state(http)} nsl]} { @@ -3263,8 +3267,8 @@ proc http::Event {sock token} { if {[TestForReplay $token read $nsl c]} { return + } else { } - # else: # This is NOT a persistent socket that has been closed since # its last use. @@ -3288,6 +3292,7 @@ proc http::Event {sock token} { if {[TestForReplay $token read {} d]} { return + } else { } # else: @@ -3295,6 +3300,7 @@ proc http::Event {sock token} { # last use. # If any other requests are in flight or pipelined/queued, they # will be discarded. + } else { } } elseif {$state(state) eq "header"} { if {[catch {gets $sock line} nhl]} { @@ -3313,6 +3319,7 @@ proc http::Event {sock token} { set state(state) "connecting" continue # This was a "return" in the pre-coroutine code. + } else { } if { ([info exists state(connection)]) @@ -3328,6 +3335,7 @@ proc http::Event {sock token} { # Previous value is $token. It cannot be "pending". 
set socketWrState($state(socketinfo)) Wready http::NextPipelinedWrite $token + } else { } # Once a "close" has been signaled, the client MUST NOT send any @@ -3358,6 +3366,7 @@ proc http::Event {sock token} { Log Move $tok from socketCoEvent to socketWrQueue and cancel its after idle coro } set socketCoEvent($state(socketinfo)) {} + } else { } if { ($socketRdQueue($state(socketinfo)) ne {}) @@ -3386,6 +3395,7 @@ proc http::Event {sock token} { if {[info exists ${tokenVal}(after)]} { after cancel [set ${tokenVal}(after)] unset ${tokenVal}(after) + } else { } # Tokens in the read queue have no (socketcoro) to # cancel. @@ -3398,6 +3408,7 @@ proc http::Event {sock token} { # Do not allow further connections on this socket (but # geturl can add new requests to the replay). set socketClosing($state(socketinfo)) 1 + } else { } set state(state) body @@ -3413,6 +3424,7 @@ proc http::Event {sock token} { && ("keep-alive" ni $state(connection)) } { lappend state(connection) "keep-alive" + } else { } # If doing a HEAD, then we won't get any body @@ -3421,6 +3433,7 @@ proc http::Event {sock token} { set state(state) complete Eot $token return + } else { } # - For non-chunked transfer we may have no body - in this case @@ -3451,6 +3464,7 @@ proc http::Event {sock token} { set state(state) complete Eot $token return + } else { } # We have to use binary translation to count bytes properly. @@ -3462,10 +3476,12 @@ proc http::Event {sock token} { } { # Turn off conversions for non-text data. set state(binary) 1 + } else { } if {[info exists state(-channel)]} { if {$state(binary) || [llength [ContentEncoding $token]]} { fconfigure $state(-channel) -translation binary + } else { } if {![info exists state(-handler)]} { # Initiate a sequence of background fcopies. @@ -3473,7 +3489,9 @@ proc http::Event {sock token} { rename ${token}--EventCoroutine {} CopyStart $sock $token return + } else { } + } else { } } elseif {$nhl > 0} { # Process header lines. 
@@ -3517,11 +3535,14 @@ proc http::Event {sock token} { set-cookie { if {$http(-cookiejar) ne ""} { ParseCookie $token [string trim $value] + } else { } } } lappend state(meta) $key [string trim $value] + } else { } + } else { } } else { # Now reading body @@ -3537,6 +3558,7 @@ proc http::Event {sock token} { # We know the transfer is complete only when the server # closes the connection - i.e. eof is not an error. set state(state) complete + } else { } if {![string is integer -strict $n]} { if 1 { @@ -3566,6 +3588,7 @@ proc http::Event {sock token} { set n 0 set state(state) complete } + } else { } } elseif {[info exists state(transfer_final)]} { # This code forgives EOF in place of the final CRLF. @@ -3605,6 +3628,7 @@ proc http::Event {sock token} { incr state(log_size) [string length $chunk] ##Log chunk $n cumul $state(log_size) -\ token $token + } else { } if {$size != [string length $chunk]} { Log "WARNING: mis-sized chunk:\ @@ -3617,6 +3641,7 @@ proc http::Event {sock token} { set msg {error in chunked encoding - fetch\ terminated} Eot $token $msg + } else { } # CRLF that follows chunk. # If eof, this is handled at the end of this proc. @@ -3664,6 +3689,7 @@ proc http::Event {sock token} { append state(body) $block ##Log non-chunk [string length $state(body)] -\ token $token + } else { } } # This calculation uses n from the -handler, chunked, or @@ -3675,6 +3701,7 @@ proc http::Event {sock token} { set t $state(totalsize) ##Log another $n currentsize $c totalsize $t -\ token $token + } else { } # If Content-Length - check for end of data. 
if { @@ -3685,7 +3712,9 @@ proc http::Event {sock token} { token $token set state(state) complete Eot $token + } else { } + } else { } } err]} { Log ^X$tk end of response (error ${err}) - token $token @@ -3695,6 +3724,7 @@ proc http::Event {sock token} { if {[info exists state(-progress)]} { namespace eval :: $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] + } else { } } } -- cgit v0.12 From 6ed82bc4e590af32ae2a7742913dbd3388d6ce81 Mon Sep 17 00:00:00 2001 From: kjnash Date: Fri, 9 Sep 2022 15:32:00 +0000 Subject: Fix bug [338d979f5b] - default content-type is application/octet-stream --- library/http/http.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index be991fc..8e4c205 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1001,7 +1001,7 @@ proc http::CreateToken {url args} { totalsize 0 querylength 0 queryoffset 0 - type text/html + type application/octet-stream body {} status "" http "" -- cgit v0.12 From d288bedb47342cb10920f38467bcbfcded335e97 Mon Sep 17 00:00:00 2001 From: kjnash Date: Sat, 10 Sep 2022 11:47:34 +0000 Subject: Add option to detect XML files and their encoding when the server supplies no content-type. Fix for bugs [2998307] and [3165071]. --- doc/http.n | 13 ++++++++ library/http/http.tcl | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++- tests/http.test | 2 +- 3 files changed, 97 insertions(+), 2 deletions(-) diff --git a/doc/http.n b/doc/http.n index 135774d..5b70671 100644 --- a/doc/http.n +++ b/doc/http.n @@ -273,6 +273,19 @@ not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for details. .RE .TP +\fB\-guesstype\fR \fIboolean\fR +. +Attempt to guess the Content-Type and character set when a misconfigured +server provides no information. The default value is \fIfalse\fR (do +nothing). 
If boolean \fItrue\fR then, if the server does not send a +"Content-Type" header, or if it sends the value "application/octet-stream", +\fBhttp::geturl\fR will attempt to guess appropriate values. This is not +intended to become a general-purpose tool, and currently it is limited to +detecting XML documents that begin with an XML declaration. In this case +the Content-Type is changed to "application/xml", and the character set to +the one specified by the "encoding" tag of the XML line, or to utf-8 if no +encoding is specified. +.TP \fB\-handler\fR \fIcallback\fR . Invoke \fIcallback\fR whenever HTTP data is available; if present, nothing diff --git a/library/http/http.tcl b/library/http/http.tcl index 8e4c205..a76ce15 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -992,6 +992,7 @@ proc http::CreateToken {url args} { -type application/x-www-form-urlencoded -queryprogress {} -protocol 1.1 + -guesstype 0 binary 0 state created meta {} @@ -1014,6 +1015,7 @@ proc http::CreateToken {url args} { array set type { -binary boolean -blocksize integer + -guesstype boolean -queryblocksize integer -strict boolean -timeout integer @@ -1022,7 +1024,7 @@ proc http::CreateToken {url args} { } set state(charset) $defaultCharset set options { - -binary -blocksize -channel -command -handler -headers -keepalive + -binary -blocksize -channel -command -guesstype -handler -headers -keepalive -method -myaddr -progress -protocol -query -queryblocksize -querychannel -queryprogress -strict -timeout -type -validate } @@ -4157,11 +4159,91 @@ proc http::Eot {token {reason {}}} { # Translate text line endings. 
set state(body) [string map {\r\n \n \r \n} $state(body)] } + if {[info exists state(-guesstype)] && $state(-guesstype)} { + GuessType $token + } } Finish $token $reason return } + +# ------------------------------------------------------------------------------ +# Proc http::GuessType +# ------------------------------------------------------------------------------ +# Command to attempt limited analysis of a resource with undetermined +# Content-Type, i.e. "application/octet-stream". This value can be set for two +# reasons: +# (a) by the server, in a Content-Type header +# (b) by http::geturl, as the default value if the server does not supply a +# Content-Type header. +# +# This command converts a resource if: +# (1) it has type application/octet-stream +# (2) it begins with an XML declaration "<?xml name="value" ... ?>" +# (3) one tag is named "encoding" and has a recognised value; or no "encoding" +# tag exists (defaulting to utf-8) +# +# RFC 9110 Sec. 8.3 states: +# "If a Content-Type header field is not present, the recipient MAY either +# assume a media type of "application/octet-stream" ([RFC2046], Section 4.5.1) +# or examine the data to determine its type." +# +# The RFC goes on to describe the pitfalls of "MIME sniffing", including +# possible security risks. +# +# Arguments: +# token - connection token +# +# Return Value: (boolean) true iff a change has been made +# ------------------------------------------------------------------------------ + +proc http::GuessType {token} { + variable $token + upvar 0 $token state + + if {$state(type) ne {application/octet-stream}} { + return 0 + } + + set body $state(body) + # e.g. {<?xml version="1.0" encoding="utf-8"?> ...} + + if {![regexp -nocase -- {^<[?]xml[[:space:]][^>?]*[?]>} $body match]} { + return 0 + } + # e.g. {<?xml version="1.0" encoding="utf-8"?>} + + set contents [regsub -all -- {[[:space:]]+} $match { }] + set contents [string trim [string range [string tolower $contents] 6 end-2]] + # e.g. 
{version="1.0" encoding="utf-8"} + # without excess whitespace (every run collapsed, ends trimmed) or upper-case letters + + if {![regexp -- {^([^=" ]+="[^"]+" )+$} "$contents "]} { + return 0 + } + # The application/xml default encoding: + set res utf-8 + + set tagList [regexp -all -inline -- {[^=" ]+="[^"]+"} $contents] + foreach tag $tagList { + regexp -- {([^=" ]+)="([^"]+)"} $tag -> name value + if {$name eq {encoding}} { + set res $value + } + } + set enc [CharsetToEncoding $res] + if {$enc eq "binary"} { + return 0 + } + set state(body) [encoding convertfrom $enc $state(body)] + set state(body) [string map {\r\n \n \r \n} $state(body)] + set state(type) application/xml + set state(charset) $res + return 1 +} + + # http::wait -- # # See documentation for details. diff --git a/tests/http.test b/tests/http.test index 08f6311..e88210a 100644 --- a/tests/http.test +++ b/tests/http.test @@ -145,7 +145,7 @@ test http-2.8 {http::CharsetToEncoding} { test http-3.1 {http::geturl} -returnCodes error -body { http::geturl -bogus flag -} -result {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -keepalive, -method, -myaddr, -progress, -protocol, -query, -queryblocksize, -querychannel, -queryprogress, -strict, -timeout, -type, -validate} +} -result {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -guesstype, -handler, -headers, -keepalive, -method, -myaddr, -progress, -protocol, -query, -queryblocksize, -querychannel, -queryprogress, -strict, -timeout, -type, -validate} test http-3.2 {http::geturl} -returnCodes error -body { http::geturl http:junk -- cgit v0.12 From b34f06a4afa5f57846efbe55f8dccb29e4611e2b Mon Sep 17 00:00:00 2001 From: kjnash Date: Sun, 11 Sep 2022 10:57:34 +0000 Subject: Fix bug [a13b9d0ce1] on HTTP compression: remove "compress", amend "deflate". 
--- doc/http.n | 6 +-- library/http/http.tcl | 62 ++++++++++++++++++++++++++--- tests/http11.test | 107 +++++++++++++++++++++++++++++++++++++++++++++++--- tests/httpd11.tcl | 27 +++++++++++-- 4 files changed, 185 insertions(+), 17 deletions(-) diff --git a/doc/http.n b/doc/http.n index 5b70671..e61f52f 100644 --- a/doc/http.n +++ b/doc/http.n @@ -210,7 +210,7 @@ numbers of \fBhttp\fR and \fBTcl\fR. \fB\-zip\fR \fIboolean\fR . If the value is boolean \fBtrue\fR, then by default requests will send a header -.QW "\fBAccept-Encoding: gzip,deflate,compress\fR" . +.QW "\fBAccept-Encoding: gzip,deflate\fR" . If the value is boolean \fBfalse\fR, then by default this header will not be sent. In either case the default can be overridden for an individual request by supplying a custom \fBAccept-Encoding\fR header in the \fB\-headers\fR option @@ -236,7 +236,7 @@ that is invoked when the HTTP transaction completes. . Specifies whether to force interpreting the URL data as binary. Normally this is auto-detected (anything not beginning with a \fBtext\fR content -type or whose content encoding is \fBgzip\fR or \fBcompress\fR is +type or whose content encoding is \fBgzip\fR or \fBdeflate\fR is considered binary data). .TP \fB\-blocksize\fR \fIsize\fR @@ -314,7 +314,7 @@ The \fBhttp::geturl\fR code for the \fB\-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB\-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will not -send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). +send the header "\fBAccept-Encoding: gzip,deflate\fR"). 
.PP If options \fB\-handler\fR and \fB\-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified diff --git a/library/http/http.tcl b/library/http/http.tcl index a76ce15..691355c 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -2128,7 +2128,7 @@ proc http::Connected {token proto phost srvurl} { && (![info exists state(-handler)]) && $http(-zip) } { - puts $sock "Accept-Encoding: gzip,deflate,compress" + puts $sock "Accept-Encoding: gzip,deflate" } if {$isQueryChannel && ($state(querylength) == 0)} { # Try to determine size of data in channel. If we cannot seek, the @@ -4010,13 +4010,25 @@ proc http::CopyStart {sock token {initial 1}} { upvar 0 $token state if {[info exists state(transfer)] && $state(transfer) eq "chunked"} { foreach coding [ContentEncoding $token] { - lappend state(zlib) [zlib stream $coding] + if {$coding eq {deflateX}} { + # Use the standards-compliant choice. + set coding2 decompress + } else { + set coding2 $coding + } + lappend state(zlib) [zlib stream $coding2] } make-transformation-chunked $sock [namespace code [list CopyChunk $token]] } else { if {$initial} { foreach coding [ContentEncoding $token] { - zlib push $coding $sock + if {$coding eq {deflateX}} { + # Use the standards-compliant choice. + set coding2 decompress + } else { + set coding2 $coding + } + zlib push $coding2 $sock } } if {[catch { @@ -4137,7 +4149,20 @@ proc http::Eot {token {reason {}}} { if {[string length $state(body)] > 0} { if {[catch { foreach coding [ContentEncoding $token] { - set state(body) [zlib $coding $state(body)] + if {$coding eq {deflateX}} { + # First try the standards-compliant choice. + set coding2 decompress + if {[catch {zlib $coding2 $state(body)} result]} { + # If that fails, try the MS non-compliant choice. 
+ set coding2 inflate + set state(body) [zlib $coding2 $state(body)] + } else { + # error {failed at standards-compliant deflate} + set state(body) $result + } + } else { + set state(body) [zlib $coding $state(body)] + } } } err]} { Log "error doing decompression for token $token: $err" @@ -4387,16 +4412,41 @@ proc http::CharsetToEncoding {charset} { } } + +# ------------------------------------------------------------------------------ +# Proc http::ContentEncoding +# ------------------------------------------------------------------------------ # Return the list of content-encoding transformations we need to do in order. +# + # -------------------------------------------------------------------------- + # Options for Accept-Encoding, Content-Encoding: the switch command + # -------------------------------------------------------------------------- + # The symbol deflateX allows http to attempt both versions of "deflate", + # unless there is a -channel - for a -channel, only "decompress" is tried. + # Alternative/extra lines for switch: + # The standards-compliant version of "deflate" can be chosen with: + # deflate { lappend r decompress } + # The Microsoft non-compliant version of "deflate" can be chosen with: + # deflate { lappend r inflate } + # The previously used implementation of "compress", which appears to be + # incorrect and is rarely used by web servers, can be chosen with: + # compress - x-compress { lappend r decompress } + # -------------------------------------------------------------------------- +# +# Arguments: +# token - Connection token. 
+# +# Return Value: list +# ------------------------------------------------------------------------------ + proc http::ContentEncoding {token} { upvar 0 $token state set r {} if {[info exists state(coding)]} { foreach coding [split $state(coding) ,] { switch -exact -- $coding { - deflate { lappend r inflate } + deflate { lappend r deflateX } gzip - x-gzip { lappend r gunzip } - compress - x-compress { lappend r decompress } identity {} br { return -code error\ diff --git a/tests/http11.test b/tests/http11.test index 912e069..b3d9edb 100644 --- a/tests/http11.test +++ b/tests/http11.test @@ -144,7 +144,22 @@ test http11-1.2 "normal,deflated,non-chunked" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok deflate {}} -test http11-1.3 "normal,compressed,non-chunked" -setup { +test http11-1.2.1 "normal,deflated,non-chunked,msdeflate" -setup { + variable httpd [create_httpd] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?close=1&msdeflate=1 \ + -timeout 10000 -headers {accept-encoding deflate}] + http::wait $tok + list [http::status $tok] [http::code $tok] [check_crc $tok] \ + [meta $tok content-encoding] [meta $tok transfer-encoding] +} -cleanup { + http::cleanup $tok + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok deflate {}} + +test http11-1.3 "normal,compressed,non-chunked" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. 
variable httpd [create_httpd] } -body { set tok [http::geturl http://localhost:$httpd_port/testdoc.html?close=1 \ @@ -249,7 +264,22 @@ test http11-1.10 "normal,deflate,chunked" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok deflate chunked} -test http11-1.11 "normal,compress,chunked" -setup { +test http11-1.10.1 "normal,deflate,chunked,msdeflate" -setup { + variable httpd [create_httpd] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?msdeflate=1 \ + -timeout 10000 -headers {accept-encoding deflate}] + http::wait $tok + list [http::status $tok] [http::code $tok] [check_crc $tok] \ + [meta $tok content-encoding] [meta $tok transfer-encoding] +} -cleanup { + http::cleanup $tok + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok deflate chunked} + +test http11-1.11 "normal,compress,chunked" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. variable httpd [create_httpd] } -body { set tok [http::geturl http://localhost:$httpd_port/testdoc.html \ @@ -370,7 +400,28 @@ test http11-2.2 "-channel, encoding deflate" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok close deflate chunked} -test http11-2.3 "-channel,encoding compress" -setup { +test http11-2.2.1 "-channel, encoding deflate,msdeflate" -setup { + variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?msdeflate=1 \ + -timeout 5000 -channel $chan -headers {accept-encoding deflate}] + http::wait $tok + seek $chan 0 + set data [read $chan] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding] +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok close deflate chunked} + +test http11-2.3 "-channel,encoding compress" -constraints 
badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. variable httpd [create_httpd] set chan [open [makeFile {} testfile.tmp] wb+] } -body { @@ -522,7 +573,32 @@ test http11-2.7 "-channel,encoding deflate,non-chunked" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok close deflate {} 0} -test http11-2.8 "-channel,encoding compress,non-chunked" -setup { +test http11-2.7.1 "-channel,encoding deflate,non-chunked,msdeflate" -constraints knownBug -setup { + # Test fails because a -channel can only try one un-deflate algorithm, and the + # compliant "decompress" is tried, not the non-compliant "inflate" of + # the MS browser implementation. + variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?close=1&msdeflate=1 \ + -timeout 5000 -channel $chan -headers {accept-encoding deflate}] + http::wait $tok + seek $chan 0 + set data [read $chan] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding]\ + [expr {[file size testdoc.html]-[file size testfile.tmp]}] +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok close deflate {} 0} + +test http11-2.8 "-channel,encoding compress,non-chunked" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. 
variable httpd [create_httpd] set chan [open [makeFile {} testfile.tmp] wb+] } -body { @@ -583,6 +659,27 @@ test http11-2.10 "-channel,deflate,keepalive" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok {} deflate chunked 0} +test http11-2.10.1 "-channel,deflate,keepalive,msdeflate" -setup { + variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?msdeflate=1 \ + -timeout 5000 -channel $chan -keepalive 1 \ + -headers {accept-encoding deflate}] + http::wait $tok + seek $chan 0 + set data [read $chan] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding]\ + [expr {[file size testdoc.html]-[file size testfile.tmp]}] +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok {} deflate chunked 0} + test http11-2.11 "-channel,identity,keepalive" -setup { variable httpd [create_httpd] set chan [open [makeFile {} testfile.tmp] wb+] @@ -621,7 +718,7 @@ test http11-2.12 "-channel,negotiate,keepalive" -setup { close $chan removeFile testfile.tmp halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok {} gzip chunked gzip,deflate,compress 0} +} -result {ok {HTTP/1.1 200 OK} ok {} gzip chunked gzip,deflate 0} # ------------------------------------------------------------------------- diff --git a/tests/httpd11.tcl b/tests/httpd11.tcl index d0624f8..6570ee9 100644 --- a/tests/httpd11.tcl +++ b/tests/httpd11.tcl @@ -160,6 +160,12 @@ proc Service {chan addr port} { if {$protocol eq "HTTP/1.1"} { foreach enc [split [dict get? $meta accept-encoding] ,] { set enc [string trim $enc] + # The current implementation of "compress" appears to be + # incorrect (bug [a13b9d0ce1]). Keep it here for + # experimentation only. The tests that use it have the + # constraint "badCompress". 
The client code in http has + # been removed, but can be restored from comments if + # experimentation is desired. if {$enc in {deflate gzip compress}} { set encoding $enc break @@ -171,6 +177,7 @@ proc Service {chan addr port} { } set nosendclose 0 + set msdeflate 0 foreach pair [split $query &] { if {[scan $pair {%[^=]=%s} key val] != 2} {set val ""} switch -exact -- $key { @@ -178,6 +185,7 @@ proc Service {chan addr port} { close {set close 1 ; set transfer 0} transfer {set transfer $val} content-type {set type $val} + msdeflate {set msdeflate $val} } } if {$protocol eq "HTTP/1.1"} { @@ -211,10 +219,23 @@ proc Service {chan addr port} { flush $chan chan configure $chan -buffering full -translation binary + if {$encoding eq {deflate}} { + # When http.tcl uses the correct decoder (bug [a13b9d0ce1]) for + # "accept-encoding deflate", i.e. "zlib decompress", this choice of + # encoding2 allows the tests to pass. It appears to do "deflate" + # correctly, but this has not been verified with a non-Tcl client. + set encoding2 compress + } else { + set encoding2 $encoding + } if {$transfer eq "chunked"} { - blow-chunks $data $chan $encoding - } elseif {$encoding ne "identity"} { - puts -nonewline $chan [zlib $encoding $data] + blow-chunks $data $chan $encoding2 + } elseif {$encoding2 ne "identity" && $msdeflate eq {1}} { + puts -nonewline $chan [string range [zlib $encoding2 $data] 2 end-4] + # Used in some tests of "deflate" to produce the non-RFC-compliant + # Microsoft version of "deflate". + } elseif {$encoding2 ne "identity"} { + puts -nonewline $chan [zlib $encoding2 $data] } else { puts -nonewline $chan $data } -- cgit v0.12 From e681ade127e237c8ebf20bbaf02f6c5757671b71 Mon Sep 17 00:00:00 2001 From: kjnash Date: Sun, 11 Sep 2022 16:32:30 +0000 Subject: Fix bug [3610253] - apply the patch supplied with the ticket, and add a test. REVIEW REQUESTED! Also fix -zip 0 so it sends "Accept-Encoding: identity". 
--- library/http/http.tcl | 10 +++++++++- tests/http11.test | 32 ++++++++++++++++++++++++++++++-- tests/httpd11.tcl | 4 ++-- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 691355c..551b323 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -2129,6 +2129,9 @@ proc http::Connected {token proto phost srvurl} { && $http(-zip) } { puts $sock "Accept-Encoding: gzip,deflate" + } elseif {!$accept_encoding_seen} { + puts $sock "Accept-Encoding: identity" + } else { } if {$isQueryChannel && ($state(querylength) == 0)} { # Try to determine size of data in channel. If we cannot seek, the @@ -4064,7 +4067,12 @@ proc http::CopyChunk {token chunk} { if {[info exists state(zlib)]} { set excess "" foreach stream $state(zlib) { - catch {set excess [$stream add -finalize $excess]} + catch { + $stream put -finalize $excess + set excess "" + set overflood "" + while {[set overflood [$stream get]] ne ""} { append excess $overflood } + } } puts -nonewline $state(-channel) $excess foreach stream $state(zlib) { $stream close } diff --git a/tests/http11.test b/tests/http11.test index b3d9edb..71ef4c7 100644 --- a/tests/http11.test +++ b/tests/http11.test @@ -84,6 +84,8 @@ proc check_crc {tok args} { makeFile "test

this is a test

\n[string repeat {

This is a tcl test file.

} 4192]\n" testdoc.html +makeFile "test

this is a test

\n[string repeat {

This is a tcl test file.

} 5000]\n" largedoc.html + if {![info exists ThreadLevel]} { if {[catch {package require Thread}] == 0} { set ValueRange {0 1 2} @@ -371,15 +373,40 @@ test http11-2.1 "-channel, encoding gzip" -setup { http::wait $tok seek $chan 0 set data [read $chan] + set diff [expr {[file size testdoc.html] - [file size testfile.tmp]}] list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ [meta $tok connection] [meta $tok content-encoding]\ - [meta $tok transfer-encoding] + [meta $tok transfer-encoding] -- $diff bytes lost +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok close gzip chunked -- 0 bytes lost} + +# Cf. Bug [3610253] "CopyChunk does not drain decompressor(s)" +# This test failed before the bugfix. +# The pass/fail depended on file size. +test http11-2.1.1 "-channel, encoding gzip" -setup { + variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] + set fileName largedoc.html +} -body { + set tok [http::geturl http://localhost:$httpd_port/$fileName \ + -timeout 5000 -channel $chan -headers {accept-encoding gzip}] + http::wait $tok + seek $chan 0 + set data [read $chan] + set diff [expr {[file size $fileName] - [file size testfile.tmp]}] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding] -- $diff bytes lost } -cleanup { http::cleanup $tok close $chan removeFile testfile.tmp halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok close gzip chunked} +} -result {ok {HTTP/1.1 200 OK} ok close gzip chunked -- 0 bytes lost} test http11-2.2 "-channel, encoding deflate" -setup { variable httpd [create_httpd] @@ -1033,6 +1060,7 @@ foreach p {create_httpd httpd_read halt_httpd meta check_crc} { if {[llength [info proc $p]]} {rename $p {}} } removeFile testdoc.html +removeFile largedoc.html unset -nocomplain httpd_port httpd p ::tcltest::cleanupTests diff --git 
a/tests/httpd11.tcl b/tests/httpd11.tcl index 6570ee9..55b52fd 100644 --- a/tests/httpd11.tcl +++ b/tests/httpd11.tcl @@ -46,7 +46,7 @@ proc get-chunks {data {compression gzip}} { } set data "" - set chunker [make-chunk-generator $data 512] + set chunker [make-chunk-generator $data 671] while {[string length [set chunk [$chunker]]]} { append data $chunk } @@ -60,7 +60,7 @@ proc blow-chunks {data {ochan stdout} {compression gzip}} { compress { set data [zlib compress $data] } } - set chunker [make-chunk-generator $data 512] + set chunker [make-chunk-generator $data 671] while {[string length [set chunk [$chunker]]]} { puts -nonewline $ochan $chunk } -- cgit v0.12 From f18eb707ea105404984b5741a6ff4f5953e5eeb6 Mon Sep 17 00:00:00 2001 From: kjnash Date: Mon, 12 Sep 2022 13:22:14 +0000 Subject: Record HTTP request line and request headers for debugging purposes. --- library/http/http.tcl | 61 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 551b323..ba9d920 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1008,6 +1008,8 @@ proc http::CreateToken {url args} { http "" connection keep-alive tid {} + requestHeaders {} + requestLine {} } set state(-keepalive) $defaultKeepalive set state(-strict) $strict @@ -1096,6 +1098,9 @@ proc http::CreateToken {url args} { # Note that the RE actually combines the user and password parts, as # recommended in RFC 3986. Indeed, that RFC states that putting passwords # in URLs is a Really Bad Idea, something with which I would agree utterly. + # RFC 9110 Sec 4.2.4 goes further than this, and deprecates the format + # "user:password@". It is retained here for backward compatibility, + # but its use is not recommended. # # From a validation perspective, we need to ensure that the parts of the # URL that are going to the server are correctly encoded. 
This is only @@ -1975,6 +1980,30 @@ proc http::ScheduleRequest {token} { } +# ------------------------------------------------------------------------------ +# Proc http::SendHeader +# ------------------------------------------------------------------------------ +# Command to send a request header, and keep a copy in state(requestHeaders) +# for debugging purposes. +# +# Arguments: +# token - connection token (name of an array) +# key - header name +# value - header value +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::SendHeader {token key value} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + set sock $state(sock) + lappend state(requestHeaders) [string tolower $key] $value + puts $sock "$key: $value" + return +} + # http::Connected -- # # Callback used when the connection to the HTTP server is actually @@ -2059,29 +2088,31 @@ proc http::Connected {token proto phost srvurl} { if {[catch { set state(method) $how - puts $sock "$how $srvurl HTTP/$state(-protocol)" + set state(requestHeaders) {} + set state(requestLine) "$how $srvurl HTTP/$state(-protocol)" + puts $sock $state(requestLine) set hostValue [GetFieldValue $state(-headers) Host] if {$hostValue ne {}} { # Allow Host spoofing. [Bug 928154] regexp {^[^:]+} $hostValue state(host) - puts $sock "Host: $hostValue" + SendHeader $token Host $hostValue } elseif {$port == $defport} { # Don't add port in this case, to handle broken servers. [Bug # #504508] set state(host) $host - puts $sock "Host: $host" + SendHeader $token Host $host } else { set state(host) $host - puts $sock "Host: $host:$port" + SendHeader $token Host "$host:$port" } - puts $sock "User-Agent: $http(-useragent)" + SendHeader $token User-Agent $http(-useragent) if {($state(-protocol) > 1.0) && $state(-keepalive)} { # Send this header, because a 1.1 server is not compelled to treat # this as the default. 
- puts $sock "Connection: keep-alive" + SendHeader $token Connection keep-alive } if {($state(-protocol) > 1.0) && !$state(-keepalive)} { - puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 + SendHeader $token Connection close ;# RFC2616 sec 8.1.2.1 } if {($state(-protocol) < 1.1)} { # RFC7230 A.1 @@ -2090,7 +2121,7 @@ proc http::Connected {token proto phost srvurl} { # Don't leave this to chance. # For HTTP/1.0 we have already "set state(connection) close" # and "state(-keepalive) 0". - puts $sock "Connection: close" + SendHeader $token Connection close } # RFC7230 A.1 - "clients are encouraged not to send the # Proxy-Connection header field in any requests" @@ -2116,21 +2147,21 @@ proc http::Connected {token proto phost srvurl} { set state(querylength) $value } if {[string length $key]} { - puts $sock "$key: $value" + SendHeader $token $key $value } } # Allow overriding the Accept header on a per-connection basis. Useful # for working with REST services. [Bug c11a51c482] if {!$accept_types_seen} { - puts $sock "Accept: $state(accept-types)" + SendHeader $token Accept $state(accept-types) } if { (!$accept_encoding_seen) && (![info exists state(-handler)]) && $http(-zip) } { - puts $sock "Accept-Encoding: gzip,deflate" + SendHeader $token Accept-Encoding gzip,deflate } elseif {!$accept_encoding_seen} { - puts $sock "Accept-Encoding: identity" + SendHeader $token Accept-Encoding identity } else { } if {$isQueryChannel && ($state(querylength) == 0)} { @@ -2156,7 +2187,7 @@ proc http::Connected {token proto phost srvurl} { set separator "; " } if {$cookies ne ""} { - puts $sock "Cookie: $cookies" + SendHeader $token Cookie $cookies } } @@ -2180,10 +2211,10 @@ proc http::Connected {token proto phost srvurl} { if {$isQuery || $isQueryChannel} { # POST method. 
if {!$content_type_seen} { - puts $sock "Content-Type: $state(-type)" + SendHeader $token Content-Type $state(-type) } if {!$contDone} { - puts $sock "Content-Length: $state(querylength)" + SendHeader $token Content-Length $state(querylength) } puts $sock "" flush $sock -- cgit v0.12 From a194e805e604396e9ad9a39a4e7c569334b2d35c Mon Sep 17 00:00:00 2001 From: kjnash Date: Mon, 12 Sep 2022 17:12:12 +0000 Subject: Add new commands http::requestLine, requestHeaders, requestHeaderValue. Add aliases http::responseBody -> http::data, responseLine -> code, responseCode -> ncode, responseHeaders -> meta, responseHeaderValue -> metaValue. --- doc/http.n | 124 +++++++++++++++++++++++++++++++++++++++++++------- library/http/http.tcl | 53 ++++++++++++++++----- 2 files changed, 148 insertions(+), 29 deletions(-) diff --git a/doc/http.n b/doc/http.n index e61f52f..5a7009e 100644 --- a/doc/http.n +++ b/doc/http.n @@ -32,17 +32,23 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::size \fItoken\fR .sp -\fB::http::code \fItoken\fR +\fB::http::responseLine\fR \fItoken\fR .sp -\fB::http::ncode \fItoken\fR +\fB::http::responseCode\fR \fItoken\fR .sp \fB::http::reason \fIcode\fR .sp -\fB::http::meta \fItoken\fR ?\fIheaderName\fR? +\fB::http::requestLine\fR \fItoken\fR .sp -\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR +\fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? .sp -\fB::http::data \fItoken\fR +\fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR +.sp +\fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? +.sp +\fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR +.sp +\fB::http::responseBody\fR \fItoken\fR .sp \fB::http::error \fItoken\fR .sp @@ -53,6 +59,16 @@ http \- Client-side implementation of the HTTP/1.1 protocol \fB::http::registerError \fIport\fR ?\fImessage\fR? 
.sp \fB::http::unregister \fIproto\fR +.sp +\fB::http::code \fItoken\fR +.sp +\fB::http::data \fItoken\fR +.sp +\fB::http::meta \fItoken\fR ?\fIheaderName\fR? +.sp +\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR +.sp +\fB::http::ncode \fItoken\fR .SH "EXPORTED COMMANDS" .PP Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, @@ -493,10 +509,18 @@ because in this case the \fB::http::geturl\fR call does not return until the HTTP transaction is complete, and thus there is nothing to wait for. .TP -\fB::http::data\fR \fItoken\fR +\fB::http::responseBody\fR \fItoken\fR . -This is a convenience procedure that returns the \fBbody\fR element -(i.e., the URL data) of the state array. +This command returns the entity sent by the HTTP server (unless +\fI-channel\fR was used, in which case the entity was delivered to the +channel, and the command returns the empty string). +.RS +.PP +Other terms for +"entity", with varying precision, include "representation of resource", +"resource", "response body after decoding", "payload", +"message body after decoding", "content", and "file". +.RE .TP \fB::http::error\fR \fItoken\fR . @@ -508,7 +532,7 @@ of the state array. This is a convenience procedure that returns the \fBstatus\fR element of the state array. .TP -\fB::http::code\fR \fItoken\fR +\fB::http::responseLine\fR \fItoken\fR . This command returns the "status line" of the server response (which is stored as element \fBhttp\fR of the state array). @@ -523,18 +547,27 @@ HTTP/1.1 200 OK HTTP/1.0 404 Not Found .RE .DE -.PP .RS -The "reason phrase" for a given status code may vary from server to server, +The "status code" is a three-digit number in the range 100 to 599. +A value of 200 is the normal return from a GET request, and its matching +"reason phrase" is "OK". Codes beginning with 4 or 5 indicate errors. +Codes beginning with 3 are redirection errors. 
In this case the +\fBLocation\fR response header specifies a new URL that contains the +requested information. +.PP +The "reason phrase" is a textual description of the "status code": it may +vary from server to server, and can be changed without affecting the HTTP protocol. The recommended values (RFC 7231 and IANA assignments) for each code are provided by the command \fB::http::reason\fR. .RE .TP -\fB::http::ncode\fR \fItoken\fR +\fB::http::responseCode\fR \fItoken\fR . -This command returns the "status code" (200, 404, etc.) of the server response. -The full status line can be obtained with command \fB::http::code\fR. +This command returns the "status code" (200, 404, etc.) of the server +"status line". If a three-digit code cannot be found, the full status +line is returned. See command \fB::http::code\fR for more information +on the "status line". .TP \fB::http::reason\fR \fIcode\fR . @@ -563,7 +596,44 @@ This is a convenience procedure that returns the \fBcurrentsize\fR element of the state array, which represents the number of bytes received from the URL in the \fB::http::geturl\fR call. .TP -\fB::http::meta\fR \fItoken\fR ?\fIheaderName\fR? +\fB::http::requestLine\fR \fItoken\fR +. +This command returns the "request line" sent to the server. +The "request line" is the first line of a HTTP client request, and has three +elements separated by spaces: the HTTP method, the URL relative to the server, +and the HTTP version. Examples: +.PP +.DS +.RS +GET / HTTP/1.1 +GET /introduction.html?subject=plumbing HTTP/1.1 +POST /forms/order.html HTTP/1.1 +.RE +.DE +.TP +\fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? +. +This command returns a list of the HTTP request header names and values, in the +order that they were sent to the server: a Tcl list of the form +?name value ...? Header names are case-insensitive and are converted to lower +case. The return value is not a \fBdict\fR because some header names may occur +more than once. 
If one argument is supplied, all request headers +are returned: the value is that of the \fBrequestHeaders\fR element +of the state array (described below). If two arguments are supplied, the +second provides the value of a header name. Only headers with the requested +name (converted to lower case) are returned. If no such headers are found, +an empty list is returned. +.TP +\fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR +. +This command returns the value of the HTTP request header named +\fIheaderName\fR. Header names are case-insensitive and are converted to +lower case. If no such header exists, the return value is the empty string. +If there are multiple headers named \fIheaderName\fR, the result is obtained +by joining the individual values with the string ", " (comma and space), +preserving their order. +.TP +\fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? . This command returns a list of HTTP response header names and values, in the order that they were received from the server: a Tcl list of the form @@ -576,7 +646,7 @@ second provides the value of a header name. Only headers with the requested name (converted to lower case) are returned. If no such headers are found, an empty list is returned. .TP -\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR +\fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR . This command returns the value of the HTTP response header named \fIheaderName\fR. Header names are case-insensitive and are converted to @@ -587,7 +657,7 @@ preserving their order. Multiple headers with the same name may be processed in this manner, except \fIset-cookie\fR which does not conform to the comma-separated-list syntax and cannot be combined into a single value. Each \fIset-cookie\fR header must be treated individually, e.g. by processing -the return value of \fB::http::meta\fR \fIset-cookie\fR. +the return value of \fB::http::responseHeaders\fR \fItoken\fR \fIset-cookie\fR. 
.TP \fB::http::cleanup\fR \fItoken\fR . @@ -635,6 +705,26 @@ registered via \fB::http::register\fR, returning a two-item list of the default port and handler command that was previously installed (via \fB::http::register\fR) if there was such a handler, and an error if there was no such handler. +.TP +\fB::http::code\fR \fItoken\fR +. +An alternative name for the command \fB::http::responseLine\fR +.TP +\fB::http::data\fR \fItoken\fR +. +An alternative name for the command \fB::http::responseBody\fR. +.TP +\fB::http::meta\fR \fItoken\fR ?\fIheaderName\fR? +. +An alternative name for the command \fB::http::responseHeaders\fR +.TP +\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR +. +An alternative name for the command \fB::http::responseHeaderValue\fR +.TP +\fB::http::ncode\fR \fItoken\fR +. +An alternative name for the command \fB::http::responseCode\fR .SH ERRORS The \fB::http::geturl\fR procedure will raise errors in the following cases: invalid command line options, diff --git a/library/http/http.tcl b/library/http/http.tcl index ba9d920..359666d 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -2966,7 +2966,7 @@ proc http::ReplayCore {newQueue} { # Code - the HTTP transaction code, e.g., 200 # Size - the size of the URL data -proc http::data {token} { +proc http::responseBody {token} { variable $token upvar 0 $token state return $state(body) @@ -2979,12 +2979,17 @@ proc http::status {token} { upvar 0 $token state return $state(status) } -proc http::code {token} { +proc http::responseLine {token} { variable $token upvar 0 $token state return $state(http) } -proc http::ncode {token} { +proc http::requestLine {token} { + variable $token + upvar 0 $token state + return $state(requestLine) +} +proc http::responseCode {token} { variable $token upvar 0 $token state if {[regexp {[0-9]{3}} $state(http) numeric_code]} { @@ -2998,32 +3003,51 @@ proc http::size {token} { upvar 0 $token state return $state(currentsize) } -proc http::meta {token args} { 
+proc http::requestHeaders {token args} { + set lenny [llength $args] + if {$lenny > 1} { + return -code error {usage: ::http::requestHeaders token ?headerName?} + } else { + return [Meta $token request {*}$args] + } +} +proc http::responseHeaders {token args} { set lenny [llength $args] if {$lenny > 1} { - return -code error {usage: ::http::meta token ?headerName?} + return -code error {usage: ::http::responseHeaders token ?headerName?} } else { - return [Meta $token {*}$args] + return [Meta $token response {*}$args] } } -proc http::metaValue {token header} { - Meta $token $header VALUE +proc http::requestHeaderValue {token header} { + Meta $token request $header VALUE +} +proc http::responseHeaderValue {token header} { + Meta $token response $header VALUE } -proc http::Meta {token args} { +proc http::Meta {token who args} { variable $token upvar 0 $token state + if {$who eq {request}} { + set whom requestHeaders + } elseif {$who eq {response}} { + set whom meta + } else { + return -code error {usage: ::http::Meta token request|response ?headerName ?VALUE??} + } + set header [string tolower [lindex $args 0]] set how [string tolower [lindex $args 1]] set lenny [llength $args] if {$lenny == 0} { - return $state(meta) + return $state($whom) } elseif {($lenny > 2) || (($lenny == 2) && ($how ne {value}))} { - return -code error {usage: ::http::Meta token ?headerName ?VALUE??} + return -code error {usage: ::http::Meta token request|response ?headerName ?VALUE??} } else { set result {} set combined {} - foreach {key value} $state(meta) { + foreach {key value} $state($whom) { if {$key eq $header} { lappend result $key $value append combined $value {, } @@ -4582,6 +4606,11 @@ proc http::make-transformation-chunked {chan command} { return } +interp alias {} http::data {} http::responseBody +interp alias {} http::code {} http::responseLine +interp alias {} http::meta {} http::responseHeaders +interp alias {} http::metaValue {} http::responseHeaderValue +interp alias {} 
http::ncode {} http::responseCode # ------------------------------------------------------------------------------ # Proc http::socket -- cgit v0.12 From 83b951fbc6b973b4d850c1293cd82559a9c96228 Mon Sep 17 00:00:00 2001 From: kjnash Date: Mon, 12 Sep 2022 17:33:50 +0000 Subject: Rearrange groups of lines in http.n without other changes to content. --- doc/http.n | 148 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/doc/http.n b/doc/http.n index 5a7009e..dcd65ae 100644 --- a/doc/http.n +++ b/doc/http.n @@ -32,11 +32,9 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::size \fItoken\fR .sp -\fB::http::responseLine\fR \fItoken\fR -.sp -\fB::http::responseCode\fR \fItoken\fR +\fB::http::error \fItoken\fR .sp -\fB::http::reason \fIcode\fR +\fB::http::cleanup \fItoken\fR .sp \fB::http::requestLine\fR \fItoken\fR .sp @@ -44,16 +42,18 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR .sp +\fB::http::responseLine\fR \fItoken\fR +.sp +\fB::http::responseCode\fR \fItoken\fR +.sp +\fB::http::reason \fIcode\fR +.sp \fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? .sp \fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR .sp \fB::http::responseBody\fR \fItoken\fR .sp -\fB::http::error \fItoken\fR -.sp -\fB::http::cleanup \fItoken\fR -.sp \fB::http::register \fIproto port command\fR .sp \fB::http::registerError \fIport\fR ?\fImessage\fR? @@ -509,28 +509,69 @@ because in this case the \fB::http::geturl\fR call does not return until the HTTP transaction is complete, and thus there is nothing to wait for. .TP -\fB::http::responseBody\fR \fItoken\fR +\fB::http::status\fR \fItoken\fR . -This command returns the entity sent by the HTTP server (unless -\fI-channel\fR was used, in which case the entity was delivered to the -channel, and the command returns the empty string). 
-.RS -.PP -Other terms for -"entity", with varying precision, include "representation of resource", -"resource", "response body after decoding", "payload", -"message body after decoding", "content", and "file". -.RE +This is a convenience procedure that returns the \fBstatus\fR element of +the state array. +.TP +\fB::http::size\fR \fItoken\fR +. +This is a convenience procedure that returns the \fBcurrentsize\fR +element of the state array, which represents the number of bytes +received from the URL in the \fB::http::geturl\fR call. .TP \fB::http::error\fR \fItoken\fR . This is a convenience procedure that returns the \fBerror\fR element of the state array. .TP -\fB::http::status\fR \fItoken\fR +\fB::http::cleanup\fR \fItoken\fR . -This is a convenience procedure that returns the \fBstatus\fR element of -the state array. +This procedure cleans up the state associated with the connection +identified by \fItoken\fR. After this call, the procedures +like \fB::http::data\fR cannot be used to get information +about the operation. It is \fIstrongly\fR recommended that you call +this function after you are done with a given HTTP request. Not doing +so will result in memory not being freed, and if your app calls +\fB::http::geturl\fR enough times, the memory leak could cause a +performance hit...or worse. +.TP +\fB::http::requestLine\fR \fItoken\fR +. +This command returns the "request line" sent to the server. +The "request line" is the first line of a HTTP client request, and has three +elements separated by spaces: the HTTP method, the URL relative to the server, +and the HTTP version. Examples: +.PP +.DS +.RS +GET / HTTP/1.1 +GET /introduction.html?subject=plumbing HTTP/1.1 +POST /forms/order.html HTTP/1.1 +.RE +.DE +.TP +\fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? +. +This command returns a list of the HTTP request header names and values, in the +order that they were sent to the server: a Tcl list of the form +?name value ...? 
Header names are case-insensitive and are converted to lower +case. The return value is not a \fBdict\fR because some header names may occur +more than once. If one argument is supplied, all request headers +are returned: the value is that of the \fBrequestHeaders\fR element +of the state array (described below). If two arguments are supplied, the +second provides the value of a header name. Only headers with the requested +name (converted to lower case) are returned. If no such headers are found, +an empty list is returned. +.TP +\fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR +. +This command returns the value of the HTTP request header named +\fIheaderName\fR. Header names are case-insensitive and are converted to +lower case. If no such header exists, the return value is the empty string. +If there are multiple headers named \fIheaderName\fR, the result is obtained +by joining the individual values with the string ", " (comma and space), +preserving their order. .TP \fB::http::responseLine\fR \fItoken\fR . @@ -590,49 +631,6 @@ A registry of valid status codes is maintained at https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml .RE .TP -\fB::http::size\fR \fItoken\fR -. -This is a convenience procedure that returns the \fBcurrentsize\fR -element of the state array, which represents the number of bytes -received from the URL in the \fB::http::geturl\fR call. -.TP -\fB::http::requestLine\fR \fItoken\fR -. -This command returns the "request line" sent to the server. -The "request line" is the first line of a HTTP client request, and has three -elements separated by spaces: the HTTP method, the URL relative to the server, -and the HTTP version. Examples: -.PP -.DS -.RS -GET / HTTP/1.1 -GET /introduction.html?subject=plumbing HTTP/1.1 -POST /forms/order.html HTTP/1.1 -.RE -.DE -.TP -\fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? -. 
-This command returns a list of the HTTP request header names and values, in the -order that they were sent to the server: a Tcl list of the form -?name value ...? Header names are case-insensitive and are converted to lower -case. The return value is not a \fBdict\fR because some header names may occur -more than once. If one argument is supplied, all request headers -are returned: the value is that of the \fBrequestHeaders\fR element -of the state array (described below). If two arguments are supplied, the -second provides the value of a header name. Only headers with the requested -name (converted to lower case) are returned. If no such headers are found, -an empty list is returned. -.TP -\fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR -. -This command returns the value of the HTTP request header named -\fIheaderName\fR. Header names are case-insensitive and are converted to -lower case. If no such header exists, the return value is the empty string. -If there are multiple headers named \fIheaderName\fR, the result is obtained -by joining the individual values with the string ", " (comma and space), -preserving their order. -.TP \fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? . This command returns a list of HTTP response header names and values, in the @@ -659,16 +657,18 @@ comma-separated-list syntax and cannot be combined into a single value. Each \fIset-cookie\fR header must be treated individually, e.g. by processing the return value of \fB::http::responseHeaders\fR \fItoken\fR \fIset-cookie\fR. .TP -\fB::http::cleanup\fR \fItoken\fR +\fB::http::responseBody\fR \fItoken\fR . -This procedure cleans up the state associated with the connection -identified by \fItoken\fR. After this call, the procedures -like \fB::http::data\fR cannot be used to get information -about the operation. It is \fIstrongly\fR recommended that you call -this function after you are done with a given HTTP request. 
Not doing -so will result in memory not being freed, and if your app calls -\fB::http::geturl\fR enough times, the memory leak could cause a -performance hit...or worse. +This command returns the entity sent by the HTTP server (unless +\fI-channel\fR was used, in which case the entity was delivered to the +channel, and the command returns the empty string). +.RS +.PP +Other terms for +"entity", with varying precision, include "representation of resource", +"resource", "response body after decoding", "payload", +"message body after decoding", "content", and "file". +.RE .TP \fB::http::register\fR \fIproto port command\fR . -- cgit v0.12 From 55d9cca97fb558444ff53d71b4aef4ba99ef0274 Mon Sep 17 00:00:00 2001 From: kjnash Date: Tue, 13 Sep 2022 12:54:26 +0000 Subject: In namespace ::http, add new commands postError, responseInfo. Rename (the unreleased public API) reason to reasonPhrase. Rename private commands make-transformation-chunked to MakeTransformationChunked, getTextLine to GetTextLine. Rename mapReply to quoteString (and reverse the aliasing). Update namespace exports. Conventional use of fully-qualified command names. Initialise some members of state array. --- doc/http.n | 137 ++++++++++++++++++++++++++++++++++++++++-- library/http/http.tcl | 160 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 260 insertions(+), 37 deletions(-) diff --git a/doc/http.n b/doc/http.n index dcd65ae..8a9c35b 100644 --- a/doc/http.n +++ b/doc/http.n @@ -34,6 +34,8 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::error \fItoken\fR .sp +\fB::http::postError \fItoken\fR +.sp \fB::http::cleanup \fItoken\fR .sp \fB::http::requestLine\fR \fItoken\fR @@ -46,12 +48,14 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::responseCode\fR \fItoken\fR .sp -\fB::http::reason \fIcode\fR +\fB::http::reasonPhrase\fR \fIcode\fR .sp \fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? 
.sp \fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR .sp +\fB::http::responseInfo\fR \fItoken\fR +.sp \fB::http::responseBody\fR \fItoken\fR .sp \fB::http::register \fIproto port command\fR @@ -72,11 +76,15 @@ http \- Client-side implementation of the HTTP/1.1 protocol .SH "EXPORTED COMMANDS" .PP Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, -\fBgeturl\fR, \fBquoteString\fR, \fBregister\fR, \fBregisterError\fR, +\fBgeturl\fR, \fBpostError\fR, \fBquoteString\fR, \fBreasonPhrase\fR, +\fBregister\fR, +\fBregisterError\fR, \fBrequestHeaders\fR, \fBrequestHeaderValue\fR, +\fBrequestLine\fR, \fBresponseBody\fR, \fBresponseCode\fR, +\fBresponseHeaders\fR, \fBresponseHeaderValue\fR, \fBresponseInfo\fR, \fBresponseLine\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. .PP It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, -\fBerror\fR, \fBmeta\fR, \fBmetaValue\fR, \fBncode\fR, \fBreason\fR, +\fBerror\fR, \fBmeta\fR, \fBmetaValue\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. .BE .SH DESCRIPTION @@ -525,6 +533,10 @@ received from the URL in the \fB::http::geturl\fR call. This is a convenience procedure that returns the \fBerror\fR element of the state array. .TP +\fB::http::postError\fR \fItoken\fR +. +A POST request is a call to \fB::http::geturl\fR with either the \fB\-query\fR or \fB\-querychannel\fR option. The \fB::http::postError\fR command returns the error string generated when a HTTP POST request sends its request-body to the server; or the empty string if there was no error. When this type of error occurs, the \fB::http::geturl\fR command continues the transaction and attempts to receive a response from the server. +.TP \fB::http::cleanup\fR \fItoken\fR . This procedure cleans up the state associated with the connection @@ -600,7 +612,7 @@ The "reason phrase" is a textual description of the "status code": it may vary from server to server, and can be changed without affecting the HTTP protocol. 
The recommended values (RFC 7231 and IANA assignments) for each code are provided by the -command \fB::http::reason\fR. +command \fB::http::reasonPhrase\fR. .RE .TP \fB::http::responseCode\fR \fItoken\fR @@ -610,7 +622,7 @@ This command returns the "status code" (200, 404, etc.) of the server line is returned. See command \fB::http::code\fR for more information on the "status line". .TP -\fB::http::reason\fR \fIcode\fR +\fB::http::reasonPhrase\fR \fIcode\fR . This command returns the IANA recommended "reason phrase" for a particular "status code" returned by a HTTP server. The argument \fIcode\fR is a valid @@ -657,6 +669,14 @@ comma-separated-list syntax and cannot be combined into a single value. Each \fIset-cookie\fR header must be treated individually, e.g. by processing the return value of \fB::http::responseHeaders\fR \fItoken\fR \fIset-cookie\fR. .TP +\fB::http::responseInfo\fR \fItoken\fR +. +This command returns a \fBdict\fR of selected response metadata that are essential for identifying a successful transaction and making use of the response, along with other metadata that are informational. The keys of the \fBdict\fR are \fIstage\fR, \fIstatus\fR, \fIncode\fR, \fIreason\fR, \fItype\fR, \fIbinary\fR, \fIredirection\fR, \fIcharset\fR, \fIcoding\fR, \fIhttpRequest\fR, \fIhttpResponse\fR, \fIurl\fR, \fIconnRequest\fR, \fIconnResponse\fR, \fIconnection\fR, \fItransfer\fR, \fItotalsize\fR, and \fIcurrentsize\fR. The meaning of these keys is described in the chapter \fBMETADATA\fR below. +.RS +.PP +It is always worth checking the value of \fIbinary\fR after a HTTP transaction, to determine whether a misconfigured server has caused http to interpret a text resource as a binary, or vice versa. +.RE +.TP \fB::http::responseBody\fR \fItoken\fR . This command returns the entity sent by the HTTP server (unless @@ -807,6 +827,113 @@ transaction. If it can read the server's response it will end up with an \fBok\fR status, otherwise it will have an \fBeof\fR status. 
+.SH "METADATA" +When a HTTP server responds to a request, it supplies not only the entity requested, but also metadata. This is provided by the first line (the "status line") of the response, and by a number of HTTP headers. Further metadata relates to how \fB::http::geturl\fR has processed the response from the server. +.PP +The most important metadata can be accessed with the command +\fB::http::responseInfo\fR. +This command returns a \fBdict\fR of metadata that are essential for identifying a successful transaction and making use of the response, along with other metadata that are informational. The keys of the \fBdict\fR are: +.RS +.RS +\fB===== Essential Values =====\fR +.RE +.RE +.TP +\fBstage\fR +. +This value, set by \fB::http::geturl\fR, describes the stage that the transaction has reached. Values, in order of the transaction lifecycle, are: "created", "connecting", "header", "body", and "complete". Other \fBdict\fR keys are available when the value of stage is "body" or "complete". The key \fBconnection\fR has its final value only when \fBstage\fR is "complete". +.TP +\fBstatus\fR +. +This value, set by \fB::http::geturl\fR, is "ok" for a successful transaction; "eof", "error", "timeout", or "reset" for an unsuccessful transaction; or "" if the transaction is still in progress. In the last case the values for other dictionary keys may not be available. The meaning of these values is described in the chapter \fBERRORS\fR (above). +.TP +\fBncode\fR +. +The "HTTP status code" for the response. +.TP +\fBreason\fR +. +The "reason phrase" sent by the server. +.TP +\fBtype\fR +. +The value of the \fBContent-Type\fR response header or, if the header was not supplied, the default value "application/octet-stream". +.TP +\fBbinary\fR +. +This boolean value, set by \fB::http::geturl\fR, describes how the command has interpreted the entity returned by the server (after decoding any compression specified +by the +.QW "Content-Encoding" +response header). 
This decoded entity is accessible as the return value of the command \fB::http::responseBody\fR. + +The value is \fBtrue\fR if http has interpreted the decoded entity as binary. The value returned by ::http::responseBody is a Tcl binary string. This is a suitable format for image data, zip files, etc. +\fB::http::geturl\fR chooses this value if the user has requested a binary interpretation by passing the option \fI\-binary\fR to the command, or if the server has supplied a binary content type in a Content-Type response header, or if the server has not supplied any Content-Type header. + +The value is \fBfalse\fR if http has interpreted the decoded entity as text. The text has been converted from the character set notified by the server into Tcl's internal Unicode format, and the value returned by ::http::responseBody is an ordinary Tcl string. + +It is always worth checking the value of "binary" after a HTTP transaction, to determine whether a misconfigured server has caused http to interpret a text resource as a binary, or vice versa. +.TP +\fBredirection\fR +. +The URL that is the redirection target. The value is that of the Location response header. This header is sent when a response has status code 3XX (redirection). +.RS +.RS +\fB===== Informational Values =====\fR +.RE +.RE +.TP +\fBcharset\fR +. +The value of the charset attribute of the \fBContent-Type\fR response header. The charset value is used only for a text resource. If the server did not specify a charset, the value defaults to that of the variable \fB::http::defaultCharset\fR, which unless it has been deliberately modified by the caller is \fBiso8859-1\fR. Incoming text data is automatically converted from the character set defined by \fBcharset\fR to Tcl's internal Unicode representation, i.e. to a Tcl string. +.TP +\fBcoding\fR +. +A copy of the \fBContent-Encoding\fR response-header value. +.TP +\fBhttpRequest\fR +. +The version of HTTP specified in the request (i.e. sent in the request line). 
+.TP +\fBhttpResponse\fR +. +The version of HTTP used by the server (obtained from the response "status line"). The server uses this version of HTTP in its response, but ensures that this response is compatible with the HTTP version specified in the client's request. +.TP +\fBurl\fR +. +The requested URL, typically the URL supplied as an argument to \fB::http::geturl\fR but without its "fragment" (the final part of the URL beginning with "#"). +.TP +\fBconnRequest\fR +. +The value, if any, sent to the server in "Connection" request header(s). +.TP +\fBconnResponse\fR +. +The value, if any, received from the server in "Connection" response header(s). +.TP +\fBconnection\fR +. +This value, set by \fB::http::geturl\fR, reports whether the connection was closed after the transaction (value "close"), or left open (value "keep-alive"). +.TP +\fBtransfer\fR +. +The value of the Transfer-Encoding response header, if it is present. The value is either "chunked" (indicating HTTP/1.1 "chunked encoding") or the empty string. +.TP +\fBquerylength\fR +. +The total length of the request body in a POST request. +.TP +\fBqueryoffset\fR +. +The number of bytes of the POST request body sent to the server so far. +.TP +\fBtotalsize\fR +. +A copy of the \fBContent-Length\fR response-header value. +The number of bytes specified in a Content-Length header, if one was sent. If none was sent, the value is 0. A correctly configured server omits this header if the transfer-encoding is "chunked", or (for older servers) if the server closes the connection when it reaches the end of the resource. +.TP +\fBcurrentsize\fR +. +The number of bytes fetched from the server so far. .SH "STATE ARRAY" The \fB::http::geturl\fR procedure returns a \fItoken\fR that can be used to get to the state of the HTTP transaction in the form of a Tcl array. 
diff --git a/library/http/http.tcl b/library/http/http.tcl index 359666d..15fd031 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -217,13 +217,22 @@ namespace eval http { 511 {Network Authentication Required} }] - namespace export geturl config reset wait formatQuery quoteString + namespace export geturl config reset wait formatQuery postError quoteString namespace export register unregister registerError - # - Useful, but not exported: data, size, status, code, cleanup, error, - # meta, ncode, mapReply, init. Comments suggest that "init" can be used - # for re-initialisation, although the command is undocumented. - # - Not exported, probably should be upper-case initial letter as part - # of the internals: getTextLine, make-transformation-chunked. + namespace export requestLine requestHeaders requestHeaderValue + namespace export responseLine responseHeaders responseHeaderValue + namespace export responseCode responseBody responseInfo reasonPhrase + # - Legacy aliases, were never exported: + # data, code, mapReply, meta, ncode + # - Callable from outside (e.g. from TLS) by fully-qualified name, but + # not exported: + # socket + # - Useful, but never exported (and likely to have naming collisions): + # size, status, cleanup, error, init + # Comments suggest that "init" can be used for re-initialisation, + # although the command is undocumented. + # - Never exported, renamed from lower-case names: + # GetTextLine, MakeTransformationChunked. } # http::Log -- @@ -318,7 +327,7 @@ proc http::config {args} { } # ------------------------------------------------------------------------------ -# Proc http::reason +# Proc http::reasonPhrase # ------------------------------------------------------------------------------ # Command to return the IANA-recommended "reason phrase" for a HTTP Status Code. 
# Information obtained from: @@ -330,7 +339,7 @@ proc http::config {args} { # Return Value: the reason phrase # ------------------------------------------------------------------------------ -proc http::reason {code} { +proc http::reasonPhrase {code} { variable reasonDict if {![regexp -- {^[1-5][0-9][0-9]$} $code]} { set msg {argument must be a three-digit integer from 100 to 599} @@ -1006,10 +1015,14 @@ proc http::CreateToken {url args} { body {} status "" http "" + httpResponse {} + ncode {} + reason {} connection keep-alive tid {} requestHeaders {} requestLine {} + transfer {} } set state(-keepalive) $defaultKeepalive set state(-strict) $strict @@ -2441,7 +2454,7 @@ proc http::EventGateway {sock token} { # http::reset or http::cleanup, or if the caller set option -channel # but not option -handler: in the last case reading from the socket is # now managed by commands ::http::Copy*, http::ReceiveChunked, and - # http::make-transformation-chunked. + # http::MakeTransformationChunked. # # Catch in case the coroutine has closed the socket. catch {fileevent $sock readable [list http::EventGateway $sock $token]} @@ -3061,6 +3074,61 @@ proc http::Meta {token who args} { } } + +# ------------------------------------------------------------------------------ +# Proc http::responseInfo +# ------------------------------------------------------------------------------ +# Command to return a dictionary of the most useful metadata of a HTTP +# response. 
+# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: a dict +# ------------------------------------------------------------------------------ + +proc http::responseInfo {token} { + variable $token + upvar 0 $token state + set result {} + foreach key { + stage + status + ncode + reason + type + binary + redirection + charset + coding + httpRequest + httpResponse + url + connRequest + connResponse + connection + transfer + querylength + queryoffset + totalsize + currentsize + } { + if {$key eq {stage}} { + dict set result $key $state(state) + } elseif {$key eq {redirection}} { + dict set result $key [responseHeaderValue $token Location] + } elseif {$key eq {httpRequest}} { + dict set result $key $state(-protocol) + } elseif {$key eq {connRequest}} { + dict set result $key [requestHeaderValue $token connection] + } elseif {$key eq {connResponse}} { + dict set result $key [responseHeaderValue $token connection] + } else { + dict set result $key $state($key) + } + } + return $result +} proc http::error {token} { variable $token upvar 0 $token state @@ -3069,6 +3137,14 @@ proc http::error {token} { } return } +proc http::postError {token} { + variable $token + upvar 0 $token state + if {[info exists state(posterror)]} { + return $state(posterror) + } + return +} # http::cleanup # @@ -3382,6 +3458,19 @@ proc http::Event {sock token} { } else { } + # We have $state(http) so let's split it into its components. 
+ if {[regexp {^HTTP/(\S+) ([0-9]{3}) (.*)$} $state(http) \ + -> httpResponse ncode reason] + } { + set state(httpResponse) $httpResponse + set state(ncode) $ncode + set state(reason) $reason + } else { + set state(httpResponse) $state(http) + set state(ncode) $state(http) + set state(reason) $state(http) + } + if { ([info exists state(connection)]) && ([info exists socketMapping($state(socketinfo))]) && ("keep-alive" in $state(connection)) @@ -3514,7 +3603,7 @@ proc http::Event {sock token} { && ("close" in $state(connection)) ) ) - && (![info exists state(transfer)]) + && ($state(transfer) eq {}) && ($state(totalsize) == 0) } { set msg {body size is 0 and no events likely - complete} @@ -3585,6 +3674,13 @@ proc http::Event {sock token} { connection { # RFC 7230 Section 6.1 states that a comma-separated # list is an acceptable value. + if {![info exists state(connectionResponse)]} { + # This is the first "Connection" response header. + # Scrub the earlier value set by initialisation. + set state(connectionResponse) {} + set state(connection) {} + } + set state(connOrig[incr ::countConn]) [string trim $value] foreach el [SplitCommaSeparatedFieldValue $value] { lappend state(connection) [string tolower $el] } @@ -3652,7 +3748,7 @@ proc http::Event {sock token} { } } elseif {[info exists state(transfer_final)]} { # This code forgives EOF in place of the final CRLF. - set line [getTextLine $sock] + set line [GetTextLine $sock] set n [string length $line] set state(state) complete if {$n > 0} { @@ -3675,7 +3771,7 @@ proc http::Event {sock token} { } { ##Log chunked - token $token set size 0 - set hexLenChunk [getTextLine $sock] + set hexLenChunk [GetTextLine $sock] #set ntl [string length $hexLenChunk] if {[string trim $hexLenChunk] ne ""} { scan $hexLenChunk %x size @@ -3705,7 +3801,7 @@ proc http::Event {sock token} { } # CRLF that follows chunk. # If eof, this is handled at the end of this proc. 
- getTextLine $sock + GetTextLine $sock } else { set n 0 set state(transfer_final) {} @@ -3986,7 +4082,7 @@ proc http::ParseCookie {token value} { {*}$http(-cookiejar) storeCookie $realopts } -# http::getTextLine -- +# http::GetTextLine -- # # Get one line with the stream in crlf mode. # Used if Transfer-Encoding is chunked, to read the line that @@ -4000,7 +4096,7 @@ proc http::ParseCookie {token value} { # Results: # The line of text, without trailing newline -proc http::getTextLine {sock} { +proc http::GetTextLine {sock} { set tr [fconfigure $sock -translation] lassign $tr trRead trWrite fconfigure $sock -translation [list crlf $trWrite] @@ -4076,7 +4172,7 @@ proc http::CopyStart {sock token {initial 1}} { } lappend state(zlib) [zlib stream $coding2] } - make-transformation-chunked $sock [namespace code [list CopyChunk $token]] + MakeTransformationChunked $sock [namespace code [list CopyChunk $token]] } else { if {$initial} { foreach coding [ContentEncoding $token] { @@ -4376,7 +4472,7 @@ proc http::formatQuery {args} { set result "" set sep "" foreach i $args { - append result $sep [mapReply $i] + append result $sep [quoteString $i] if {$sep eq "="} { set sep & } else { @@ -4386,7 +4482,7 @@ proc http::formatQuery {args} { return $result } -# http::mapReply -- +# http::quoteString -- # # Do x-www-urlencoded character mapping # @@ -4396,7 +4492,7 @@ proc http::formatQuery {args} { # Results: # The encoded string -proc http::mapReply {string} { +proc http::quoteString {string} { variable http variable formMap @@ -4407,7 +4503,6 @@ proc http::mapReply {string} { set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } -interp alias {} http::quoteString {} http::mapReply # http::ProxyRequired -- # Default proxy filter. 
@@ -4600,7 +4695,7 @@ proc http::GetFieldValue {headers fieldName} { return $r } -proc http::make-transformation-chunked {chan command} { +proc http::MakeTransformationChunked {chan command} { coroutine [namespace current]::dechunk$chan ::http::ReceiveChunked $chan $command chan event $chan readable [namespace current]::dechunk$chan return @@ -4608,6 +4703,7 @@ proc http::make-transformation-chunked {chan command} { interp alias {} http::data {} http::responseBody interp alias {} http::code {} http::responseLine +interp alias {} http::mapReply {} http::quoteString interp alias {} http::meta {} http::responseHeaders interp alias {} http::metaValue {} http::responseHeaderValue interp alias {} http::ncode {} http::responseCode @@ -4660,7 +4756,7 @@ proc http::socket {args} { set defcmd ::socket set sockargs $args set script " - [list proc ::SockInThread {caller defcmd sockargs} [info body http::SockInThread]] + [list proc ::SockInThread {caller defcmd sockargs} [info body ::http::SockInThread]] [list ::SockInThread [thread::id] $defcmd $sockargs] " @@ -4750,7 +4846,7 @@ proc http::SockInThread {caller defcmd sockargs} { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::cwait +# Proc http::cwaiter::cwait # ------------------------------------------------------------------------------ # Command to substitute for vwait, without the ordering issues. 
# A command that uses cwait must be a coroutine that is launched by an event, @@ -4769,13 +4865,13 @@ proc http::SockInThread {caller defcmd sockargs} { # Return Value: none # ------------------------------------------------------------------------------ -namespace eval ::http::cwaiter { +namespace eval http::cwaiter { namespace export cwait variable log {} variable logOn 0 } -proc ::http::cwaiter::cwait { +proc http::cwaiter::cwait { varName {coroName {}} {timeout {}} {timeoutValue {}} } { set thisCoro [info coroutine] @@ -4806,7 +4902,7 @@ proc ::http::cwaiter::cwait { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::CwaitHelper +# Proc http::cwaiter::CwaitHelper # ------------------------------------------------------------------------------ # Helper command called by the trace set by cwait. # - Ignores the arguments added by trace. @@ -4817,7 +4913,7 @@ proc ::http::cwaiter::cwait { # - Remove the trace immediately. We don't want multiple calls. # ------------------------------------------------------------------------------ -proc ::http::cwaiter::CwaitHelper {varName coroName toe args} { +proc http::cwaiter::CwaitHelper {varName coroName toe args} { CoLog "got $varName for $coroName" set cmd [list ::http::cwaiter::CwaitHelper $varName $coroName $toe] trace remove variable $varName write $cmd @@ -4829,12 +4925,12 @@ proc ::http::cwaiter::CwaitHelper {varName coroName toe args} { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::LogInit +# Proc http::cwaiter::LogInit # ------------------------------------------------------------------------------ # Call this command to initiate debug logging and clear the log. 
# ------------------------------------------------------------------------------ -proc ::http::cwaiter::LogInit {} { +proc http::cwaiter::LogInit {} { variable log variable logOn set log {} @@ -4842,12 +4938,12 @@ proc ::http::cwaiter::LogInit {} { return } -proc ::http::cwaiter::LogRead {} { +proc http::cwaiter::LogRead {} { variable log return $log } -proc ::http::cwaiter::CoLog {msg} { +proc http::cwaiter::CoLog {msg} { variable log variable logOn if {$logOn} { @@ -4856,7 +4952,7 @@ proc ::http::cwaiter::CoLog {msg} { return } -namespace eval ::http { +namespace eval http { namespace import ::http::cwaiter::* } -- cgit v0.12 From aca6eed0f36ee531e3e3c8eeb6c6c966ad80057f Mon Sep 17 00:00:00 2001 From: kjnash Date: Thu, 15 Sep 2022 14:39:41 +0000 Subject: Minor bugfixes and refactoring of new code. Handle more errors in OpenSocket. Rename some keys of dict returned by ::http::responseInfo, and add more keys. Improve error reporting from ::http::OpenSocket and ::http::socket, especially when using threads. Adapt http(n) to emphasize using http::* commands rather than peeking at internal state array. --- doc/http.n | 774 ++++++++++++++++++++++++++++++++++---------------- library/http/http.tcl | 200 +++++++------ 2 files changed, 646 insertions(+), 328 deletions(-) diff --git a/doc/http.n b/doc/http.n index 8a9c35b..5ba4813 100644 --- a/doc/http.n +++ b/doc/http.n @@ -13,7 +13,7 @@ .SH NAME http \- Client-side implementation of the HTTP/1.1 protocol .SH SYNOPSIS -\fBpackage require http\fI ?\fB2.10\fR? +\fBpackage require http\fR ?\fB2.10\fR? .\" See Also -useragent option documentation in body! .sp \fB::http::config\fR ?\fI\-option value\fR ...? 
@@ -80,7 +80,8 @@ Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, \fBregister\fR, \fBregisterError\fR, \fBrequestHeaders\fR, \fBrequestHeaderValue\fR, \fBrequestLine\fR, \fBresponseBody\fR, \fBresponseCode\fR, -\fBresponseHeaders\fR, \fBresponseHeaderValue\fR, \fBresponseInfo\fR, \fBresponseLine\fR, +\fBresponseHeaders\fR, \fBresponseHeaderValue\fR, \fBresponseInfo\fR, +\fBresponseLine\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. .PP It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, @@ -90,7 +91,8 @@ It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, .SH DESCRIPTION .PP The \fBhttp\fR package provides the client side of the HTTP/1.1 -protocol, as defined in RFC 7230 to RFC 7235, which supersede RFC 2616. +protocol, as defined in RFC 9110 to 9112, which supersede RFC 7230 +to RFC 7235, which in turn supersede RFC 2616. The package implements the GET, POST, and HEAD operations of HTTP/1.1. It allows configuration of a proxy host to get through firewalls. The package is compatible with the \fBSafesock\fR security @@ -103,14 +105,13 @@ The \fB::http::geturl\fR procedure does a HTTP transaction. Its \fIoptions \fR determine whether a GET, POST, or HEAD transaction is performed. The return value of \fB::http::geturl\fR is a token for the transaction. -The value is also the name of an array in the ::http namespace -that contains state information about the transaction. The elements -of this array are described in the \fBSTATE ARRAY\fR section. +The token can be supplied as an argument to other commands, to manage the +transaction and examine its results. .PP If the \fB\-command\fR option is specified, then the HTTP operation is done in the background. \fB::http::geturl\fR returns immediately after generating the -HTTP request and the callback is invoked +HTTP request and the \fB\-command\fR callback is invoked when the transaction completes. For this to work, the Tcl event loop must be active. 
In Tk applications this is always true. For pure-Tcl applications, the caller can use \fB::http::wait\fR after calling @@ -119,6 +120,15 @@ applications, the caller can use \fB::http::wait\fR after calling \fBNote:\fR The event queue is even used without the \fB\-command\fR option. As a side effect, arbitrary commands may be processed while \fBhttp::geturl\fR is running. +.PP +When the HTTP server has replied to the request, call the command +\fB::http::responseInfo\fR, which +returns a \fBdict\fR of metadata that is essential for identifying a +successful transaction and making use of the response. See +section \fBMETADATA\fR for details of the information returned. +The response itself is returned by command \fB::http::responseBody\fR, +unless it has been redirected to a file by the \fI\-channel\fR option +of \fB::http::geturl\fR. .SH COMMANDS .TP \fB::http::config\fR ?\fIoptions\fR? @@ -204,7 +214,8 @@ default is 0. .TP \fB\-threadlevel\fR \fIlevel\fR . -Specifies whether and how to use the \fBThread\fR package. Possible values of \fIlevel\fR are 0, 1 or 2. +Specifies whether and how to use the \fBThread\fR package. Possible values +of \fIlevel\fR are 0, 1 or 2. .RS .PP .DS @@ -212,7 +223,11 @@ Specifies whether and how to use the \fBThread\fR package. Possible values of \ 1 - use Thread if it is available, do not use it if it is unavailable 2 - use Thread if it is available, raise an error if it is unavailable .DE -The Tcl \fBsocket -async\fR command can block in adverse cases (e.g. a slow DNS lookup). Using the Thread package works around this problem, for both HTTP and HTTPS transactions. Values of \fIlevel\fR other than 0 are available only to the main interpreter in each thread. See section \fBTHREADS\fR for more information. +The Tcl \fBsocket -async\fR command can block in adverse cases (e.g. a slow +DNS lookup). Using the Thread package works around this problem, for both +HTTP and HTTPS transactions. 
Values of \fIlevel\fR other than 0 are +available only to the main interpreter in each thread. See +section \fBTHREADS\fR for more information. .RE .TP \fB\-urlencoding\fR \fIencoding\fR @@ -235,20 +250,21 @@ numbers of \fBhttp\fR and \fBTcl\fR. . If the value is boolean \fBtrue\fR, then by default requests will send a header .QW "\fBAccept-Encoding: gzip,deflate\fR" . -If the value is boolean \fBfalse\fR, then by default this header will not be -sent. In either case the default can be overridden for an individual request by +If the value is boolean \fBfalse\fR, then by default requests will send a header +.QW "\fBAccept-Encoding: identity\fR" . +In either case the default can be overridden for an individual request by supplying a custom \fBAccept-Encoding\fR header in the \fB\-headers\fR option -of \fBhttp::geturl\fR. The default is 1. +of \fBhttp::geturl\fR. The default value is 1. .RE .TP \fB::http::geturl\fR \fIurl\fR ?\fIoptions\fR? . The \fB::http::geturl\fR command is the main procedure in the package. -The \fB\-query\fR option causes a POST operation and +The \fB\-query\fR or \fB\-querychannel\fR option causes a POST operation and the \fB\-validate\fR option causes a HEAD operation; otherwise, a GET operation is performed. The \fB::http::geturl\fR command -returns a \fItoken\fR value that can be used to get -information about the transaction. See the \fBSTATE ARRAY\fR and +returns a \fItoken\fR value that can be passed as an argument to other commands +to get information about the transaction. See the \fBMETADATA\fR and \fBERRORS\fR section for details. The \fB::http::geturl\fR command blocks until the operation completes, unless the \fB\-command\fR option specifies a callback @@ -272,13 +288,14 @@ At most \fIsize\fR bytes are read at once. After each block, a call to the \fB\-channel\fR \fIname\fR . Copy the URL contents to channel \fIname\fR instead of saving it in -\fBstate(body)\fR. +a Tcl variable for retrieval by \fB::http::responseBody\fR. 
.TP \fB\-command\fR \fIcallback\fR . -Invoke \fIcallback\fR after the HTTP transaction completes. -This option causes \fB::http::geturl\fR to return immediately. -The \fIcallback\fR gets an additional argument that is the \fItoken\fR returned +The presence of this option causes \fB::http::geturl\fR to return immediately. +After the HTTP transaction completes, the value of \fIcallback\fR is expanded, +an additional argument is added, and the resulting command is evaluated. +The additional argument is the \fItoken\fR returned from \fB::http::geturl\fR. This token is the name of an array that is described in the \fBSTATE ARRAY\fR section. Here is a template for the callback: @@ -286,8 +303,10 @@ callback: .PP .CS proc httpCallback {token} { - upvar #0 $token state - # Access state as a Tcl array + upvar 0 $token state + # Access state as a Tcl array defined in this proc + ... + return } .CE .PP @@ -299,22 +318,28 @@ details. .TP \fB\-guesstype\fR \fIboolean\fR . -Attempt to guess the Content-Type and character set when a misconfigured +Attempt to guess the \fBContent-Type\fR and character set when a misconfigured server provides no information. The default value is \fIfalse\fR (do nothing). If boolean \fItrue\fR then, if the server does not send a -"Content-Type" header, or if it sends the value "application/octet-stream", +\fBContent-Type\fR header, or if it sends the value "application/octet-stream", \fBhttp::geturl\fR will attempt to guess appropriate values. This is not intended to become a general-purpose tool, and currently it is limited to detecting XML documents that begin with an XML declaration. In this case -the Content-Type is changed to "application/xml", and the character set to +the \fBContent-Type\fR is changed to "application/xml", the binary flag +state(binary) is changed to 0, and the character set is changed to the one specified by the "encoding" tag of the XML line, or to utf-8 if no -encoding is specified. +encoding is specified. 
Not used if a \fI\-channel\fR is specified. .TP \fB\-handler\fR \fIcallback\fR . -Invoke \fIcallback\fR whenever HTTP data is available; if present, nothing -else will be done with the HTTP data. This procedure gets two additional -arguments: the socket for the HTTP data and the \fItoken\fR returned from +If this option is absent, \fBhttp::geturl\fR processes incoming data itself, +either appending it to the state(body) variable or writing it to the \fB\-channel\fR channel. +But if the \fB\-handler\fR option is present, \fBhttp::geturl\fR does not do +this processing and instead calls \fIcallback\fR. +Whenever HTTP data is available, the value of \fIcallback\fR is expanded, an +additional two arguments are added, and the resulting command is evaluated. +The two additional +arguments are: the socket for the HTTP data and the \fItoken\fR returned from \fB::http::geturl\fR. The token is the name of a global array that is described in the \fBSTATE ARRAY\fR section. The procedure is expected to return the number of bytes read from the socket. Here is a @@ -323,8 +348,8 @@ template for the callback: .PP .CS proc httpHandlerCallback {socket token} { - upvar #0 $token state - # Access socket, and state as a Tcl array + upvar 0 $token state + # Access socket, and state as a Tcl array defined in this proc # For example... ... set data [read $socket 1000] @@ -337,8 +362,9 @@ proc httpHandlerCallback {socket token} { The \fBhttp::geturl\fR code for the \fB\-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB\-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the -HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will not -send the header "\fBAccept-Encoding: gzip,deflate\fR"). +HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will +send the header \fBAccept-Encoding: identity\fR instead +of \fBAccept-Encoding: gzip,deflate\fR). 
.PP If options \fB\-handler\fR and \fB\-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified @@ -384,7 +410,10 @@ It is the caller's responsibility to ensure that the headers and request body (if any) conform to the requirements of the request method. For example, if using \fB\-method\fR \fIPOST\fR to send a POST with an empty request body, the caller must also supply the option -.QW "\-headers {Content-Length 0}" . +.PP +.CS +\-headers {Content-Length 0} +.CE .RE .TP \fB\-myaddr\fR \fIaddress\fR @@ -394,18 +423,26 @@ multiple interfaces are available. .TP \fB\-progress\fR \fIcallback\fR . -The \fIcallback\fR is made after each transfer of data from the URL. -The callback gets three additional arguments: the \fItoken\fR from +If the \fB\-progress\fR option is present, +then the \fIcallback\fR is made after each transfer of data from the URL. +The value of \fIcallback\fR is expanded, an additional three arguments are +added, and the resulting command is evaluated. +The three additional arguments are: the \fItoken\fR returned from \fB::http::geturl\fR, the expected total size of the contents from the -\fBContent-Length\fR metadata, and the current number of bytes -transferred so far. The expected total size may be unknown, in which +\fBContent-Length\fR response header, and the current number of bytes +transferred so far. The token is the name of a global array that is +described in the \fBSTATE ARRAY\fR section. The expected total size may +be unknown, in which case zero is passed to the callback. Here is a template for the progress callback: .RS .PP .CS proc httpProgress {token total current} { - upvar #0 $token state + upvar 0 $token state + # Access state as a Tcl array defined in this proc + ... + return } .CE .RE @@ -449,20 +486,24 @@ This flag causes \fB::http::geturl\fR to do a POST request that passes the data contained in \fIchannelID\fR to the server. 
The data contained in \fIchannelID\fR must be an x-url-encoding formatted query unless the \fB\-type\fR option below is used. -If a Content-Length header is not specified via the \fB\-headers\fR options, -\fB::http::geturl\fR attempts to determine the size of the post data +If a \fBContent-Length\fR header is not specified via the \fB\-headers\fR +options, \fB::http::geturl\fR attempts to determine the size of the post data in order to create that header. If it is unable to determine the size, it returns an error. .TP \fB\-queryprogress\fR \fIcallback\fR . -The \fIcallback\fR is made after each transfer of data to the URL -(i.e. POST) and acts exactly like the \fB\-progress\fR option (the -callback format is the same). +If the \fB\-queryprogress\fR option is present, +then the \fIcallback\fR is made after each transfer of data to the URL +in a POST request (i.e. a call to \fB::http::geturl\fR with +option \fB\-query\fR or \fB\-querychannel\fR) and acts exactly like +the \fB\-progress\fR option (the callback format is the same). .TP \fB\-strict\fR \fIboolean\fR . -Whether to enforce RFC 3986 URL validation on the request. Default is 1. +If true then the command will test that the URL complies with RFC 3986, i.e. +that it has no characters that should be "x-url-encoded" (e.g. a space should +be encoded to "%20"). Default value is 1. .TP \fB\-timeout\fR \fImilliseconds\fR . @@ -470,7 +511,8 @@ If \fImilliseconds\fR is non-zero, then \fB::http::geturl\fR sets up a timeout to occur after the specified number of milliseconds. A timeout results in a call to \fB::http::reset\fR and to the \fB\-command\fR callback, if specified. -The return value of \fB::http::status\fR is \fBtimeout\fR +The return value of \fB::http::status\fR (and the value of the \fIstatus\fR key +in the dictionary returned by \fB::http::responseInfo\fR) is \fBtimeout\fR after a timeout has occurred. .TP \fB\-type\fR \fImime-type\fR @@ -482,10 +524,11 @@ POST operation. \fB\-validate\fR \fIboolean\fR . 
If \fIboolean\fR is non-zero, then \fB::http::geturl\fR does an HTTP HEAD -request. This request returns meta information about the URL, but the -contents are not returned. The meta information is available in the -\fBstate(meta) \fR variable after the transaction. See the -\fBSTATE ARRAY\fR section for details. +request. The server returns the same status line and response headers as it +would for a HTTP GET request, but omits the response entity +(the URL "contents"). The response headers are available after the +transaction using command \fB::http::responseHeaders\fR or, for selected +information, \fB::http::responseInfo\fR. .RE .TP \fB::http::formatQuery\fR \fIkey value\fR ?\fIkey value\fR ...? @@ -509,7 +552,7 @@ This sets the \fBstate(status)\fR value to \fIwhy\fR, which defaults to .TP \fB::http::wait\fR \fItoken\fR . -This is a convenience procedure that blocks and waits for the +This command blocks and waits for the transaction to complete. This only works in trusted code because it uses \fBvwait\fR. Also, it is not useful for the case where \fB::http::geturl\fR is called \fIwithout\fR the \fB\-command\fR option @@ -519,29 +562,44 @@ wait for. .TP \fB::http::status\fR \fItoken\fR . -This is a convenience procedure that returns the \fBstatus\fR element of -the state array. +This command returns a description of the status of the HTTP transaction. +The return value is the empty string until the HTTP transaction is +completed; after completion it has one of the values ok, eof, error, +timeout, and reset. The meaning of these values is described in the +section \fBERRORS\fR (below). +.PP +.RS +The name "status" is not related to the terms "status line" and +"status code" that are defined for a HTTP response. +.RE .TP \fB::http::size\fR \fItoken\fR . -This is a convenience procedure that returns the \fBcurrentsize\fR -element of the state array, which represents the number of bytes -received from the URL in the \fB::http::geturl\fR call. 
+This command returns the number of bytes +received so far from the URL in the \fB::http::geturl\fR call. .TP \fB::http::error\fR \fItoken\fR . -This is a convenience procedure that returns the \fBerror\fR element -of the state array. +This command returns the error information if the HTTP transaction failed, +or the empty string if there was no error. The information is a Tcl list of +the error message, stack trace, and error code. .TP \fB::http::postError\fR \fItoken\fR . -A POST request is a call to \fB::http::geturl\fR with either the \fB\-query\fR or \fB\-querychannel\fR option. The \fB::http::postError\fR command returns the error string generated when a HTTP POST request sends its request-body to the server; or the empty string if there was no error. When this type of error occurs, the \fB::http::geturl\fR command continues the transaction and attempts to receive a response from the server. +A POST request is a call to \fB::http::geturl\fR with either +the \fB\-query\fR or \fB\-querychannel\fR option. +The \fB::http::postError\fR command returns the error information generated +when a HTTP POST request sends its request-body to the server; or the empty +string if there was no error. The information is a Tcl list of the error +message, stack trace, and error code. When this type of error occurs, +the \fB::http::geturl\fR command continues the transaction and attempts to +receive a response from the server. .TP \fB::http::cleanup\fR \fItoken\fR . This procedure cleans up the state associated with the connection identified by \fItoken\fR. After this call, the procedures -like \fB::http::data\fR cannot be used to get information +like \fB::http::responseBody\fR cannot be used to get information about the operation. It is \fIstrongly\fR recommended that you call this function after you are done with a given HTTP request. 
Not doing so will result in memory not being freed, and if your app calls @@ -565,13 +623,12 @@ POST /forms/order.html HTTP/1.1 .TP \fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? . -This command returns a list of the HTTP request header names and values, in the -order that they were sent to the server: a Tcl list of the form +This command returns the HTTP request header names and values, in the +order that they were sent to the server, as a Tcl list of the form ?name value ...? Header names are case-insensitive and are converted to lower case. The return value is not a \fBdict\fR because some header names may occur more than once. If one argument is supplied, all request headers -are returned: the value is that of the \fBrequestHeaders\fR element -of the state array (described below). If two arguments are supplied, the +are returned. If two arguments are supplied, the second provides the value of a header name. Only headers with the requested name (converted to lower case) are returned. If no such headers are found, an empty list is returned. @@ -587,9 +644,8 @@ preserving their order. .TP \fB::http::responseLine\fR \fItoken\fR . -This command returns the "status line" of the server response (which is stored -as element \fBhttp\fR of the state array). -The "status line" is the first line of a HTTP server response, and has three +This command returns the first line of the server response: the +HTTP "status line". The "status line" has three elements separated by spaces: the HTTP version, a three-digit numerical "status code", and a "reason phrase". Only the reason phrase may contain spaces. Examples: @@ -619,7 +675,7 @@ command \fB::http::reasonPhrase\fR. . This command returns the "status code" (200, 404, etc.) of the server "status line". If a three-digit code cannot be found, the full status -line is returned. See command \fB::http::code\fR for more information +line is returned. 
See command \fB::http::responseLine\fR for more information on the "status line". .TP \fB::http::reasonPhrase\fR \fIcode\fR @@ -636,8 +692,9 @@ HEAD, GET, or POST request method. The "reason phrase" returned by a HTTP server may differ from the recommended value, without affecting the HTTP protocol. The value returned by \fB::http::geturl\fR can be obtained by calling either command -\fB::http::code\fR (which returns the full status line) or command -\fB::http::ncode\fR (for the status code only). +\fB::http::responseLine\fR (which returns the full status line) or command +\fB::http::responseInfo\fR (which returns a dictionary, with +the "reason phrase" stored in key \fIreasonPhrase\fR). .PP A registry of valid status codes is maintained at https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml @@ -645,16 +702,18 @@ https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml .TP \fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? . -This command returns a list of HTTP response header names and values, in the -order that they were received from the server: a Tcl list of the form +The response from a HTTP server includes metadata headers that describe the +response body and the transaction itself. +This command returns the HTTP response header names and values, in the +order that they were received from the server, as a Tcl list of the form ?name value ...? Header names are case-insensitive and are converted to lower case. The return value is not a \fBdict\fR because some header names may occur -more than once, notably \fIset-cookie\fR. If one argument is supplied, all -response headers are returned: the value is that of the \fBmeta\fR element -of the state array (described below). If two arguments are supplied, the -second provides the value of a header name. Only headers with the requested -name (converted to lower case) are returned. If no such headers are found, -an empty list is returned. 
+more than once, notably \fBSet-Cookie\fR. If the second argument is not +supplied, all response headers are returned. If the second argument is +supplied, it provides the value of a header name. Only headers with the +requested name (converted to lower case) are returned. If no such headers +are found, an empty list is returned. See section \fBMETADATA\fR for more +information. .TP \fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR . @@ -664,17 +723,32 @@ lower case. If no such header exists, the return value is the empty string. If there are multiple headers named \fIheaderName\fR, the result is obtained by joining the individual values with the string ", " (comma and space), preserving their order. Multiple headers with the same name may be processed -in this manner, except \fIset-cookie\fR which does not conform to the +in this manner, except \fBSet-Cookie\fR which does not conform to the comma-separated-list syntax and cannot be combined into a single value. -Each \fIset-cookie\fR header must be treated individually, e.g. by processing -the return value of \fB::http::responseHeaders\fR \fItoken\fR \fIset-cookie\fR. +Each \fBSet-Cookie\fR header must be treated individually, e.g. by processing +the return value of \fB::http::responseHeaders\fR \fItoken\fR \fBSet-Cookie\fR. .TP \fB::http::responseInfo\fR \fItoken\fR . -This command returns a \fBdict\fR of selected response metadata that are essential for identifying a successful transaction and making use of the response, along with other metadata that are informational. The keys of the \fBdict\fR are \fIstage\fR, \fIstatus\fR, \fIncode\fR, \fIreason\fR, \fItype\fR, \fIbinary\fR, \fIredirection\fR, \fIcharset\fR, \fIcoding\fR, \fIhttpRequest\fR, \fIhttpResponse\fR, \fIurl\fR, \fIconnRequest\fR, \fIconnResponse\fR, \fIconnection\fR, \fItransfer\fR, \fItotalsize\fR, and \fIcurrentsize\fR. The meaning of these keys is described in the chapter \fBMETADATA\fR below. 
+This command returns a \fBdict\fR of selected response metadata that are +essential for identifying a successful transaction and making use of the +response, along with other metadata that are informational. The keys of +the \fBdict\fR are \fIstage\fR, \fIstatus\fR, \fIresponseCode\fR, +\fIreasonPhrase\fR, \fIcontentType\fR, \fIbinary\fR, \fIredirection\fR, +\fIupgrade\fR, \fIerror\fR, \fIpostError\fR, \fImethod\fR, \fIcharset\fR, +\fIcompression\fR, \fIhttpRequest\fR, \fIhttpResponse\fR, \fIurl\fR, +\fIconnectionRequest\fR, \fIconnectionResponse\fR, \fIconnectionActual\fR, +\fItransferEncoding\fR, \fItotalPost\fR, \fIcurrentPost\fR, \fItotalSize\fR, +and \fIcurrentSize\fR. The meaning of these keys is described in the +section \fBMETADATA\fR below. .RS .PP -It is always worth checking the value of \fIbinary\fR after a HTTP transaction, to determine whether a misconfigured server has caused http to interpret a text resource as a binary, or vice versa. +It is always worth checking the value of \fIbinary\fR after a HTTP transaction, +to determine whether a misconfigured server has caused http to interpret a +text resource as a binary, or vice versa. +.PP +After a POST transaction, check the value of \fIpostError\fR to verify that +the request body was uploaded without error. .RE .TP \fB::http::responseBody\fR \fItoken\fR @@ -687,7 +761,7 @@ channel, and the command returns the empty string). Other terms for "entity", with varying precision, include "representation of resource", "resource", "response body after decoding", "payload", -"message body after decoding", "content", and "file". +"message body after decoding", "content(s)", and "file". .RE .TP \fB::http::register\fR \fIproto port command\fR @@ -738,25 +812,21 @@ An alternative name for the command \fB::http::responseBody\fR. . An alternative name for the command \fB::http::responseHeaders\fR .TP -\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR -. 
-An alternative name for the command \fB::http::responseHeaderValue\fR -.TP \fB::http::ncode\fR \fItoken\fR . An alternative name for the command \fB::http::responseCode\fR .SH ERRORS The \fB::http::geturl\fR procedure will raise errors in the following cases: invalid command line options, -an invalid URL, -a URL on a non-existent host, -or a URL at a bad port on an existing host. +or an invalid URL. These errors mean that it cannot even start the network transaction. -It will also raise an error if it gets an I/O error while -writing out the HTTP request header. For synchronous \fB::http::geturl\fR calls (where \fB\-command\fR is -not specified), it will raise an error if it gets an I/O error while +not specified), it will raise an error if +the URL is on a non-existent host +or at a bad port on an existing host. +It will also raise an error for any I/O errors while +writing out the HTTP request line and headers, or reading the HTTP reply headers or data. Because \fB::http::geturl\fR does not return a token in these cases, it does all the required cleanup and there is no issue of your app having to call @@ -768,13 +838,12 @@ HTTP reply headers or data, no exception is thrown. This is because after writing the HTTP headers, \fB::http::geturl\fR returns, and the rest of the HTTP transaction occurs in the background. The command callback can check if any error occurred during the read by calling -\fB::http::status\fR to check the status and if its \fIerror\fR, -calling \fB::http::error\fR to get the error message. +\fB::http::responseInfo\fR to check the transaction status. .PP Alternatively, if the main program flow reaches a point where it needs to know the result of the asynchronous HTTP request, it can call \fB::http::wait\fR and then check status and error, just as the -callback does. +synchronous call does. .PP The \fB::http::geturl\fR command runs the \fB\-command\fR, \fB\-handler\fR, and \fB\-proxyfilter\fR callbacks inside a \fBcatch\fR command. 
Therefore @@ -788,15 +857,17 @@ In any case, you must still call \fB::http::cleanup\fR to delete the state array when you are done. .PP There are other possible results of the HTTP transaction -determined by examining the status from \fB::http::status\fR. +determined by examining the status from \fB::http::status\fR (or the value +of the \fIstatus\fR key in the dictionary returned +by \fB::http::responseInfo\fR). These are described below. .TP \fBok\fR . If the HTTP transaction completes entirely, then status will be \fBok\fR. -However, you should still check the \fB::http::code\fR value to get -the HTTP status. The \fB::http::ncode\fR procedure provides just -the numeric error (e.g., 200, 404 or 500) while the \fB::http::code\fR +However, you should still check the \fB::http::responseLine\fR value to get +the HTTP status. The \fB::http::responseCode\fR procedure provides just +the numeric error (e.g., 200, 404 or 500) while the \fB::http::responseLine\fR procedure returns a value like .QW "HTTP 404 File not found" . .TP @@ -807,32 +878,47 @@ is raised, but the status of the transaction will be \fBeof\fR. .TP \fBerror\fR . -The error message will also be stored in the \fBerror\fR status -array element, accessible via \fB::http::error\fR. +The error message, stack trace, and error code are accessible +via \fB::http::error\fR. The error message is also provided by the value of +the \fIerror\fR key in the dictionary returned by \fB::http::responseInfo\fR. .TP \fBtimeout\fR . -A timeout occurred before the transaction could complete +A timeout occurred before the transaction could complete. .TP \fBreset\fR . -user-reset -.PP -Another error possibility is that \fB::http::geturl\fR is unable to -write all the post query data to the server before the server -responds and closes the socket. -The error message is saved in the \fBposterror\fR status array -element and then \fB::http::geturl\fR attempts to complete the -transaction. 
-If it can read the server's response -it will end up with an \fBok\fR status, otherwise it will have -an \fBeof\fR status. +The user has called \fB::http::reset\fR. +.TP +\fB""\fR +. +(empty string) The transaction has not yet finished. +.PP +Another error possibility is that \fB::http::geturl\fR failed to +write the whole of the POST request body (\fB\-query\fR or \fB\-querychannel\fR +data) to the server. \fB::http::geturl\fR stores the error message for later +retrieval by the \fB::http::postError\fR or \fB::http::responseInfo\fR +commands, and then attempts to complete the transaction. +If it can read the server's response the status will be \fBok\fR, but it is +important to call \fB::http::postError\fR or \fB::http::responseInfo\fR after +every POST to check that the data was sent in full. +If the server has closed the connection the status will be \fBeof\fR. .SH "METADATA" -When a HTTP server responds to a request, it supplies not only the entity requested, but also metadata. This is provided by the first line (the "status line") of the response, and by a number of HTTP headers. Further metadata relates to how \fB::http::geturl\fR has processed the response from the server. +.PP +.SS "MOST USEFUL METADATA" +When a HTTP server responds to a request, it supplies not only the entity +requested, but also metadata. This is provided by the first line (the +"status line") of the response, and by a number of HTTP headers. Further +metadata relates to how \fB::http::geturl\fR has processed the response +from the server. .PP The most important metadata can be accessed with the command \fB::http::responseInfo\fR. -This command returns a \fBdict\fR of metadata that are essential for identifying a successful transaction and making use of the response, along with other metadata that are informational. 
The keys of the \fBdict\fR are: +This command returns a \fBdict\fR of metadata that are essential for +identifying a successful transaction and making use of the response, +along with other metadata that are informational. The keys of +the \fBdict\fR are: +.PP .RS .RS \fB===== Essential Values =====\fR @@ -841,220 +927,398 @@ This command returns a \fBdict\fR of metadata that are essential for identifying .TP \fBstage\fR . -This value, set by \fB::http::geturl\fR, describes the stage that the transaction has reached. Values, in order of the transaction lifecycle, are: "created", "connecting", "header", "body", and "complete". Other \fBdict\fR keys are available when the value of stage is "body" or "complete". The key \fBconnection\fR has its final value only when \fBstage\fR is "complete". +This value, set by \fB::http::geturl\fR, describes the stage that the +transaction has reached. Values, in order of the transaction lifecycle, +are: "created", "connecting", "header", "body", and "complete". The +other \fBdict\fR keys will not be available until the value of \fBstage\fR +is "body" or "complete". The key \fBcurrentSize\fR has its final value only +when \fBstage\fR is "complete". .TP \fBstatus\fR . -This value, set by \fB::http::geturl\fR, is "ok" for a successful transaction; "eof", "error", "timeout", or "reset" for an unsuccessful transaction; or "" if the transaction is still in progress. In the last case the values for other dictionary keys may not be available. The meaning of these values is described in the chapter \fBERRORS\fR (above). +This value, set by \fB::http::geturl\fR, is "ok" for a successful transaction; +"eof", "error", "timeout", or "reset" for an unsuccessful transaction; or "" +if the transaction is still in progress. The value is the same as that +returned by command \fB::http::status\fR. The meaning of these values is +described in the section \fBERRORS\fR (above). .TP -\fBncode\fR +\fBresponseCode\fR . 
-The "HTTP status code" for the response. +The "HTTP status code" sent by the server in the first line (the "status line") +of the response. If the value cannot be extracted from the status line, the +full status line is returned. .TP -\fBreason\fR +\fBreasonPhrase\fR . -The "reason phrase" sent by the server. +The "reason phrase" sent by the server as a description of the HTTP status code. +If the value cannot be extracted from the status line, the full status +line is returned. .TP -\fBcontent-type\fR +\fBcontentType\fR . -The value of the \fBContent-Type\fR response header or, if the header was not supplied, the default value "application/octet-stream". +The value of the \fBContent-Type\fR response header or, if the header was not +supplied, the default value "application/octet-stream". .TP \fBbinary\fR . -This boolean value, set by \fB::http::geturl\fR, describes how the command has interpreted the entity returned by the server (after decoding any compression specified -by the -.QW "Content-Encoding" -response header). This decoded entity is accessible as the return value of the command \fB::http::responseBody\fR. - -The value is \fBtrue\fR if http has interpreted the decoded entity as binary. The value returned by ::http::responseBody is a Tcl binary string. This is a suitable format for image data, zip files, etc. -\fB::http::geturl\fR chooses this value if the user has requested a binary interpretation by passing the option \fI\-binary\fR to the command, or if the server has supplied a binary content type in a Content-Type response header, or if the server has not supplied any Content-Type header. - -The value is \fBfalse\fR if http has interpreted the decoded entity as text. The text has been converted from the character set notified by the server into Tcl's internal Unicode format, and the value returned by ::http::responseBody is an ordinary Tcl string. 
- -It is always worth checking the value of "binary" after a HTTP transaction, to determine whether a misconfigured server has caused http to interpret a text resource as a binary, or vice versa. +This boolean value, set by \fB::http::geturl\fR, describes how the command +has interpreted the entity returned by the server (after decoding any +compression specified by the \fBContent-Encoding\fR response header). +This decoded entity is accessible as the return value of the +command \fB::http::responseBody\fR. +.PP +.RS +The value is \fBtrue\fR if http has interpreted the decoded entity as binary. +The value returned by \fB::http::responseBody\fR is a Tcl binary string. +This is a suitable format for image data, zip files, etc. +\fB::http::geturl\fR chooses this value if the user has requested a binary +interpretation by passing the option \fI\-binary\fR to the command, or if the +server has supplied a binary content type in a \fBContent-Type\fR response +header, or if the server has not supplied any \fBContent-Type\fR header. +.PP +The value is \fBfalse\fR in other cases, and this means that http has +interpreted the decoded entity as text. The text has been converted, from the +character set notified by the server, into Tcl's internal Unicode format; +the value returned by \fB::http::responseBody\fR is an ordinary Tcl string. +.PP +It is always worth checking the value of "binary" after a HTTP transaction, +to determine whether a misconfigured server has caused http to interpret a +text resource as a binary, or vice versa. +.RE .TP \fBredirection\fR . -The URL that is the redirection target. The value is that of the Location response header. This header is sent when a response has status code 3XX (redirection). +The URL that is the redirection target. The value is that of the \fBLocation\fR +response header. This header is sent when a response has status code +3XX (redirection). +.TP +\fBupgrade\fR +. 
+If not empty, the value indicates the protocol(s) to which the server will +switch after completion of this transaction, while continuing to use the +same connection. When the server intends to switch protocols, it will also +send the value "101" as the status code (the \fBresponseCode\fR key), and the +word "upgrade" as an element of the \fBConnection\fR response header (the +\fBconnectionResponse\fR key), and it will not send a response body. +See the section \fBPROTOCOL UPGRADES\fR for more information. +.TP +\fBerror\fR +. +The error message, if there is one. Further information, including a stack +trace and error code, is available from command \fB::http::error\fR. +.TP +\fBpostError\fR +. +The error message (if any) generated when a HTTP POST request sends its +request-body to the server. Further information, including a stack trace +and error code, is available from command \fB::http::postError\fR. A POST +transaction may appear complete, according to the +keys \fBstage\fR, \fBstatus\fR, and \fBresponseCode\fR, but it is important +to check this \fBpostError\fR key in case an error occurred when uploading +the request-body. +.PP .RS .RS \fB===== Informational Values =====\fR .RE .RE .TP +\fBmethod\fR +. +The HTTP method used in the request. +.TP \fBcharset\fR . -The value of the charset attribute of the \fBContent-Type\fR response header. The charset value is used only for a text resource. If the server did not specify a charset, the value defaults to that of the variable \fB::http::defaultCharset\fR, which unless it has been deliberately modified by the caller is \fBiso8859-1\fR. Incoming text data is automatically converted from the character set defined by \fBcharset\fR to Tcl's internal Unicode representation, i.e. to a Tcl string. +The value of the charset attribute of the \fBContent-Type\fR response header. +The charset value is used only for a text resource.
If the server did not +specify a charset, the value defaults to that of the +variable \fB::http::defaultCharset\fR, which unless it has been deliberately +modified by the caller is \fBiso8859-1\fR. Incoming text data is automatically +converted from the character set defined by \fBcharset\fR to Tcl's internal +Unicode representation, i.e. to a Tcl string. .TP -\fBcoding\fR +\fBcompression\fR . A copy of the \fBContent-Encoding\fR response-header value. .TP \fBhttpRequest\fR . The version of HTTP specified in the request (i.e. sent in the request line). +The value is that of the option \fB\-protocol\fR supplied +to \fB::http::geturl\fR (default value "1.1"), unless the command reduced the +value to "1.0" because it was passed the \fB\-handler\fR option. .TP \fBhttpResponse\fR . -The version of HTTP used by the server (obtained from the response "status line"). The server uses this version of HTTP in its response, but ensures that this response is compatible with the HTTP version specified in the client's request. +The version of HTTP used by the server (obtained from the response +"status line"). The server uses this version of HTTP in its response, but +ensures that this response is compatible with the HTTP version specified in the +client's request. If the value cannot be extracted from the status line, the +full status line is returned. .TP \fBurl\fR . -The requested URL, typically the URL supplied as an argument to \fB::http::geturl\fR but without its "fragment" (the final part of the URL beginning with "#". +The requested URL, typically the URL supplied as an argument +to \fB::http::geturl\fR but without its "fragment" (the final part of the URL +beginning with "#"). .TP -\fBconnRequest\fR +\fBconnectionRequest\fR . -The value, if any, sent to the server in "Connection" request header(s). +The value, if any, sent to the server in \fBConnection\fR request header(s). .TP -\fBconnResponse\fR +\fBconnectionResponse\fR . 
-The value, if any, received from the server in "Connection" response header(s). +The value, if any, received from the server in \fBConnection\fR response +header(s). .TP -\fBconnection\fR +\fBconnectionActual\fR . -This value, set by \fB::http::geturl\fR, reports whether the connection was closed after the transaction (value "close"), or left open (value "keep-alive"). +This value, set by \fB::http::geturl\fR, reports whether the connection was +closed after the transaction (value "close"), or left open (value "keep-alive"). .TP -\fBtransfer\fR +\fBtransferEncoding\fR . -The value of the Transfer-Encoding response header, if it is present. The value is either "chunked" (indicating HTTP/1.1 "chunked encoding") or the empty string. +The value of the \fBTransfer-Encoding\fR response header, if it is present. +The value is either "chunked" (indicating HTTP/1.1 "chunked encoding") or +the empty string. .TP -\fBquerylength\fR +\fBtotalPost\fR . The total length of the request body in a POST request. .TP -\fBqueryoffset\fR +\fBcurrentPost\fR . The number of bytes of the POST request body sent to the server so far. .TP -\fBtotalsize\fR +\fBtotalSize\fR . A copy of the \fBContent-Length\fR response-header value. -The number of bytes specified in a Content-Length header, if one was sent. If none was sent, the value is 0. A correctly configured server omits this header if the transfer-encoding is "chunked", or (for older servers) if the server closes the connection when it reaches the end of the resource. +The number of bytes specified in a \fBContent-Length\fR header, if one +was sent. If none was sent, the value is 0. A correctly configured server +omits this header if the transfer-encoding is "chunked", or (for older +servers) if the server closes the connection when it reaches the end of +the resource. .TP -\fBcurrentsize\fR +\fBcurrentSize\fR . The number of bytes fetched from the server so far. +The value is the same as that returned by command \fB::http::size\fR.
+.PP +.SS "MORE METADATA" +The dictionary returned by \fB::http::responseInfo\fR is the most useful +subset of the available metadata. Other metadata include: +.PP +1. The full "status line" of the response, available as the return value +of command \fB::http::responseLine\fR. +.PP +2. The full response headers, available as the return value of +command \fB::http::responseHeaders\fR. This return value is a list of the +response-header names and values, in the order that they were received from +the server. +.PP +The return value is not a \fBdict\fR because some header names may +occur more than once, notably \fBSet-Cookie\fR. If the value is read +into a \fBdict\fR or into an array (using array set), only the last header +with each name will be preserved. +.PP +.RS +Some of the header names (metadata keys) are listed below, but the HTTP +standard defines several more, and servers are free to add their own. +When a dictionary key is mentioned below, this refers to the \fBdict\fR +value returned by command \fB::http::responseInfo\fR. +.TP +\fBContent-Type\fR +. +The content type of the URL contents. Examples include \fBtext/html\fR, +\fBimage/gif\fR, \fBapplication/postscript\fR and +\fBapplication/x-tcl\fR. Text values typically specify a character set, e.g. +\fBtext/html; charset=UTF-8\fR. Dictionary key \fIcontentType\fR. +.TP +\fBContent-Length\fR +. +The advertised size in bytes of the contents, available as dictionary +key \fItotalSize\fR. The actual number of bytes read by \fB::http::geturl\fR +so far is available as dictionary key \fIcurrentSize\fR. +.TP +\fBContent-Encoding\fR +. +The compression algorithm used for the contents. +Examples include \fBgzip\fR, \fBdeflate\fR. +Dictionary key \fIcompression\fR. +.TP +\fBLocation\fR +. +This header is sent when a response has status code 3XX (redirection). +It provides the URL that is the redirection target. +Dictionary key \fIredirection\fR. +.TP +\fBSet-Cookie\fR +.
+This header is sent to offer a cookie to the client. Cookie management is +done by the \fB::http::config\fR option \fI\-cookiejar\fR, and so +the \fBSet-Cookie\fR headers need not be parsed by user scripts. +See section \fBCOOKIE JAR PROTOCOL\fR. +.TP +\fBConnection\fR +. +The value can be supplied as a comma-separated list, or by multiple headers. +The list often has only one element, either "close" or "keep-alive". +The value "upgrade" indicates a successful upgrade request and is typically +combined with the status code 101, an \fBUpgrade\fR response header, and no +response body. Dictionary key \fIconnectionResponse\fR. +.TP +\fBUpgrade\fR +. +The value indicates the protocol(s) to which the server will switch +immediately after the empty line that terminates the 101 response headers. +Dictionary key \fIupgrade\fR. +.RE +.PP +.SS "EVEN MORE METADATA" +.PP +1. Details of the HTTP request. The request is determined by the options +supplied to \fB::http::geturl\fR and \fB::http::config\fR. However, it is +sometimes helpful to examine what \fB::http::geturl\fR actually sent to the +server, and this information is available through +commands \fB::http::requestHeaders\fR and \fB::http::requestLine\fR. +.PP +2. The state array: the internal variables of \fB::http::geturl\fR. +It may sometimes be helpful to examine this array. +Details are given in the next section. .SH "STATE ARRAY" -The \fB::http::geturl\fR procedure returns a \fItoken\fR that can be used to -get to the state of the HTTP transaction in the form of a Tcl array. -Use this construct to create an easy-to-use array variable: +The \fB::http::geturl\fR procedure returns a \fItoken\fR that can be used +as an argument to other \fB::http::*\fR commands, which examine and manage +the state of the HTTP transaction. For most purposes these commands are +sufficient. The \fItoken\fR can also be used to access +the internal state of the transaction, which is stored in a Tcl array. 
+This facility is most useful when writing callback commands for the +options \fB\-command\fR, \fB\-handler\fR, \fB\-progress\fR, +or \fB\-queryprogress\fR. +Use the following command inside the proc to define an easy-to-use +array \fIstate\fR as a local variable within the proc: .PP .CS -upvar #0 $token state +upvar 0 $token state .CE .PP Once the data associated with the URL is no longer needed, the state array should be unset to free up storage. The \fB::http::cleanup\fR procedure is provided for that purpose. -The following elements of -the array are supported: +.PP +The following elements of the array are supported, and are the origin of the +values returned by commands as described below. When a dictionary key is +mentioned below, this refers to the \fBdict\fR value returned by +command \fB::http::responseInfo\fR. .RS .TP \fBbinary\fR . -This is boolean \fBtrue\fR if (after decoding any compression specified -by the -.QW "Content-Encoding" -response header) the HTTP response is binary. It is boolean \fBfalse\fR -if the HTTP response is text. +For dictionary key \fIbinary\fR. .TP \fBbody\fR . -The contents of the URL. This will be empty if the \fB\-channel\fR -option has been specified. This value is returned by the \fB::http::data\fR -command. +For command \fB::http::responseBody\fR. .TP \fBcharset\fR . -The value of the charset attribute from the \fBContent-Type\fR metadata -value. If none was specified, this defaults to the RFC standard -\fBiso8859-1\fR, or the value of \fB$::http::defaultCharset\fR. Incoming -text data will be automatically converted from this charset to utf-8. +For dictionary key \fIcharset\fR. .TP \fBcoding\fR . -A copy of the \fBContent-Encoding\fR metadata value. +For dictionary key \fIcompression\fR. +.TP +\fBconnection\fR +. +For dictionary key \fIconnectionActual\fR. .TP \fBcurrentsize\fR . -The current number of bytes fetched from the URL. -This value is returned by the \fB::http::size\fR command.
+For command \fB::http::size\fR; and for dictionary key \fIcurrentSize\fR. .TP \fBerror\fR . -If defined, this is the error string seen when the HTTP transaction -was aborted. +For command \fB::http::error\fR; part is used in dictionary key \fIerror\fR. .TP \fBhttp\fR . -The HTTP status reply from the server. This value -is returned by the \fB::http::code\fR command. The format of this value is: -.RS -.PP -.CS -\fIHTTP/1.1 code string\fR -.CE -.PP -The \fIcode\fR is a three-digit number defined in the HTTP standard. -A code of 200 is OK. Codes beginning with 4 or 5 indicate errors. -Codes beginning with 3 are redirection errors. In this case the -\fBLocation\fR metadata specifies a new URL that contains the -requested information. -.RE +For command \fB::http::responseLine\fR. +.TP +\fBhttpResponse\fR +. +For dictionary key \fIhttpResponse\fR. .TP \fBmeta\fR . -The response from a HTTP server includes metadata headers that describe the -response body and the message from the server. The \fBmeta\fR element of the -state array is a list of the keys (header names) and values (header values) of -the metadata. Header names are case-insensitive and are converted to lower -case. The value of meta is not a \fBdict\fR because some header names may -occur more than once, notably "set-cookie". If the value \fBmeta\fR is read -into a dict or into an array (using array set), only the last header with each -name will be preserved. -.PP -.RS -Some of the metadata keys are listed below, but the HTTP standard defines -more, and servers are free to add their own. +For command \fB::http::responseHeaders\fR. Further discussion above in the +section \fBMORE METADATA\fR. .TP -\fBContent-Type\fR +\fBmethod\fR . -The type of the URL contents. Examples include \fBtext/html\fR, -\fBimage/gif,\fR \fBapplication/postscript\fR and -\fBapplication/x-tcl\fR. +For dictionary key \fImethod\fR. .TP -\fBContent-Length\fR +\fBposterror\fR . -The advertised size of the contents. 
The actual size obtained by -\fB::http::geturl\fR is available as \fBstate(currentsize)\fR. +For dictionary key \fIpostError\fR. .TP -\fBLocation\fR +\fBpostErrorFull\fR . -An alternate URL that contains the requested data. -.RE +For command \fB::http::postError\fR. .TP -\fBposterror\fR +\fB\-protocol\fR . -The error, if any, that occurred while writing -the post query data to the server. +For dictionary key \fIhttpRequest\fR. +.TP +\fBquerylength\fR +. +For dictionary key \fItotalPost\fR. +.TP +\fBqueryoffset\fR +. +For dictionary key \fIcurrentPost\fR. +.TP +\fBreasonPhrase\fR +. +For dictionary key \fIreasonPhrase\fR. +.TP +\fBrequestHeaders\fR +. +For command \fB::http::requestHeaders\fR. +.TP +\fBrequestLine\fR +. +For command \fB::http::requestLine\fR. +.TP +\fBresponseCode\fR +. +For dictionary key \fIresponseCode\fR. +.TP +\fBstate\fR +. +For dictionary key \fIstage\fR. .TP \fBstatus\fR . -See description in the chapter \fBERRORS\fR above for a -list and description of \fBstatus\fR. -During the transaction this value is the empty string. +For command \fB::http::status\fR; and for dictionary key \fIstatus\fR. .TP \fBtotalsize\fR . -A copy of the \fBContent-Length\fR metadata value. +For dictionary key \fItotalSize\fR. +.TP +\fBtransfer\fR +. +For dictionary key \fItransferEncoding\fR. .TP \fBtype\fR . -A copy of the \fBContent-Type\fR metadata value. +For dictionary key \fIcontentType\fR. +.TP +\fBupgrade\fR +. +For dictionary key \fIupgrade\fR. .TP \fBurl\fR . -The requested URL. +For dictionary key \fIurl\fR. .RE .SH "PERSISTENT CONNECTIONS" .PP @@ -1153,7 +1417,7 @@ that fails because it uses a persistent connection that the server has half-closed (an .QW "asynchronous close event" ). Subsequent GET and HEAD requests in a failed pipeline will also be retried. 
-\fIThe \-repost option should be used only if the application understands +\fIThe \fB\-repost\fI option should be used only if the application understands that the retry is appropriate\fR - specifically, the application must know that if the failed POST successfully modified the state of the server, a repeat POST would have no adverse effect. @@ -1261,22 +1525,25 @@ Other keys may always be ignored; they have no meaning in this protocol. .VE TIP406 .SH "PROTOCOL UPGRADES" .PP -The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR client headers inform the server -that the client wishes to change the protocol used over the existing connection -(RFC 7230). This mechanism can be used to request a WebSocket (RFC 6455), a +The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR request headers inform the +server that the client wishes to change the protocol used over the existing +connection (RFC 7230). +This mechanism can be used to request a WebSocket (RFC 6455), a higher version of the HTTP protocol (HTTP 2), or TLS encryption. If the server accepts the upgrade request, its response code will be 101. .PP -To request a protocol upgrade when calling \fBhttp::geturl\fR, the \fB\-headers\fR -option must supply appropriate values for \fBConnection\fR and \fBUpgrade\fR, and +To request a protocol upgrade when calling \fBhttp::geturl\fR, +the \fB\-headers\fR option must supply appropriate values for \fBConnection\fR +and \fBUpgrade\fR, and the \fB\-command\fR option must supply a command that implements the requested protocol and can also handle the server response if the server refuses the protocol upgrade. For upgrade requests \fBhttp::geturl\fR ignores the value of option \fB\-keepalive\fR, and always uses the value \fB0\fR so that the upgrade -request is not made over a connection that is intended for multiple HTTP requests. +request is not made over a connection that is intended for multiple HTTP +requests. 
.PP -The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the necessary -calls to commands in the \fBhttp\fR package. +The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the +necessary calls to commands in the \fBhttp\fR package. .PP There is currently no native Tcl client library for HTTP/2. .PP @@ -1287,30 +1554,59 @@ protocols such as Internet Printing Protocol (IPP) that are built on top of traffic. .PP In browsers, opportunistic encryption is instead implemented by the -\fBUpgrade-Insecure-Requests\fR client header. If a secure service is available, -the server response code is a 307 redirect, and the response header -\fBLocation\fR specifies the target URL. The browser must call \fBhttp::geturl\fR -again in order to fetch this URL. +\fBUpgrade-Insecure-Requests\fR client header. If a secure service is +available, the server response code is a 307 redirect, and the response header +\fBLocation\fR specifies the target URL. The browser must +call \fBhttp::geturl\fR again in order to fetch this URL. See https://w3c.github.io/webappsec-upgrade-insecure-requests/ .PP .SH THREADS .PP .SS "PURPOSE" .PP -Command \fB::http::geturl\fR uses the Tcl \fB::socket\fR command with the \-async option to connect to a remote server, but the return from this command can be delayed in adverse cases (e.g. a slow DNS lookup), preventing the event loop from processing other events. This delay is avoided if the \fB::socket\fR command is evaluated in another thread. The Thread package is not part of Tcl but is provided in "Batteries Included" distributions. Instead of the \fB::socket\fR command, the http package uses \fB::http::socket\fR which makes connections in the manner specified by the value of \-threadlevel and the availability of package Thread. +Command \fB::http::geturl\fR uses the Tcl \fB::socket\fR command with +the \fI\-async\fR option to connect to a remote server, but the return from +this command can be delayed in adverse cases (e.g. 
a slow DNS lookup), +preventing the event loop from processing other events. +This delay is avoided if the \fB::socket\fR command is evaluated in another +thread. The Thread package is not part of Tcl but is provided in +"Batteries Included" distributions. Instead of the \fB::socket\fR command, +the http package uses \fB::http::socket\fR which makes connections in the +manner specified by the value of \fI\-threadlevel\fR and the availability +of package Thread. .PP .SS "WITH TLS (HTTPS)" .PP -The same \-threadlevel configuration applies to both HTTP and HTTPS connections. HTTPS is enabled by using the \fBhttp::register\fR command, typically by specifying the \fB::tls::socket\fR command of the tls package to handle TLS cryptography. The \fB::tls::socket\fR command connects to the remote server by using the command specified by the value of variable \fI::tls::socketCmd\fR, and this value defaults to "::socket". If http::geturl finds that \fI::tls::socketCmd\fR has this value, it replaces it with the value "::http::socket". If \fI::tls::socketCmd\fR has a value other than "::socket", i.e. if the script or the Tcl installation has replaced the value "::socket" with the name of a different command, then http does not change the value. The script or installation that modified \fI::tls::socketCmd\fR is responsible for integrating \fR::http::socket\fR into its own replacement command. +The same \fI\-threadlevel\fR configuration applies to both HTTP and HTTPS +connections. +HTTPS is enabled by using the \fBhttp::register\fR command, typically by +specifying the \fB::tls::socket\fR command of the tls package to handle TLS +cryptography. The \fB::tls::socket\fR command connects to the remote server by +using the command specified by the value of variable \fI::tls::socketCmd\fR, and +this value defaults to "::socket". If http::geturl finds +that \fI::tls::socketCmd\fR has this value, it replaces it with the value +"::http::socket". 
If \fI::tls::socketCmd\fR has a value other than "::socket", +i.e. if the script or the Tcl installation has replaced the value "::socket" +with the name of a different command, then http does not change the value. +The script or installation that modified \fI::tls::socketCmd\fR is responsible +for integrating \fB::http::socket\fR into its own replacement command. .PP .SS "WITH A CHILD INTERPRETER" .PP -The peer thread can transfer the socket only to the main interpreter of the script's thread. Therefore the thread-based \fB::http::socket\fR works with non-zero \-threadlevel values only if the script runs in the main interpreter. A child interpreter must use \-threadlevel 0 unless the parent interpreter has provided alternative facilities. The main parent interpreter may grant full \-threadlevel facilities to a child interpreter, for example by aliasing, to \fB::http::socket\fR in the child, a command that runs \fBhttp::socket\fR in the parent, and then transfers the socket to the child. +The peer thread can transfer the socket only to the main interpreter of the +script's thread. Therefore the thread-based \fB::http::socket\fR works with +non-zero \fI\-threadlevel\fR values only if the script runs in the main +interpreter. A child interpreter must use \fI\-threadlevel 0\fR unless the +parent interpreter has provided alternative facilities. The main parent +interpreter may grant full \fI\-threadlevel\fR facilities to a child +interpreter, for example by aliasing, to \fB::http::socket\fR in the child, +a command that runs \fBhttp::socket\fR in the parent, and then transfers +the socket to the child. .PP .SH EXAMPLE .PP This example creates a procedure to copy a URL to a file while printing a -progress meter, and prints the metadata associated with the URL. +progress meter, and prints the response headers associated with the URL.
.PP .CS proc httpcopy { url file {chunk 4096} } { @@ -1322,7 +1618,7 @@ proc httpcopy { url file {chunk 4096} } { # This ends the line started by httpCopyProgress puts stderr "" - upvar #0 $token state + upvar 0 $token state set max 0 foreach {name value} $state(meta) { if {[string length $name] > $max} { diff --git a/library/http/http.tcl b/library/http/http.tcl index 15fd031..23b065c 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1016,8 +1016,8 @@ proc http::CreateToken {url args} { status "" http "" httpResponse {} - ncode {} - reason {} + responseCode {} + reasonPhrase {} connection keep-alive tid {} requestHeaders {} @@ -1651,37 +1651,18 @@ proc http::OpenSocket {token DoLater} { dict unset socketCoEvent($state(socketinfo)) $token unset -nocomplain state(socketcoro) - set reusing $state(reusing) + if {[catch { + if {$state(reusing)} { + # If ($state(reusing)) is true, then we do not need to create a new + # socket, even if $sockOld is only a placeholder for a socket. + set sock $sockOld + } else { + # set sock in the [catch] below. + set pre [clock milliseconds] + ##Log pre socket opened, - token $token + ##Log $state(openCmd) - token $token + set sock [namespace eval :: $state(openCmd)] - if {$reusing} { - # If ($reusing) is true, then we do not need to create a new socket, - # even if $sockOld is only a placeholder for a socket. - set sock $sockOld - } else { - # set sock in the [catch] below. - set pre [clock milliseconds] - ##Log pre socket opened, - token $token - ##Log $state(openCmd) - token $token - if {[catch {namespace eval :: $state(openCmd)} sock errdict]} { - # ERROR CASE - # Something went wrong while trying to establish the connection. - # Tidy up after events and such, but DON'T call the command - # callback (if available). - # - When this was inline code in http::geturl, it threw an exception - # from here instead. 
- # - Now that this code is called from geturl as an idletask and not - # as inline code, it is inappropriate to run cleanup or throw an - # exception. Instead do a normal return, and let Finish report - # the error using token/state and the -command callback. - # Finish also undoes PreparePersistentConnection. - - set state(sock) NONE - set ::errorInfo [dict get $errdict -errorinfo] - set ::errorCode [dict get $errdict -errorcode] - Finish $token $sock - # cleanup $token - return - } else { # Normal return from $state(openCmd) always returns a valid socket. # Initialisation of a new socket. ##Log post socket opened, - token $token @@ -1694,15 +1675,16 @@ proc http::OpenSocket {token DoLater} { fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) ##Log socket opened, DONE fconfigure - token $token - } - } - - Log "Using $sock for $state(socketinfo) - token $token" \ - [expr {$state(-keepalive)?"keepalive":""}] + } - # Code above has set state(sock) $sock - ConfigureNewSocket $token $sockOld $DoLater + Log "Using $sock for $state(socketinfo) - token $token" \ + [expr {$state(-keepalive)?"keepalive":""}] + # Code above has set state(sock) $sock + ConfigureNewSocket $token $sockOld $DoLater + } result errdict]} { + Finish $token $result + } ##Log Leaving http::OpenSocket coroutine [info coroutine] - token $token return } @@ -3084,47 +3066,62 @@ proc http::Meta {token who args} { # Arguments: # token - connection token (name of an array) # -# Return Value: a dict +# Return Value: a dict. See man page http(n) for a description of each item. 
# ------------------------------------------------------------------------------ proc http::responseInfo {token} { variable $token upvar 0 $token state set result {} - foreach key { - stage - status - ncode - reason - type - binary - redirection - charset - coding - httpRequest - httpResponse - url - connRequest - connResponse - connection - transfer - querylength - queryoffset - totalsize - currentsize + foreach {key origin name} { + stage STATE state + status STATE status + responseCode STATE responseCode + reasonPhrase STATE reasonPhrase + contentType STATE type + binary STATE binary + redirection RESP location + upgrade STATE upgrade + error ERROR - + postError STATE posterror + method STATE method + charset STATE charset + compression STATE coding + httpRequest STATE -protocol + httpResponse STATE httpResponse + url STATE url + connectionRequest REQ connection + connectionResponse RESP connection + connectionActual STATE connection + transferEncoding STATE transfer + totalPost STATE querylength + currentPost STATE queryoffset + totalSize STATE totalsize + currentSize STATE currentsize } { - if {$key eq {stage}} { - dict set result $key $state(state) - } elseif {$key eq {redirection}} { - dict set result $key [responseHeaderValue $token Location] - } elseif {$key eq {httpRequest}} { - dict set result $key $state(-protocol) - } elseif {$key eq {connRequest}} { - dict set result $key [requestHeaderValue $token connection] - } elseif {$key eq {connResponse}} { - dict set result $key [responseHeaderValue $token connection] + if {$origin eq {STATE}} { + if {[info exists state($name)]} { + dict set result $key $state($name) + } else { + # Should never come here + dict set result $key {} + } + } elseif {$origin eq {REQ}} { + dict set result $key [requestHeaderValue $token $name] + } elseif {$origin eq {RESP}} { + dict set result $key [responseHeaderValue $token $name] + } elseif {$origin eq {ERROR}} { + # Don't flood the dict with data. 
The command ::http::error is + # available. + if {[info exists state(error)]} { + set msg [lindex $state(error) 0] + } else { + set msg {} + } + dict set result $key $msg } else { - dict set result $key $state($key) + # Should never come here + dict set result $key {} } } return $result @@ -3140,8 +3137,8 @@ proc http::error {token} { proc http::postError {token} { variable $token upvar 0 $token state - if {[info exists state(posterror)]} { - return $state(posterror) + if {[info exists state(postErrorFull)]} { + return $state(postErrorFull) } return } @@ -3309,11 +3306,13 @@ proc http::Write {token} { set done 1 } } - } err]} { + } err opts]} { # Do not call Finish here, but instead let the read half of the socket # process whatever server reply there is to get. - set state(posterror) $err + set info [dict get $opts -errorinfo] + set code [dict get $opts -code] + set state(postErrorFull) [list $err $info $code] set done 1 } @@ -3460,15 +3459,15 @@ proc http::Event {sock token} { # We have $state(http) so let's split it into its components. if {[regexp {^HTTP/(\S+) ([0-9]{3}) (.*)$} $state(http) \ - -> httpResponse ncode reason] + -> httpResponse responseCode reasonPhrase] } { set state(httpResponse) $httpResponse - set state(ncode) $ncode - set state(reason) $reason + set state(responseCode) $responseCode + set state(reasonPhrase) $reasonPhrase } else { set state(httpResponse) $state(http) - set state(ncode) $state(http) - set state(reason) $state(http) + set state(responseCode) $state(http) + set state(reasonPhrase) $state(http) } if { ([info exists state(connection)]) @@ -3674,13 +3673,12 @@ proc http::Event {sock token} { connection { # RFC 7230 Section 6.1 states that a comma-separated # list is an acceptable value. - if {![info exists state(connectionResponse)]} { + if {![info exists state(connectionRespFlag)]} { # This is the first "Connection" response header. # Scrub the earlier value set by iniitialisation. 
- set state(connectionResponse) {} + set state(connectionRespFlag) {} set state(connection) {} } - set state(connOrig[incr ::countConn]) [string trim $value] foreach el [SplitCommaSeparatedFieldValue $value] { lappend state(connection) [string tolower $el] } @@ -4423,6 +4421,7 @@ proc http::GuessType {token} { set state(body) [encoding convertfrom $enc $state(body)] set state(body) [string map {\r\n \n \r \n} $state(body)] set state(type) application/xml + set state(binary) 0 set state(charset) $res return 1 } @@ -4732,6 +4731,10 @@ interp alias {} http::ncode {} http::responseCode # - The http::socket command is simple, and can easily be replaced with an # alternative command that uses a different technique to open a socket while # entering the event loop. +# - Unexpected behaviour by thread::send -async (Thread 2.8.6). +# An error in thread::send -async causes return of just the error message +# (not the expected 3 elements), and raises a bgerror in the main thread. +# Hence wrap the command with catch as a precaution. 
# ------------------------------------------------------------------------------ proc http::socket {args} { @@ -4756,8 +4759,11 @@ proc http::socket {args} { set defcmd ::socket set sockargs $args set script " - [list proc ::SockInThread {caller defcmd sockargs} [info body ::http::SockInThread]] - [list ::SockInThread [thread::id] $defcmd $sockargs] + set code \[catch { + [list proc ::SockInThread {caller defcmd sockargs} [info body ::http::SockInThread]] + [list ::SockInThread [thread::id] $defcmd $sockargs] + } result opts\] + list \$code \$opts \$result " set state(tid) [thread::create] @@ -4779,10 +4785,26 @@ proc http::socket {args} { Log >U Thread End Wait $args -- coro [info coroutine] $varName [set $varName] thread::release $state(tid) set state(tid) {} - lassign [set $varName] catchCode errdict sock + set result [set $varName] unset $varName - dict set errdict -code $catchCode - return -options $errdict $sock + if {(![string is list $result]) || ([llength $result] != 3)} { + return -code error "result from peer thread is not a list of\ + length 3: it is \n$result" + } + lassign $result threadCode threadDict threadResult + if {($threadCode != 0)} { + # This is an error in thread::send. Return the lot. + return -options $threadDict -code error $threadResult + } + + # Now the results of the catch in the peer thread. + lassign $threadResult catchCode errdict sock + + if {($catchCode == 0) && ($sock ni [chan names])} { + return -code error {Transfer of socket from peer thread failed.\ + Check that this script is not running in a child interpreter.} + } + return -options $errdict -code $catchCode $sock } # The commands below are dependencies of http::socket and -- cgit v0.12 From bf9bd6f5fe6cf2220c77b85a5881596b5eebf27e Mon Sep 17 00:00:00 2001 From: kjnash Date: Thu, 15 Sep 2022 19:09:03 +0000 Subject: Undo temporary mods to tests. 
--- tests/httpPipeline.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/httpPipeline.test b/tests/httpPipeline.test index 50db441..161519f 100644 --- a/tests/httpPipeline.test +++ b/tests/httpPipeline.test @@ -674,7 +674,7 @@ proc RunTest {header footer delay te} { # If still obscure, uncomment #Log and ##Log lines in the http package. # ------------------------------------------------------------------------------ -setHttpTestOptions -verbose 2 +setHttpTestOptions -verbose 0 # ------------------------------------------------------------------------------ # (4) Define the base URLs used for testing. Each must have a query string. @@ -699,9 +699,9 @@ setHttpTestOptions -verbose 2 namespace eval ::httpTestScript { variable URL array set URL { - a http://test-tcl-http.kerlin.org.minerva/index.html?page=privacy - b http://test-tcl-http.kerlin.org.minerva/index.html?page=conditions - c http://test-tcl-http.kerlin.org.minerva/index.html?page=welcome + a http://test-tcl-http.kerlin.org/index.html?page=privacy + b http://test-tcl-http.kerlin.org/index.html?page=conditions + c http://test-tcl-http.kerlin.org/index.html?page=welcome } } -- cgit v0.12