From 55d9cca97fb558444ff53d71b4aef4ba99ef0274 Mon Sep 17 00:00:00 2001 From: kjnash Date: Tue, 13 Sep 2022 12:54:26 +0000 Subject: In namespace ::http, add new commands postError, responseInfo. Rename (the unreleased public API) reason to reasonPhrase. Rename private commands make-transformation-chunked to MakeTransformationChunked, getTextLine to GetTextLine. Rename mapReply to quoteString (and reverse the aliasing). Update namespace exports. Conventional use of fully-qualified command names. Initialise some members of state array. --- doc/http.n | 137 ++++++++++++++++++++++++++++++++++++++++-- library/http/http.tcl | 160 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 260 insertions(+), 37 deletions(-) diff --git a/doc/http.n b/doc/http.n index dcd65ae..8a9c35b 100644 --- a/doc/http.n +++ b/doc/http.n @@ -34,6 +34,8 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::error \fItoken\fR .sp +\fB::http::postError \fItoken\fR +.sp \fB::http::cleanup \fItoken\fR .sp \fB::http::requestLine\fR \fItoken\fR @@ -46,12 +48,14 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::responseCode\fR \fItoken\fR .sp -\fB::http::reason \fIcode\fR +\fB::http::reasonPhrase\fR \fIcode\fR .sp \fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? 
.sp \fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR .sp +\fB::http::responseInfo\fR \fItoken\fR +.sp \fB::http::responseBody\fR \fItoken\fR .sp \fB::http::register \fIproto port command\fR @@ -72,11 +76,15 @@ http \- Client-side implementation of the HTTP/1.1 protocol .SH "EXPORTED COMMANDS" .PP Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, -\fBgeturl\fR, \fBquoteString\fR, \fBregister\fR, \fBregisterError\fR, +\fBgeturl\fR, \fBpostError\fR, \fBquoteString\fR, \fBreasonPhrase\fR, +\fBregister\fR, +\fBregisterError\fR, \fBrequestHeaders\fR, \fBrequestHeaderValue\fR, +\fBrequestLine\fR, \fBresponseBody\fR, \fBresponseCode\fR, +\fBresponseHeaders\fR, \fBresponseHeaderValue\fR, \fBresponseInfo\fR, \fBresponseLine\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. .PP It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, -\fBerror\fR, \fBmeta\fR, \fBmetaValue\fR, \fBncode\fR, \fBreason\fR, +\fBerror\fR, \fBmeta\fR, \fBmetaValue\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. .BE .SH DESCRIPTION @@ -525,6 +533,10 @@ received from the URL in the \fB::http::geturl\fR call. This is a convenience procedure that returns the \fBerror\fR element of the state array. .TP +\fB::http::postError\fR \fItoken\fR +. +A POST request is a call to \fB::http::geturl\fR with either the \fB\-query\fR or \fB\-querychannel\fR option. The \fB::http::postError\fR command returns the error string generated when a HTTP POST request sends its request-body to the server; or the empty string if there was no error. When this type of error occurs, the \fB::http::geturl\fR command continues the transaction and attempts to receive a response from the server. +.TP \fB::http::cleanup\fR \fItoken\fR . This procedure cleans up the state associated with the connection @@ -600,7 +612,7 @@ The "reason phrase" is a textual description of the "status code": it may vary from server to server, and can be changed without affecting the HTTP protocol. 
The recommended values (RFC 7231 and IANA assignments) for each code are provided by the -command \fB::http::reason\fR. +command \fB::http::reasonPhrase\fR. .RE .TP \fB::http::responseCode\fR \fItoken\fR @@ -610,7 +622,7 @@ This command returns the "status code" (200, 404, etc.) of the server line is returned. See command \fB::http::code\fR for more information on the "status line". .TP -\fB::http::reason\fR \fIcode\fR +\fB::http::reasonPhrase\fR \fIcode\fR . This command returns the IANA recommended "reason phrase" for a particular "status code" returned by a HTTP server. The argument \fIcode\fR is a valid @@ -657,6 +669,14 @@ comma-separated-list syntax and cannot be combined into a single value. Each \fIset-cookie\fR header must be treated individually, e.g. by processing the return value of \fB::http::responseHeaders\fR \fItoken\fR \fIset-cookie\fR. .TP +\fB::http::responseInfo\fR \fItoken\fR +. +This command returns a \fBdict\fR of selected response metadata that are essential for identifying a successful transaction and making use of the response, along with other metadata that are informational. The keys of the \fBdict\fR are \fIstage\fR, \fIstatus\fR, \fIncode\fR, \fIreason\fR, \fItype\fR, \fIbinary\fR, \fIredirection\fR, \fIcharset\fR, \fIcoding\fR, \fIhttpRequest\fR, \fIhttpResponse\fR, \fIurl\fR, \fIconnRequest\fR, \fIconnResponse\fR, \fIconnection\fR, \fItransfer\fR, \fIquerylength\fR, \fIqueryoffset\fR, \fItotalsize\fR, and \fIcurrentsize\fR. The meaning of these keys is described in the chapter \fBMETADATA\fR below. +.RS +.PP +It is always worth checking the value of \fIbinary\fR after a HTTP transaction, to determine whether a misconfigured server has caused http to interpret a text resource as a binary, or vice versa. +.RE +.TP \fB::http::responseBody\fR \fItoken\fR . This command returns the entity sent by the HTTP server (unless @@ -807,6 +827,113 @@ transaction. If it can read the server's response it will end up with an \fBok\fR status, otherwise it will have an \fBeof\fR status. 
+.SH "METADATA" +When a HTTP server responds to a request, it supplies not only the entity requested, but also metadata. This is provided by the first line (the "status line") of the response, and by a number of HTTP headers. Further metadata relates to how \fB::http::geturl\fR has processed the response from the server. +.PP +The most important metadata can be accessed with the command +\fB::http::responseInfo\fR. +This command returns a \fBdict\fR of metadata that are essential for identifying a successful transaction and making use of the response, along with other metadata that are informational. The keys of the \fBdict\fR are: +.RS +.RS +\fB===== Essential Values =====\fR +.RE +.RE +.TP +\fBstage\fR +. +This value, set by \fB::http::geturl\fR, describes the stage that the transaction has reached. Values, in order of the transaction lifecycle, are: "created", "connecting", "header", "body", and "complete". Other \fBdict\fR keys are available when the value of stage is "body" or "complete". The key \fBconnection\fR has its final value only when \fBstage\fR is "complete". +.TP +\fBstatus\fR +. +This value, set by \fB::http::geturl\fR, is "ok" for a successful transaction; "eof", "error", "timeout", or "reset" for an unsuccessful transaction; or "" if the transaction is still in progress. In the last case the values for other dictionary keys may not be available. The meaning of these values is described in the chapter \fBERRORS\fR (above). +.TP +\fBncode\fR +. +The "HTTP status code" for the response. +.TP +\fBreason\fR +. +The "reason phrase" sent by the server. +.TP +\fBtype\fR +. +The value of the \fBContent-Type\fR response header or, if the header was not supplied, the default value "application/octet-stream". +.TP +\fBbinary\fR +. +This boolean value, set by \fB::http::geturl\fR, describes how the command has interpreted the entity returned by the server (after decoding any compression specified +by the +.QW "Content-Encoding" +response header). 
This decoded entity is accessible as the return value of the command \fB::http::responseBody\fR. + +The value is \fBtrue\fR if http has interpreted the decoded entity as binary. The value returned by ::http::responseBody is a Tcl binary string. This is a suitable format for image data, zip files, etc. +\fB::http::geturl\fR chooses this value if the user has requested a binary interpretation by passing the option \fI\-binary\fR to the command, or if the server has supplied a binary content type in a Content-Type response header, or if the server has not supplied any Content-Type header. + +The value is \fBfalse\fR if http has interpreted the decoded entity as text. The text has been converted from the character set notified by the server into Tcl's internal Unicode format, and the value returned by ::http::responseBody is an ordinary Tcl string. + +It is always worth checking the value of "binary" after a HTTP transaction, to determine whether a misconfigured server has caused http to interpret a text resource as a binary, or vice versa. +.TP +\fBredirection\fR +. +The URL that is the redirection target. The value is that of the Location response header. This header is sent when a response has status code 3XX (redirection). +.RS +.RS +\fB===== Informational Values =====\fR +.RE +.RE +.TP +\fBcharset\fR +. +The value of the charset attribute of the \fBContent-Type\fR response header. The charset value is used only for a text resource. If the server did not specify a charset, the value defaults to that of the variable \fB::http::defaultCharset\fR, which unless it has been deliberately modified by the caller is \fBiso8859-1\fR. Incoming text data is automatically converted from the character set defined by \fBcharset\fR to Tcl's internal Unicode representation, i.e. to a Tcl string. +.TP +\fBcoding\fR +. +A copy of the \fBContent-Encoding\fR response-header value. +.TP +\fBhttpRequest\fR +. +The version of HTTP specified in the request (i.e. sent in the request line). 
+.TP +\fBhttpResponse\fR +. +The version of HTTP used by the server (obtained from the response "status line"). The server uses this version of HTTP in its response, but ensures that this response is compatible with the HTTP version specified in the client's request. +.TP +\fBurl\fR +. +The requested URL, typically the URL supplied as an argument to \fB::http::geturl\fR but without its "fragment" (the final part of the URL beginning with "#"). +.TP +\fBconnRequest\fR +. +The value, if any, sent to the server in "Connection" request header(s). +.TP +\fBconnResponse\fR +. +The value, if any, received from the server in "Connection" response header(s). +.TP +\fBconnection\fR +. +This value, set by \fB::http::geturl\fR, reports whether the connection was closed after the transaction (value "close"), or left open (value "keep-alive"). +.TP +\fBtransfer\fR +. +The value of the Transfer-Encoding response header, if it is present. The value is either "chunked" (indicating HTTP/1.1 "chunked encoding") or the empty string. +.TP +\fBquerylength\fR +. +The total length of the request body in a POST request. +.TP +\fBqueryoffset\fR +. +The number of bytes of the POST request body sent to the server so far. +.TP +\fBtotalsize\fR +. +A copy of the \fBContent-Length\fR response-header value. +The number of bytes specified in a Content-Length header, if one was sent. If none was sent, the value is 0. A correctly configured server omits this header if the transfer-encoding is "chunked", or (for older servers) if the server closes the connection when it reaches the end of the resource. +.TP +\fBcurrentsize\fR +. +The number of bytes fetched from the server so far. .SH "STATE ARRAY" The \fB::http::geturl\fR procedure returns a \fItoken\fR that can be used to get to the state of the HTTP transaction in the form of a Tcl array. 
diff --git a/library/http/http.tcl b/library/http/http.tcl index 359666d..15fd031 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -217,13 +217,22 @@ namespace eval http { 511 {Network Authentication Required} }] - namespace export geturl config reset wait formatQuery quoteString + namespace export geturl config reset wait formatQuery postError quoteString namespace export register unregister registerError - # - Useful, but not exported: data, size, status, code, cleanup, error, - # meta, ncode, mapReply, init. Comments suggest that "init" can be used - # for re-initialisation, although the command is undocumented. - # - Not exported, probably should be upper-case initial letter as part - # of the internals: getTextLine, make-transformation-chunked. + namespace export requestLine requestHeaders requestHeaderValue + namespace export responseLine responseHeaders responseHeaderValue + namespace export responseCode responseBody responseInfo reasonPhrase + # - Legacy aliases, were never exported: + # data, code, mapReply, meta, ncode + # - Callable from outside (e.g. from TLS) by fully-qualified name, but + # not exported: + # socket + # - Useful, but never exported (and likely to have naming collisions): + # size, status, cleanup, error, init + # Comments suggest that "init" can be used for re-initialisation, + # although the command is undocumented. + # - Never exported, renamed from lower-case names: + # GetTextLine, MakeTransformationChunked. } # http::Log -- @@ -318,7 +327,7 @@ proc http::config {args} { } # ------------------------------------------------------------------------------ -# Proc http::reason +# Proc http::reasonPhrase # ------------------------------------------------------------------------------ # Command to return the IANA-recommended "reason phrase" for a HTTP Status Code. 
# Information obtained from: @@ -330,7 +339,7 @@ proc http::config {args} { # Return Value: the reason phrase # ------------------------------------------------------------------------------ -proc http::reason {code} { +proc http::reasonPhrase {code} { variable reasonDict if {![regexp -- {^[1-5][0-9][0-9]$} $code]} { set msg {argument must be a three-digit integer from 100 to 599} @@ -1006,10 +1015,14 @@ proc http::CreateToken {url args} { body {} status "" http "" + httpResponse {} + ncode {} + reason {} connection keep-alive tid {} requestHeaders {} requestLine {} + transfer {} } set state(-keepalive) $defaultKeepalive set state(-strict) $strict @@ -2441,7 +2454,7 @@ proc http::EventGateway {sock token} { # http::reset or http::cleanup, or if the caller set option -channel # but not option -handler: in the last case reading from the socket is # now managed by commands ::http::Copy*, http::ReceiveChunked, and - # http::make-transformation-chunked. + # http::MakeTransformationChunked. # # Catch in case the coroutine has closed the socket. catch {fileevent $sock readable [list http::EventGateway $sock $token]} @@ -3061,6 +3074,61 @@ proc http::Meta {token who args} { } } + +# ------------------------------------------------------------------------------ +# Proc http::responseInfo +# ------------------------------------------------------------------------------ +# Command to return a dictionary of the most useful metadata of a HTTP +# response. 
+# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: a dict +# ------------------------------------------------------------------------------ + +proc http::responseInfo {token} { + variable $token + upvar 0 $token state + set result {} + foreach key { + stage + status + ncode + reason + type + binary + redirection + charset + coding + httpRequest + httpResponse + url + connRequest + connResponse + connection + transfer + querylength + queryoffset + totalsize + currentsize + } { + if {$key eq {stage}} { + dict set result $key $state(state) + } elseif {$key eq {redirection}} { + dict set result $key [responseHeaderValue $token Location] + } elseif {$key eq {httpRequest}} { + dict set result $key $state(-protocol) + } elseif {$key eq {connRequest}} { + dict set result $key [requestHeaderValue $token connection] + } elseif {$key eq {connResponse}} { + dict set result $key [responseHeaderValue $token connection] + } else { + dict set result $key $state($key) + } + } + return $result +} proc http::error {token} { variable $token upvar 0 $token state @@ -3069,6 +3137,14 @@ proc http::error {token} { } return } +proc http::postError {token} { + variable $token + upvar 0 $token state + if {[info exists state(posterror)]} { + return $state(posterror) + } + return +} # http::cleanup # @@ -3382,6 +3458,19 @@ proc http::Event {sock token} { } else { } + # We have $state(http) so let's split it into its components. 
+ if {[regexp {^HTTP/(\S+) ([0-9]{3}) (.*)$} $state(http) \ + -> httpResponse ncode reason] + } { + set state(httpResponse) $httpResponse + set state(ncode) $ncode + set state(reason) $reason + } else { + set state(httpResponse) $state(http) + set state(ncode) $state(http) + set state(reason) $state(http) + } + if { ([info exists state(connection)]) && ([info exists socketMapping($state(socketinfo))]) && ("keep-alive" in $state(connection)) @@ -3514,7 +3603,7 @@ proc http::Event {sock token} { && ("close" in $state(connection)) ) ) - && (![info exists state(transfer)]) + && ($state(transfer) eq {}) && ($state(totalsize) == 0) } { set msg {body size is 0 and no events likely - complete} @@ -3585,6 +3674,13 @@ proc http::Event {sock token} { connection { # RFC 7230 Section 6.1 states that a comma-separated # list is an acceptable value. + if {![info exists state(connectionResponse)]} { + # This is the first "Connection" response header. + # Scrub the earlier value set by initialisation. + set state(connectionResponse) {} + set state(connection) {} + } + set state(connOrig[incr ::countConn]) [string trim $value] foreach el [SplitCommaSeparatedFieldValue $value] { lappend state(connection) [string tolower $el] } @@ -3652,7 +3748,7 @@ proc http::Event {sock token} { } } elseif {[info exists state(transfer_final)]} { # This code forgives EOF in place of the final CRLF. - set line [getTextLine $sock] + set line [GetTextLine $sock] set n [string length $line] set state(state) complete if {$n > 0} { @@ -3675,7 +3771,7 @@ proc http::Event {sock token} { } { ##Log chunked - token $token set size 0 - set hexLenChunk [getTextLine $sock] + set hexLenChunk [GetTextLine $sock] #set ntl [string length $hexLenChunk] if {[string trim $hexLenChunk] ne ""} { scan $hexLenChunk %x size @@ -3705,7 +3801,7 @@ proc http::Event {sock token} { } # CRLF that follows chunk. # If eof, this is handled at the end of this proc. 
- getTextLine $sock + GetTextLine $sock } else { set n 0 set state(transfer_final) {} @@ -3986,7 +4082,7 @@ proc http::ParseCookie {token value} { {*}$http(-cookiejar) storeCookie $realopts } -# http::getTextLine -- +# http::GetTextLine -- # # Get one line with the stream in crlf mode. # Used if Transfer-Encoding is chunked, to read the line that @@ -4000,7 +4096,7 @@ proc http::ParseCookie {token value} { # Results: # The line of text, without trailing newline -proc http::getTextLine {sock} { +proc http::GetTextLine {sock} { set tr [fconfigure $sock -translation] lassign $tr trRead trWrite fconfigure $sock -translation [list crlf $trWrite] @@ -4076,7 +4172,7 @@ proc http::CopyStart {sock token {initial 1}} { } lappend state(zlib) [zlib stream $coding2] } - make-transformation-chunked $sock [namespace code [list CopyChunk $token]] + MakeTransformationChunked $sock [namespace code [list CopyChunk $token]] } else { if {$initial} { foreach coding [ContentEncoding $token] { @@ -4376,7 +4472,7 @@ proc http::formatQuery {args} { set result "" set sep "" foreach i $args { - append result $sep [mapReply $i] + append result $sep [quoteString $i] if {$sep eq "="} { set sep & } else { @@ -4386,7 +4482,7 @@ proc http::formatQuery {args} { return $result } -# http::mapReply -- +# http::quoteString -- # # Do x-www-urlencoded character mapping # @@ -4396,7 +4492,7 @@ proc http::formatQuery {args} { # Results: # The encoded string -proc http::mapReply {string} { +proc http::quoteString {string} { variable http variable formMap @@ -4407,7 +4503,6 @@ proc http::mapReply {string} { set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } -interp alias {} http::quoteString {} http::mapReply # http::ProxyRequired -- # Default proxy filter. 
@@ -4600,7 +4695,7 @@ proc http::GetFieldValue {headers fieldName} { return $r } -proc http::make-transformation-chunked {chan command} { +proc http::MakeTransformationChunked {chan command} { coroutine [namespace current]::dechunk$chan ::http::ReceiveChunked $chan $command chan event $chan readable [namespace current]::dechunk$chan return @@ -4608,6 +4703,7 @@ proc http::make-transformation-chunked {chan command} { interp alias {} http::data {} http::responseBody interp alias {} http::code {} http::responseLine +interp alias {} http::mapReply {} http::quoteString interp alias {} http::meta {} http::responseHeaders interp alias {} http::metaValue {} http::responseHeaderValue interp alias {} http::ncode {} http::responseCode @@ -4660,7 +4756,7 @@ proc http::socket {args} { set defcmd ::socket set sockargs $args set script " - [list proc ::SockInThread {caller defcmd sockargs} [info body http::SockInThread]] + [list proc ::SockInThread {caller defcmd sockargs} [info body ::http::SockInThread]] [list ::SockInThread [thread::id] $defcmd $sockargs] " @@ -4750,7 +4846,7 @@ proc http::SockInThread {caller defcmd sockargs} { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::cwait +# Proc http::cwaiter::cwait # ------------------------------------------------------------------------------ # Command to substitute for vwait, without the ordering issues. 
# A command that uses cwait must be a coroutine that is launched by an event, @@ -4769,13 +4865,13 @@ proc http::SockInThread {caller defcmd sockargs} { # Return Value: none # ------------------------------------------------------------------------------ -namespace eval ::http::cwaiter { +namespace eval http::cwaiter { namespace export cwait variable log {} variable logOn 0 } -proc ::http::cwaiter::cwait { +proc http::cwaiter::cwait { varName {coroName {}} {timeout {}} {timeoutValue {}} } { set thisCoro [info coroutine] @@ -4806,7 +4902,7 @@ proc ::http::cwaiter::cwait { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::CwaitHelper +# Proc http::cwaiter::CwaitHelper # ------------------------------------------------------------------------------ # Helper command called by the trace set by cwait. # - Ignores the arguments added by trace. @@ -4817,7 +4913,7 @@ proc ::http::cwaiter::cwait { # - Remove the trace immediately. We don't want multiple calls. # ------------------------------------------------------------------------------ -proc ::http::cwaiter::CwaitHelper {varName coroName toe args} { +proc http::cwaiter::CwaitHelper {varName coroName toe args} { CoLog "got $varName for $coroName" set cmd [list ::http::cwaiter::CwaitHelper $varName $coroName $toe] trace remove variable $varName write $cmd @@ -4829,12 +4925,12 @@ proc ::http::cwaiter::CwaitHelper {varName coroName toe args} { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::LogInit +# Proc http::cwaiter::LogInit # ------------------------------------------------------------------------------ # Call this command to initiate debug logging and clear the log. 
# ------------------------------------------------------------------------------ -proc ::http::cwaiter::LogInit {} { +proc http::cwaiter::LogInit {} { variable log variable logOn set log {} @@ -4842,12 +4938,12 @@ proc ::http::cwaiter::LogInit {} { return } -proc ::http::cwaiter::LogRead {} { +proc http::cwaiter::LogRead {} { variable log return $log } -proc ::http::cwaiter::CoLog {msg} { +proc http::cwaiter::CoLog {msg} { variable log variable logOn if {$logOn} { @@ -4856,7 +4952,7 @@ proc ::http::cwaiter::CoLog {msg} { return } -namespace eval ::http { +namespace eval http { namespace import ::http::cwaiter::* } -- cgit v0.12