diff options
| -rw-r--r-- | doc/http.n | 984 | ||||
| -rw-r--r-- | library/http/http.tcl | 692 | ||||
| -rw-r--r-- | tests/http.test | 10 | ||||
| -rw-r--r-- | tests/http11.test | 161 | ||||
| -rw-r--r-- | tests/httpd11.tcl | 31 |
5 files changed, 1544 insertions, 334 deletions
@@ -13,7 +13,7 @@ .SH NAME http \- Client-side implementation of the HTTP/1.1 protocol .SH SYNOPSIS -\fBpackage require http\fI ?\fB2.10\fR? +\fBpackage require http\fR ?\fB2.10\fR? .\" See Also -useragent option documentation in body! .sp \fB::http::config\fR ?\fI\-option value\fR ...? @@ -32,36 +32,67 @@ http \- Client-side implementation of the HTTP/1.1 protocol .sp \fB::http::size \fItoken\fR .sp -\fB::http::code \fItoken\fR +\fB::http::error \fItoken\fR .sp -\fB::http::ncode \fItoken\fR +\fB::http::postError \fItoken\fR +.sp +\fB::http::cleanup \fItoken\fR .sp -\fB::http::meta \fItoken\fR +\fB::http::requestLine\fR \fItoken\fR .sp -\fB::http::data \fItoken\fR +\fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? .sp -\fB::http::error \fItoken\fR +\fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR .sp -\fB::http::cleanup \fItoken\fR +\fB::http::responseLine\fR \fItoken\fR +.sp +\fB::http::responseCode\fR \fItoken\fR +.sp +\fB::http::reasonPhrase\fR \fIcode\fR +.sp +\fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? +.sp +\fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR +.sp +\fB::http::responseInfo\fR \fItoken\fR +.sp +\fB::http::responseBody\fR \fItoken\fR .sp \fB::http::register \fIproto port command\fR .sp \fB::http::registerError \fIport\fR ?\fImessage\fR? .sp \fB::http::unregister \fIproto\fR +.sp +\fB::http::code \fItoken\fR +.sp +\fB::http::data \fItoken\fR +.sp +\fB::http::meta \fItoken\fR ?\fIheaderName\fR? 
+.sp +\fB::http::metaValue\fR \fItoken\fR \fIheaderName\fR +.sp +\fB::http::ncode \fItoken\fR .SH "EXPORTED COMMANDS" .PP Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, -\fBgeturl\fR, \fBquoteString\fR, \fBregister\fR, \fBregisterError\fR, +\fBgeturl\fR, \fBpostError\fR, \fBquoteString\fR, \fBreasonPhrase\fR, +\fBregister\fR, +\fBregisterError\fR, \fBrequestHeaders\fR, \fBrequestHeaderValue\fR, +\fBrequestLine\fR, \fBresponseBody\fR, \fBresponseCode\fR, +\fBresponseHeaders\fR, \fBresponseHeaderValue\fR, \fBresponseInfo\fR, +\fBresponseLine\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. .PP It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, -\fBerror\fR, \fBmeta\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. +\fBerror\fR, \fBmeta\fR, \fBmetaValue\fR, \fBncode\fR, +\fBsize\fR, or \fBstatus\fR. .BE .SH DESCRIPTION .PP The \fBhttp\fR package provides the client side of the HTTP/1.1 -protocol, as defined in RFC 7230 to RFC 7235, which supersede RFC 2616. +protocol, as defined in RFC 9110 to 9112, which supersede RFC 7230 +to RFC 7235, which in turn supersede RFC 2616. The package implements the GET, POST, and HEAD operations of HTTP/1.1. It allows configuration of a proxy host to get through firewalls. The package is compatible with the \fBSafesock\fR security @@ -74,14 +105,13 @@ The \fB::http::geturl\fR procedure does a HTTP transaction. Its \fIoptions \fR determine whether a GET, POST, or HEAD transaction is performed. The return value of \fB::http::geturl\fR is a token for the transaction. -The value is also the name of an array in the ::http namespace -that contains state information about the transaction. The elements -of this array are described in the \fBSTATE ARRAY\fR section. +The token can be supplied as an argument to other commands, to manage the +transaction and examine its results. .PP If the \fB\-command\fR option is specified, then the HTTP operation is done in the background. 
\fB::http::geturl\fR returns immediately after generating the -HTTP request and the callback is invoked +HTTP request and the \fB\-command\fR callback is invoked when the transaction completes. For this to work, the Tcl event loop must be active. In Tk applications this is always true. For pure-Tcl applications, the caller can use \fB::http::wait\fR after calling @@ -90,6 +120,15 @@ applications, the caller can use \fB::http::wait\fR after calling \fBNote:\fR The event queue is even used without the \fB\-command\fR option. As a side effect, arbitrary commands may be processed while \fBhttp::geturl\fR is running. +.PP +When the HTTP server has replied to the request, call the command +\fB::http::responseInfo\fR, which +returns a \fBdict\fR of metadata that is essential for identifying a +successful transaction and making use of the response. See +section \fBMETADATA\fR for details of the information returned. +The response itself is returned by command \fB::http::responseBody\fR, +unless it has been redirected to a file by the \fI\-channel\fR option +of \fB::http::geturl\fR. .SH COMMANDS .TP \fB::http::config\fR ?\fIoptions\fR? @@ -175,7 +214,8 @@ default is 0. .TP \fB\-threadlevel\fR \fIlevel\fR . -Specifies whether and how to use the \fBThread\fR package. Possible values of \fIlevel\fR are 0, 1 or 2. +Specifies whether and how to use the \fBThread\fR package. Possible values +of \fIlevel\fR are 0, 1 or 2. .RS .PP .DS @@ -183,7 +223,11 @@ Specifies whether and how to use the \fBThread\fR package. Possible values of \ 1 - use Thread if it is available, do not use it if it is unavailable 2 - use Thread if it is available, raise an error if it is unavailable .DE -The Tcl \fBsocket -async\fR command can block in adverse cases (e.g. a slow DNS lookup). Using the Thread package works around this problem, for both HTTP and HTTPS transactions. Values of \fIlevel\fR other than 0 are available only to the main interpreter in each thread. 
See section \fBTHREADS\fR for more information. +The Tcl \fBsocket -async\fR command can block in adverse cases (e.g. a slow +DNS lookup). Using the Thread package works around this problem, for both +HTTP and HTTPS transactions. Values of \fIlevel\fR other than 0 are +available only to the main interpreter in each thread. See +section \fBTHREADS\fR for more information. .RE .TP \fB\-urlencoding\fR \fIencoding\fR @@ -205,21 +249,22 @@ numbers of \fBhttp\fR and \fBTcl\fR. \fB\-zip\fR \fIboolean\fR . If the value is boolean \fBtrue\fR, then by default requests will send a header -.QW "\fBAccept-Encoding: gzip,deflate,compress\fR" . -If the value is boolean \fBfalse\fR, then by default this header will not be -sent. In either case the default can be overridden for an individual request by +.QW "\fBAccept-Encoding: gzip,deflate\fR" . +If the value is boolean \fBfalse\fR, then by default requests will send a header +.QW "\fBAccept-Encoding: identity\fR" . +In either case the default can be overridden for an individual request by supplying a custom \fBAccept-Encoding\fR header in the \fB\-headers\fR option -of \fBhttp::geturl\fR. The default is 1. +of \fBhttp::geturl\fR. The default value is 1. .RE .TP \fB::http::geturl\fR \fIurl\fR ?\fIoptions\fR? . The \fB::http::geturl\fR command is the main procedure in the package. -The \fB\-query\fR option causes a POST operation and +The \fB\-query\fR or \fB\-querychannel\fR option causes a POST operation and the \fB\-validate\fR option causes a HEAD operation; otherwise, a GET operation is performed. The \fB::http::geturl\fR command -returns a \fItoken\fR value that can be used to get -information about the transaction. See the \fBSTATE ARRAY\fR and +returns a \fItoken\fR value that can be passed as an argument to other commands +to get information about the transaction. See the \fBMETADATA\fR and \fBERRORS\fR section for details. 
The \fB::http::geturl\fR command blocks until the operation completes, unless the \fB\-command\fR option specifies a callback @@ -231,7 +276,7 @@ that is invoked when the HTTP transaction completes. . Specifies whether to force interpreting the URL data as binary. Normally this is auto-detected (anything not beginning with a \fBtext\fR content -type or whose content encoding is \fBgzip\fR or \fBcompress\fR is +type or whose content encoding is \fBgzip\fR or \fBdeflate\fR is considered binary data). .TP \fB\-blocksize\fR \fIsize\fR @@ -243,13 +288,14 @@ At most \fIsize\fR bytes are read at once. After each block, a call to the \fB\-channel\fR \fIname\fR . Copy the URL contents to channel \fIname\fR instead of saving it in -\fBstate(body)\fR. +a Tcl variable for retrieval by \fB::http::responseBody\fR. .TP \fB\-command\fR \fIcallback\fR . -Invoke \fIcallback\fR after the HTTP transaction completes. -This option causes \fB::http::geturl\fR to return immediately. -The \fIcallback\fR gets an additional argument that is the \fItoken\fR returned +The presence of this option causes \fB::http::geturl\fR to return immediately. +After the HTTP transaction completes, the value of \fIcallback\fR is expanded, +an additional argument is added, and the resulting command is evaluated. +The additional argument is the \fItoken\fR returned from \fB::http::geturl\fR. This token is the name of an array that is described in the \fBSTATE ARRAY\fR section. Here is a template for the callback: @@ -257,8 +303,10 @@ callback: .PP .CS proc httpCallback {token} { - upvar #0 $token state - # Access state as a Tcl array + upvar 0 $token state + # Access state as a Tcl array defined in this proc + ... + return } .CE .PP @@ -268,11 +316,30 @@ not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for details. .RE .TP +\fB\-guesstype\fR \fIboolean\fR +. +Attempt to guess the \fBContent-Type\fR and character set when a misconfigured +server provides no information. 
The default value is \fIfalse\fR (do +nothing). If boolean \fItrue\fR then, if the server does not send a +\fBContent-Type\fR header, or if it sends the value "application/octet-stream", +\fBhttp::geturl\fR will attempt to guess appropriate values. This is not +intended to become a general-purpose tool, and currently it is limited to +detecting XML documents that begin with an XML declaration. In this case +the \fBContent-Type\fR is changed to "application/xml", the binary flag +state(binary) is changed to 0, and the character set is changed to +the one specified by the "encoding" tag of the XML line, or to utf-8 if no +encoding is specified. Not used if a \fI\-channel\fR is specified. +.TP \fB\-handler\fR \fIcallback\fR . -Invoke \fIcallback\fR whenever HTTP data is available; if present, nothing -else will be done with the HTTP data. This procedure gets two additional -arguments: the socket for the HTTP data and the \fItoken\fR returned from +If this option is absent, \fBhttp::geturl\fR processes incoming data itself, +either appending it to the state(body) variable or writing it to the -channel. +But if the \fB\-handler\fR option is present, \fBhttp::geturl\fR does not do +this processing and instead calls \fIcallback\fR. +Whenever HTTP data is available, the value of \fIcallback\fR is expanded, an +additional two arguments are added, and the resulting command is evaluated. +The two additional +arguments are: the socket for the HTTP data and the \fItoken\fR returned from \fB::http::geturl\fR. The token is the name of a global array that is described in the \fBSTATE ARRAY\fR section. The procedure is expected to return the number of bytes read from the socket. Here is a @@ -281,8 +348,8 @@ template for the callback: .PP .CS proc httpHandlerCallback {socket token} { - upvar #0 $token state - # Access socket, and state as a Tcl array + upvar 0 $token state + # Access socket, and state as a Tcl array defined in this proc # For example... ... 
set data [read $socket 1000] @@ -295,8 +362,9 @@ proc httpHandlerCallback {socket token} { The \fBhttp::geturl\fR code for the \fB\-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB\-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the -HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will not -send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). +HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will +send the header \fBAccept-Encoding: identity\fR instead +of \fBAccept-Encoding: gzip,deflate\fR). .PP If options \fB\-handler\fR and \fB\-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified @@ -342,7 +410,10 @@ It is the caller's responsibility to ensure that the headers and request body (if any) conform to the requirements of the request method. For example, if using \fB\-method\fR \fIPOST\fR to send a POST with an empty request body, the caller must also supply the option -.QW "\-headers {Content-Length 0}" . +.PP +.CS +\-headers {Content-Length 0} +.CE .RE .TP \fB\-myaddr\fR \fIaddress\fR @@ -352,18 +423,26 @@ multiple interfaces are available. .TP \fB\-progress\fR \fIcallback\fR . -The \fIcallback\fR is made after each transfer of data from the URL. -The callback gets three additional arguments: the \fItoken\fR from +If the \fB\-progress\fR option is present, +then the \fIcallback\fR is made after each transfer of data from the URL. +The value of \fIcallback\fR is expanded, an additional three arguments are +added, and the resulting command is evaluated. +The three additional arguments are: the \fItoken\fR returned from \fB::http::geturl\fR, the expected total size of the contents from the -\fBContent-Length\fR meta-data, and the current number of bytes -transferred so far. 
The expected total size may be unknown, in which +\fBContent-Length\fR response header, and the current number of bytes +transferred so far. The token is the name of a global array that is +described in the \fBSTATE ARRAY\fR section. The expected total size may +be unknown, in which case zero is passed to the callback. Here is a template for the progress callback: .RS .PP .CS proc httpProgress {token total current} { - upvar #0 $token state + upvar 0 $token state + # Access state as a Tcl array defined in this proc + ... + return } .CE .RE @@ -407,20 +486,24 @@ This flag causes \fB::http::geturl\fR to do a POST request that passes the data contained in \fIchannelID\fR to the server. The data contained in \fIchannelID\fR must be an x-url-encoding formatted query unless the \fB\-type\fR option below is used. -If a Content-Length header is not specified via the \fB\-headers\fR options, -\fB::http::geturl\fR attempts to determine the size of the post data +If a \fBContent-Length\fR header is not specified via the \fB\-headers\fR +options, \fB::http::geturl\fR attempts to determine the size of the post data in order to create that header. If it is unable to determine the size, it returns an error. .TP \fB\-queryprogress\fR \fIcallback\fR . -The \fIcallback\fR is made after each transfer of data to the URL -(i.e. POST) and acts exactly like the \fB\-progress\fR option (the -callback format is the same). +If the \fB\-queryprogress\fR option is present, +then the \fIcallback\fR is made after each transfer of data to the URL +in a POST request (i.e. a call to \fB::http::geturl\fR with +option \fB\-query\fR or \fB\-querychannel\fR) and acts exactly like +the \fB\-progress\fR option (the callback format is the same). .TP \fB\-strict\fR \fIboolean\fR . -Whether to enforce RFC 3986 URL validation on the request. Default is 1. +If true then the command will test that the URL complies with RFC 3986, i.e. +that it has no characters that should be "x-url-encoded" (e.g. 
a space should +be encoded to "%20"). Default value is 1. .TP \fB\-timeout\fR \fImilliseconds\fR . @@ -428,7 +511,8 @@ If \fImilliseconds\fR is non-zero, then \fB::http::geturl\fR sets up a timeout to occur after the specified number of milliseconds. A timeout results in a call to \fB::http::reset\fR and to the \fB\-command\fR callback, if specified. -The return value of \fB::http::status\fR is \fBtimeout\fR +The return value of \fB::http::status\fR (and the value of the \fIstatus\fR key +in the dictionary returned by \fB::http::responseInfo\fR) is \fBtimeout\fR after a timeout has occurred. .TP \fB\-type\fR \fImime-type\fR @@ -440,10 +524,11 @@ POST operation. \fB\-validate\fR \fIboolean\fR . If \fIboolean\fR is non-zero, then \fB::http::geturl\fR does an HTTP HEAD -request. This request returns meta information about the URL, but the -contents are not returned. The meta information is available in the -\fBstate(meta) \fR variable after the transaction. See the -\fBSTATE ARRAY\fR section for details. +request. This server returns the same status line and response headers as it +would for a HTTP GET request, but omits the response entity +(the URL "contents"). The response headers are available after the +transaction using command \fB::http::responseHeaders\fR or, for selected +information, \fB::http::responseInfo\fR. .RE .TP \fB::http::formatQuery\fR \fIkey value\fR ?\fIkey value\fR ...? @@ -467,7 +552,7 @@ This sets the \fBstate(status)\fR value to \fIwhy\fR, which defaults to .TP \fB::http::wait\fR \fItoken\fR . -This is a convenience procedure that blocks and waits for the +This command blocks and waits for the transaction to complete. This only works in trusted code because it uses \fBvwait\fR. 
Also, it is not useful for the case where \fB::http::geturl\fR is called \fIwithout\fR the \fB\-command\fR option @@ -475,54 +560,210 @@ because in this case the \fB::http::geturl\fR call does not return until the HTTP transaction is complete, and thus there is nothing to wait for. .TP -\fB::http::data\fR \fItoken\fR -. -This is a convenience procedure that returns the \fBbody\fR element -(i.e., the URL data) of the state array. -.TP -\fB::http::error\fR \fItoken\fR -. -This is a convenience procedure that returns the \fBerror\fR element -of the state array. -.TP \fB::http::status\fR \fItoken\fR . -This is a convenience procedure that returns the \fBstatus\fR element of -the state array. -.TP -\fB::http::code\fR \fItoken\fR -. -This is a convenience procedure that returns the \fBhttp\fR element of the -state array. +This command returns a description of the status of the HTTP transaction. +The return value is the empty string until the HTTP transaction is +completed; after completion it has one of the values ok, eof, error, +timeout, and reset. The meaning of these values is described in the +section \fBERRORS\fR (below). +.PP +.RS +The name "status" is not related to the terms "status line" and +"status code" that are defined for a HTTP response. +.RE .TP -\fB::http::ncode\fR \fItoken\fR +\fB::http::size\fR \fItoken\fR . -This is a convenience procedure that returns just the numeric return -code (200, 404, etc.) from the \fBhttp\fR element of the state array. +This command returns the number of bytes +received so far from the URL in the \fB::http::geturl\fR call. .TP -\fB::http::size\fR \fItoken\fR +\fB::http::error\fR \fItoken\fR . -This is a convenience procedure that returns the \fBcurrentsize\fR -element of the state array, which represents the number of bytes -received from the URL in the \fB::http::geturl\fR call. +This command returns the error information if the HTTP transaction failed, +or the empty string if there was no error. 
The information is a Tcl list of +the error message, stack trace, and error code. .TP -\fB::http::meta\fR \fItoken\fR +\fB::http::postError\fR \fItoken\fR . -This is a convenience procedure that returns the \fBmeta\fR -element of the state array which contains the HTTP response -headers. See below for an explanation of this element. +A POST request is a call to \fB::http::geturl\fR with either +the \fB\-query\fR or \fB\-querychannel\fR option. +The \fB::http::postError\fR command returns the error information generated +when a HTTP POST request sends its request-body to the server; or the empty +string if there was no error. The information is a Tcl list of the error +message, stack trace, and error code. When this type of error occurs, +the \fB::http::geturl\fR command continues the transaction and attempts to +receive a response from the server. .TP \fB::http::cleanup\fR \fItoken\fR . This procedure cleans up the state associated with the connection identified by \fItoken\fR. After this call, the procedures -like \fB::http::data\fR cannot be used to get information +like \fB::http::responseBody\fR cannot be used to get information about the operation. It is \fIstrongly\fR recommended that you call this function after you are done with a given HTTP request. Not doing so will result in memory not being freed, and if your app calls \fB::http::geturl\fR enough times, the memory leak could cause a performance hit...or worse. .TP +\fB::http::requestLine\fR \fItoken\fR +. +This command returns the "request line" sent to the server. +The "request line" is the first line of a HTTP client request, and has three +elements separated by spaces: the HTTP method, the URL relative to the server, +and the HTTP version. Examples: +.PP +.DS +.RS +GET / HTTP/1.1 +GET /introduction.html?subject=plumbing HTTP/1.1 +POST /forms/order.html HTTP/1.1 +.RE +.DE +.TP +\fB::http::requestHeaders\fR \fItoken\fR ?\fIheaderName\fR? +. 
+This command returns the HTTP request header names and values, in the +order that they were sent to the server, as a Tcl list of the form +?name value ...? Header names are case-insensitive and are converted to lower +case. The return value is not a \fBdict\fR because some header names may occur +more than once. If one argument is supplied, all request headers +are returned. If two arguments are supplied, the +second provides the value of a header name. Only headers with the requested +name (converted to lower case) are returned. If no such headers are found, +an empty list is returned. +.TP +\fB::http::requestHeaderValue\fR \fItoken\fR \fIheaderName\fR +. +This command returns the value of the HTTP request header named +\fIheaderName\fR. Header names are case-insensitive and are converted to +lower case. If no such header exists, the return value is the empty string. +If there are multiple headers named \fIheaderName\fR, the result is obtained +by joining the individual values with the string ", " (comma and space), +preserving their order. +.TP +\fB::http::responseLine\fR \fItoken\fR +. +This command returns the first line of the server response: the +HTTP "status line". The "status line" has three +elements separated by spaces: the HTTP version, a three-digit numerical +"status code", and a "reason phrase". Only the reason phrase may contain +spaces. Examples: +.PP +.DS +.RS +HTTP/1.1 200 OK +HTTP/1.0 404 Not Found +.RE +.DE +.RS +The "status code" is a three-digit number in the range 100 to 599. +A value of 200 is the normal return from a GET request, and its matching +"reason phrase" is "OK". Codes beginning with 4 or 5 indicate errors. +Codes beginning with 3 indicate redirection. In this case the +\fBLocation\fR response header specifies a new URL that contains the +requested information. +.PP +The "reason phrase" is a textual description of the "status code": it may +vary from server to server, +and can be changed without affecting the HTTP protocol. 
The recommended +values (RFC 7231 and IANA assignments) for each code are provided by the +command \fB::http::reasonPhrase\fR. +.RE +.TP +\fB::http::responseCode\fR \fItoken\fR +. +This command returns the "status code" (200, 404, etc.) of the server +"status line". If a three-digit code cannot be found, the full status +line is returned. See command \fB::http::responseLine\fR for more information +on the "status line". +.TP +\fB::http::reasonPhrase\fR \fIcode\fR +. +This command returns the IANA recommended "reason phrase" for a particular +"status code" returned by a HTTP server. The argument \fIcode\fR is a valid +status code, and therefore is an integer in the range 100 to 599 inclusive. +For numbers in this range with no assigned meaning, the command returns the +value "Unassigned". Several status codes are used only in response to the +methods defined by HTTP extensions such as WebDAV, and not in response to a +HEAD, GET, or POST request method. +.PP +.RS +The "reason phrase" returned by a HTTP server may differ from the recommended +value, without affecting the HTTP protocol. The value returned by +\fB::http::geturl\fR can be obtained by calling either command +\fB::http::responseLine\fR (which returns the full status line) or command +\fB::http::responseInfo\fR (which returns a dictionary, with +the "reason phrase" stored in key \fIreasonPhrase\fR). +.PP +A registry of valid status codes is maintained at +https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml +.RE +.TP +\fB::http::responseHeaders\fR \fItoken\fR ?\fIheaderName\fR? +. +The response from a HTTP server includes metadata headers that describe the +response body and the transaction itself. +This command returns the HTTP response header names and values, in the +order that they were received from the server, as a Tcl list of the form +?name value ...? Header names are case-insensitive and are converted to lower +case. 
The return value is not a \fBdict\fR because some header names may occur +more than once, notably \fBSet-Cookie\fR. If the second argument is not +supplied, all response headers are returned. If the second argument is +supplied, it provides the value of a header name. Only headers with the +requested name (converted to lower case) are returned. If no such headers +are found, an empty list is returned. See section \fBMETADATA\fR for more +information. +.TP +\fB::http::responseHeaderValue\fR \fItoken\fR \fIheaderName\fR +. +This command returns the value of the HTTP response header named +\fIheaderName\fR. Header names are case-insensitive and are converted to +lower case. If no such header exists, the return value is the empty string. +If there are multiple headers named \fIheaderName\fR, the result is obtained +by joining the individual values with the string ", " (comma and space), +preserving their order. Multiple headers with the same name may be processed +in this manner, except \fBSet-Cookie\fR which does not conform to the +comma-separated-list syntax and cannot be combined into a single value. +Each \fBSet-Cookie\fR header must be treated individually, e.g. by processing +the return value of \fB::http::responseHeaders\fR \fItoken\fR \fBSet-Cookie\fR. +.TP +\fB::http::responseInfo\fR \fItoken\fR +. +This command returns a \fBdict\fR of selected response metadata that are +essential for identifying a successful transaction and making use of the +response, along with other metadata that are informational. 
The keys of +the \fBdict\fR are \fIstage\fR, \fIstatus\fR, \fIresponseCode\fR, +\fIreasonPhrase\fR, \fIcontentType\fR, \fIbinary\fR, \fIredirection\fR, +\fIupgrade\fR, \fIerror\fR, \fIpostError\fR, \fImethod\fR, \fIcharset\fR, +\fIcompression\fR, \fIhttpRequest\fR, \fIhttpResponse\fR, \fIurl\fR, +\fIconnectionRequest\fR, \fIconnectionResponse\fR, \fIconnectionActual\fR, +\fItransferEncoding\fR, \fItotalPost\fR, \fIcurrentPost\fR, \fItotalSize\fR, +and \fIcurrentSize\fR. The meaning of these keys is described in the +section \fBMETADATA\fR below. +.RS +.PP +It is always worth checking the value of \fIbinary\fR after a HTTP transaction, +to determine whether a misconfigured server has caused http to interpret a +text resource as a binary, or vice versa. +.PP +After a POST transaction, check the value of \fIpostError\fR to verify that +the request body was uploaded without error. +.RE +.TP +\fB::http::responseBody\fR \fItoken\fR +. +This command returns the entity sent by the HTTP server (unless +\fI-channel\fR was used, in which case the entity was delivered to the +channel, and the command returns the empty string). +.RS +.PP +Other terms for +"entity", with varying precision, include "representation of resource", +"resource", "response body after decoding", "payload", +"message body after decoding", "content(s)", and "file". +.RE +.TP \fB::http::register\fR \fIproto port command\fR . This procedure allows one to provide custom HTTP transport types @@ -558,18 +799,34 @@ registered via \fB::http::register\fR, returning a two-item list of the default port and handler command that was previously installed (via \fB::http::register\fR) if there was such a handler, and an error if there was no such handler. +.TP +\fB::http::code\fR \fItoken\fR +. +An alternative name for the command \fB::http::responseLine\fR +.TP +\fB::http::data\fR \fItoken\fR +. +An alternative name for the command \fB::http::responseBody\fR. +.TP +\fB::http::meta\fR \fItoken\fR ?\fIheaderName\fR? +. 
+An alternative name for the command \fB::http::responseHeaders\fR. +.TP +\fB::http::ncode\fR \fItoken\fR +. +An alternative name for the command \fB::http::responseCode\fR. .SH ERRORS The \fB::http::geturl\fR procedure will raise errors in the following cases: invalid command line options, -an invalid URL, -a URL on a non-existent host, -or a URL at a bad port on an existing host. +or an invalid URL. These errors mean that it cannot even start the network transaction. -It will also raise an error if it gets an I/O error while -writing out the HTTP request header. For synchronous \fB::http::geturl\fR calls (where \fB\-command\fR is -not specified), it will raise an error if it gets an I/O error while +not specified), it will raise an error if +the URL is on a non-existent host +or at a bad port on an existing host. +It will also raise an error for any I/O errors while +writing out the HTTP request line and headers, or reading the HTTP reply headers or data. Because \fB::http::geturl\fR does not return a token in these cases, it does all the required cleanup and there is no issue of your app having to call @@ -581,13 +838,12 @@ HTTP reply headers or data, no exception is thrown. This is because after writing the HTTP headers, \fB::http::geturl\fR returns, and the rest of the HTTP transaction occurs in the background. The command callback can check if any error occurred during the read by calling -\fB::http::status\fR to check the status and if its \fIerror\fR, -calling \fB::http::error\fR to get the error message. +\fB::http::responseInfo\fR to check the transaction status. .PP Alternatively, if the main program flow reaches a point where it needs to know the result of the asynchronous HTTP request, it can call \fB::http::wait\fR and then check status and error, just as the -callback does. +synchronous call does. .PP The \fB::http::geturl\fR command runs the \fB\-command\fR, \fB\-handler\fR, and \fB\-proxyfilter\fR callbacks inside a \fBcatch\fR command. 
Therefore @@ -601,15 +857,17 @@ In any case, you must still call \fB::http::cleanup\fR to delete the state array when you are done. .PP There are other possible results of the HTTP transaction -determined by examining the status from \fB::http::status\fR. +determined by examining the status from \fB::http::status\fR (or the value +of the \fIstatus\fR key in the dictionary returned +by \fB::http::responseInfo\fR). These are described below. .TP \fBok\fR . If the HTTP transaction completes entirely, then status will be \fBok\fR. -However, you should still check the \fB::http::code\fR value to get -the HTTP status. The \fB::http::ncode\fR procedure provides just -the numeric error (e.g., 200, 404 or 500) while the \fB::http::code\fR +However, you should still check the \fB::http::responseLine\fR value to get +the HTTP status. The \fB::http::responseCode\fR procedure provides just +the numeric error (e.g., 200, 404 or 500) while the \fB::http::responseLine\fR procedure returns a value like .QW "HTTP 404 File not found" . .TP @@ -620,147 +878,447 @@ is raised, but the status of the transaction will be \fBeof\fR. .TP \fBerror\fR . -The error message will also be stored in the \fBerror\fR status -array element, accessible via \fB::http::error\fR. +The error message, stack trace, and error code are accessible +via \fB::http::error\fR. The error message is also provided by the value of +the \fIerror\fR key in the dictionary returned by \fB::http::responseInfo\fR. .TP \fBtimeout\fR . -A timeout occurred before the transaction could complete +A timeout occurred before the transaction could complete. .TP \fBreset\fR . -user-reset -.PP -Another error possibility is that \fB::http::geturl\fR is unable to -write all the post query data to the server before the server -responds and closes the socket. -The error message is saved in the \fBposterror\fR status array -element and then \fB::http::geturl\fR attempts to complete the -transaction. 
-If it can read the server's response -it will end up with an \fBok\fR status, otherwise it will have -an \fBeof\fR status. +The user has called \fB::http::reset\fR. +.TP +\fB""\fR +. +(empty string) The transaction has not yet finished. +.PP +Another error possibility is that \fB::http::geturl\fR failed to +write the whole of the POST request body (\fB-query\fR or \fB-querychannel\fR +data) to the server. \fB::http::geturl\fR stores the error message for later +retrieval by the \fB::http::postError\fR or \fB::http::responseInfo\fR +commands, and then attempts to complete the transaction. +If it can read the server's response the status will be \fBok\fR, but it is +important to call \fB::http::postError\fR or \fB::http::responseInfo\fR after +every POST to check that the data was sent in full. +If the server has closed the connection the status will be \fBeof\fR. +.SH "METADATA" +.PP +.SS "MOST USEFUL METADATA" +When a HTTP server responds to a request, it supplies not only the entity +requested, but also metadata. This is provided by the first line (the +"status line") of the response, and by a number of HTTP headers. Further +metadata relates to how \fB::http::geturl\fR has processed the response +from the server. +.PP +The most important metadata can be accessed with the command +\fB::http::responseInfo\fR. +This command returns a \fBdict\fR of metadata that are essential for +identifying a successful transaction and making use of the response, +along with other metadata that are informational. The keys of +the \fBdict\fR are: +.PP +.RS +.RS +\fB===== Essential Values =====\fR +.RE +.RE +.TP +\fBstage\fR +. +This value, set by \fB::http::geturl\fR, describes the stage that the +transaction has reached. Values, in order of the transaction lifecycle, +are: "created", "connecting", "header", "body", and "complete". The +other \fBdict\fR keys will not be available until the value of \fBstage\fR +is "body" or "complete". 
The key \fBcurrentSize\fR has its final value only +when \fBstage\fR is "complete". +.TP +\fBstatus\fR +. +This value, set by \fB::http::geturl\fR, is "ok" for a successful transaction; +"eof", "error", "timeout", or "reset" for an unsuccessful transaction; or "" +if the transaction is still in progress. The value is the same as that +returned by command \fB::http::status\fR. The meaning of these values is +described in the section \fBERRORS\fR (above). +.TP +\fBresponseCode\fR +. +The "HTTP status code" sent by the server in the first line (the "status line") +of the response. If the value cannot be extracted from the status line, the +full status line is returned. +.TP +\fBreasonPhrase\fR +. +The "reason phrase" sent by the server as a description of the HTTP status code. +If the value cannot be extracted from the status line, the full status +line is returned. +.TP +\fBcontentType\fR +. +The value of the \fBContent-Type\fR response header or, if the header was not +supplied, the default value "application/octet-stream". +.TP +\fBbinary\fR +. +This boolean value, set by \fB::http::geturl\fR, describes how the command +has interpreted the entity returned by the server (after decoding any +compression specified by the \fBContent-Encoding\fR response header). +This decoded entity is accessible as the return value of the +command \fB::http::responseBody\fR. +.PP +.RS +The value is \fBtrue\fR if http has interpreted the decoded entity as binary. +The value returned by \fB::http::responseBody\fR is a Tcl binary string. +This is a suitable format for image data, zip files, etc. +\fB::http::geturl\fR chooses this value if the user has requested a binary +interpretation by passing the option \fI\-binary\fR to the command, or if the +server has supplied a binary content type in a \fBContent-Type\fR response +header, or if the server has not supplied any \fBContent-Type\fR header. 
+.PP +The value is \fBfalse\fR in other cases, and this means that http has +interpreted the decoded entity as text. The text has been converted, from the +character set notified by the server, into Tcl's internal Unicode format; +the value returned by \fB::http::responseBody\fR is an ordinary Tcl string. +.PP +It is always worth checking the value of "binary" after a HTTP transaction, +to determine whether a misconfigured server has caused http to interpret a +text resource as a binary, or vice versa. +.RE +.TP +\fBredirection\fR +. +The URL that is the redirection target. The value is that of the \fBLocation\fR +response header. This header is sent when a response has status code +3XX (redirection). +.TP +\fBupgrade\fR +. +If not empty, the value indicates the protocol(s) to which the server will +switch after completion of this transaction, while continuing to use the +same connection. When the server intends to switch protocols, it will also +send the value "101" as the status code (the \fBresponseCode\fR key), and the +word "upgrade" as an element of the \fBConnection\fR response header (the +\fBconnectionResponse\fR key), and it will not send a response body. +See the section \fBPROTOCOL UPGRADES\fR for more information. +.TP +\fBerror\fR +. +The error message, if there is one. Further information, including a stack +trace and error code, are available from command \fB::http::error\fR. +.TP +\fBpostError\fR +. +The error message (if any) generated when a HTTP POST request sends its +request-body to the server. Further information, including a stack trace +and error code, are available from command \fB::http::postError\fR. A POST +transaction may appear complete, according to the +keys \fBstage\fR, \fBstatus\fR, and \fBresponseCode\fR, but it is important +to check this \fBpostError\fR key in case an error occurred when uploading +the request-body. +.PP +.RS +.RS +\fB===== Informational Values =====\fR +.RE +.RE +.TP +\fBmethod\fR +. 
+The HTTP method used in the request. +.TP +\fBcharset\fR +. +The value of the charset attribute of the \fBContent-Type\fR response header. +The charset value is used only for a text resource. If the server did not +specify a charset, the value defaults to that of the +variable \fB::http::defaultCharset\fR, which unless it has been deliberately +modified by the caller is \fBiso8859-1\fR. Incoming text data is automatically +converted from the character set defined by \fBcharset\fR to Tcl's internal +Unicode representation, i.e. to a Tcl string. +.TP +\fBcompression\fR +. +A copy of the \fBContent-Encoding\fR response-header value. +.TP +\fBhttpRequest\fR +. +The version of HTTP specified in the request (i.e. sent in the request line). +The value is that of the option \fB\-protocol\fR supplied +to \fB::http::geturl\fR (default value "1.1"), unless the command reduced the +value to "1.0" because it was passed the \fB\-handler\fR option. +.TP +\fBhttpResponse\fR +. +The version of HTTP used by the server (obtained from the response +"status line"). The server uses this version of HTTP in its response, but +ensures that this response is compatible with the HTTP version specified in the +client's request. If the value cannot be extracted from the status line, the +full status line is returned. +.TP +\fBurl\fR +. +The requested URL, typically the URL supplied as an argument +to \fB::http::geturl\fR but without its "fragment" (the final part of the URL +beginning with "#"). +.TP +\fBconnectionRequest\fR +. +The value, if any, sent to the server in \fBConnection\fR request header(s). +.TP +\fBconnectionResponse\fR +. +The value, if any, received from the server in \fBConnection\fR response +header(s). +.TP +\fBconnectionActual\fR +. +This value, set by \fB::http::geturl\fR, reports whether the connection was +closed after the transaction (value "close"), or left open (value "keep-alive"). +.TP +\fBtransferEncoding\fR +. 
+The value of the Transfer-Encoding response header, if it is present. +The value is either "chunked" (indicating HTTP/1.1 "chunked encoding") or +the empty string. +.TP +\fBtotalPost\fR +. +The total length of the request body in a POST request. +.TP +\fBcurrentPost\fR +. +The number of bytes of the POST request body sent to the server so far. +The value is the same as that returned by command \fB::http::size\fR. +.TP +\fBtotalSize\fR +. +A copy of the \fBContent-Length\fR response-header value. +The number of bytes specified in a \fBContent-Length\fR header, if one +was sent. If none was sent, the value is 0. A correctly configured server +omits this header if the transfer-encoding is "chunked", or (for older +servers) if the server closes the connection when it reaches the end of +the resource. +.TP +\fBcurrentSize\fR +. +The number of bytes fetched from the server so far. +.PP +.SS "MORE METADATA" +The dictionary returned by \fB::http::responseInfo\fR is the most useful +subset of the available metadata. Other metadata include: +.PP +1. The full "status line" of the response, available as the return value +of command \fB::http::responseLine\fR. +.PP +2. The full response headers, available as the return value of +command \fB::http::responseHeaders\fR. This return value is a list of the +response-header names and values, in the order that they were received from +the server. +.PP +The return value is not a \fBdict\fR because some header names may +occur more than once, notably \fBSet-Cookie\fR. If the value is read +into a \fBdict\fR or into an array (using array set), only the last header +with each name will be preserved. +.PP +.RS +Some of the header names (metadata keys) are listed below, but the HTTP +standard defines several more, and servers are free to add their own. +When a dictionary key is mentioned below, this refers to the \fBdict\fR +value returned by command \fB::http::responseInfo\fR. +.TP +\fBContent-Type\fR +. 
+The content type of the URL contents. Examples include \fBtext/html\fR, +\fBimage/gif,\fR \fBapplication/postscript\fR and +\fBapplication/x-tcl\fR. Text values typically specify a character set, e.g. +\fBtext/html; charset=UTF-8\fR. Dictionary key \fIcontentType\fR. +.TP +\fBContent-Length\fR +. +The advertised size in bytes of the contents, available as dictionary +key \fItotalSize\fR. The actual number of bytes read by \fB::http::geturl\fR +so far is available as dictionary key \fIcurrentSize\fR. +.TP +\fBContent-Encoding\fR +. +The compression algorithm used for the contents. +Examples include \fBgzip\fR, \fBdeflate\fR. +Dictionary key \fIcompression\fR. +.TP +\fBLocation\fR +. +This header is sent when a response has status code 3XX (redirection). +It provides the URL that is the redirection target. +Dictionary key \fIredirection\fR. +.TP +\fBSet-Cookie\fR +. +This header is sent to offer a cookie to the client. Cookie management is +done by the \fB::http::config\fR option \fI\-cookiejar\fR, and so +the \fBSet-Cookie\fR headers need not be parsed by user scripts. +See section \fBCOOKIE JAR PROTOCOL\fR. +.TP +\fBConnection\fR +. +The value can be supplied as a comma-separated list, or by multiple headers. +The list often has only one element, either "close" or "keep-alive". +The value "upgrade" indicates a successful upgrade request and is typically +combined with the status code 101, an \fBUpgrade\fR response header, and no +response body. Dictionary key \fIconnectionResponse\fR. +.TP +\fBUpgrade\fR +. +The value indicates the protocol(s) to which the server will switch +immediately after the empty line that terminates the 101 response headers. +Dictionary key \fIupgrade\fR. +.RE +.PP +.SS "EVEN MORE METADATA" +.PP +1. Details of the HTTP request. The request is determined by the options +supplied to \fB::http::geturl\fR and \fB::http::config\fR.
However, it is +sometimes helpful to examine what \fB::http::geturl\fR actually sent to the +server, and this information is available through +commands \fB::http::requestHeaders\fR and \fB::http::requestLine\fR. +.PP +2. The state array: the internal variables of \fB::http::geturl\fR. +It may sometimes be helpful to examine this array. +Details are given in the next section. .SH "STATE ARRAY" -The \fB::http::geturl\fR procedure returns a \fItoken\fR that can be used to -get to the state of the HTTP transaction in the form of a Tcl array. -Use this construct to create an easy-to-use array variable: +The \fB::http::geturl\fR procedure returns a \fItoken\fR that can be used +as an argument to other \fB::http::*\fR commands, which examine and manage +the state of the HTTP transaction. For most purposes these commands are +sufficient. The \fItoken\fR can also be used to access +the internal state of the transaction, which is stored in a Tcl array. +This facility is most useful when writing callback commands for the +options \fB\-command\fR, \fB\-handler\fR, \fB\-progress\fR, +or \fB\-queryprogress\fR. +Use the following command inside the proc to define an easy-to-use +array \fIstate\fR as a local variable within the proc .PP .CS -upvar #0 $token state +upvar 0 $token state .CE .PP Once the data associated with the URL is no longer needed, the state array should be unset to free up storage. The \fB::http::cleanup\fR procedure is provided for that purpose. -The following elements of -the array are supported: +.PP +The following elements of the array are supported, and are the origin of the +values returned by commands as described below. When a dictionary key is +mentioned below, this refers to the \fBdict\fR value returned by +command \fB::http::responseInfo\fR. .RS .TP \fBbinary\fR . -This is boolean \fBtrue\fR if (after decoding any compression specified -by the -.QW "Content-Encoding" -response header) the HTTP response is binary. 
It is boolean \fBfalse\fR -if the HTTP response is text. +For dictionary key \fIbinary\fR. .TP \fBbody\fR . -The contents of the URL. This will be empty if the \fB\-channel\fR -option has been specified. This value is returned by the \fB::http::data\fR -command. +For command \fB::http::responseBody\fR. .TP \fBcharset\fR . -The value of the charset attribute from the \fBContent-Type\fR meta-data -value. If none was specified, this defaults to the RFC standard -\fBiso8859-1\fR, or the value of \fB$::http::defaultCharset\fR. Incoming -text data will be automatically converted from this charset to utf-8. +For dictionary key \fIcharset\fR. .TP \fBcoding\fR . -A copy of the \fBContent-Encoding\fR meta-data value. +For dictionary key \fIcompression\fR. +.TP +\fBconnection\fR +. +For dictionary key \fIconnectionActual\fR. .TP \fBcurrentsize\fR . -The current number of bytes fetched from the URL. -This value is returned by the \fB::http::size\fR command. +For command \fB::http::size\fR; and for dictionary key \fIcurrentSize\fR. .TP \fBerror\fR . -If defined, this is the error string seen when the HTTP transaction -was aborted. +For command \fB::http::error\fR; part is used in dictionary key \fIerror\fR. .TP \fBhttp\fR . -The HTTP status reply from the server. This value -is returned by the \fB::http::code\fR command. The format of this value is: -.RS -.PP -.CS -\fIHTTP/1.1 code string\fR -.CE -.PP -The \fIcode\fR is a three-digit number defined in the HTTP standard. -A code of 200 is OK. Codes beginning with 4 or 5 indicate errors. -Codes beginning with 3 are redirection errors. In this case the -\fBLocation\fR meta-data specifies a new URL that contains the -requested information. -.RE +For command \fB::http::responseLine\fR. +.TP +\fBhttpResponse\fR +. +For dictionary key \fIhttpResponse\fR. .TP \fBmeta\fR . -The HTTP protocol returns meta-data that describes the URL contents. -The \fBmeta\fR element of the state array is a list of the keys and -values of the meta-data. 
This is in a format useful for initializing -an array that just contains the meta-data: -.RS -.PP -.CS -array set meta $state(meta) -.CE -.PP -Some of the meta-data keys are listed below, but the HTTP standard defines -more, and servers are free to add their own. +For command \fB::http::responseHeaders\fR. Further discussion above in the +section \fBMORE METADATA\fR. .TP -\fBContent-Type\fR +\fBmethod\fR . -The type of the URL contents. Examples include \fBtext/html\fR, -\fBimage/gif,\fR \fBapplication/postscript\fR and -\fBapplication/x-tcl\fR. +For dictionary key \fImethod\fR. .TP -\fBContent-Length\fR +\fBposterror\fR . -The advertised size of the contents. The actual size obtained by -\fB::http::geturl\fR is available as \fBstate(currentsize)\fR. +For dictionary key \fIpostError\fR. .TP -\fBLocation\fR +\fBpostErrorFull\fR . -An alternate URL that contains the requested data. -.RE +For command \fB::http::postError\fR. .TP -\fBposterror\fR +\fB\-protocol\fR +. +For dictionary key \fIhttpRequest\fR. +.TP +\fBquerylength\fR +. +For dictionary key \fItotalPost\fR. +.TP +\fBqueryoffset\fR . -The error, if any, that occurred while writing -the post query data to the server. +For dictionary key \fIcurrentPost\fR. +.TP +\fBreasonPhrase\fR +. +For dictionary key \fIreasonPhrase\fR. +.TP +\fBrequestHeaders\fR +. +For command \fB::http::requestHeaders\fR. +.TP +\fBrequestLine\fR +. +For command \fB::http::requestLine\fR. +.TP +\fBresponseCode\fR +. +For dictionary key \fIresponseCode\fR. +.TP +\fBstate\fR +. +For dictionary key \fIstage\fR. .TP \fBstatus\fR . -See description in the chapter \fBERRORS\fR above for a -list and description of \fBstatus\fR. -During the transaction this value is the empty string. +For command \fB::http::status\fR; and for dictionary key \fIstatus\fR. .TP \fBtotalsize\fR . -A copy of the \fBContent-Length\fR meta-data value. +For dictionary key \fItotalSize\fR. +.TP +\fBtransfer\fR +. +For dictionary key \fItransferEncoding\fR. .TP \fBtype\fR . 
-A copy of the \fBContent-Type\fR meta-data value. +For dictionary key \fIcontentType\fR. +.TP +\fBupgrade\fR +. +For dictionary key \fIupgrade\fR. .TP \fBurl\fR . -The requested URL. +For dictionary key \fIurl\fR. .RE .SH "PERSISTENT CONNECTIONS" .PP @@ -859,7 +1417,7 @@ that fails because it uses a persistent connection that the server has half-closed (an .QW "asynchronous close event" ). Subsequent GET and HEAD requests in a failed pipeline will also be retried. -\fIThe \-repost option should be used only if the application understands +\fIThe \fB\-repost\fI option should be used only if the application understands that the retry is appropriate\fR - specifically, the application must know that if the failed POST successfully modified the state of the server, a repeat POST would have no adverse effect. @@ -967,22 +1525,25 @@ Other keys may always be ignored; they have no meaning in this protocol. .VE TIP406 .SH "PROTOCOL UPGRADES" .PP -The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR client headers inform the server -that the client wishes to change the protocol used over the existing connection -(RFC 7230). This mechanism can be used to request a WebSocket (RFC 6455), a +The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR request headers inform the +server that the client wishes to change the protocol used over the existing +connection (RFC 7230). +This mechanism can be used to request a WebSocket (RFC 6455), a higher version of the HTTP protocol (HTTP 2), or TLS encryption. If the server accepts the upgrade request, its response code will be 101. 
.PP -To request a protocol upgrade when calling \fBhttp::geturl\fR, the \fB\-headers\fR -option must supply appropriate values for \fBConnection\fR and \fBUpgrade\fR, and +To request a protocol upgrade when calling \fBhttp::geturl\fR, +the \fB\-headers\fR option must supply appropriate values for \fBConnection\fR +and \fBUpgrade\fR, and the \fB\-command\fR option must supply a command that implements the requested protocol and can also handle the server response if the server refuses the protocol upgrade. For upgrade requests \fBhttp::geturl\fR ignores the value of option \fB\-keepalive\fR, and always uses the value \fB0\fR so that the upgrade -request is not made over a connection that is intended for multiple HTTP requests. +request is not made over a connection that is intended for multiple HTTP +requests. .PP -The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the necessary -calls to commands in the \fBhttp\fR package. +The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the +necessary calls to commands in the \fBhttp\fR package. .PP There is currently no native Tcl client library for HTTP/2. .PP @@ -993,30 +1554,59 @@ protocols such as Internet Printing Protocol (IPP) that are built on top of traffic. .PP In browsers, opportunistic encryption is instead implemented by the -\fBUpgrade-Insecure-Requests\fR client header. If a secure service is available, -the server response code is a 307 redirect, and the response header -\fBLocation\fR specifies the target URL. The browser must call \fBhttp::geturl\fR -again in order to fetch this URL. +\fBUpgrade-Insecure-Requests\fR client header. If a secure service is +available, the server response code is a 307 redirect, and the response header +\fBLocation\fR specifies the target URL. The browser must +call \fBhttp::geturl\fR again in order to fetch this URL. 
See https://w3c.github.io/webappsec-upgrade-insecure-requests/ .PP .SH THREADS .PP .SS "PURPOSE" .PP -Command \fB::http::geturl\fR uses the Tcl \fB::socket\fR command with the \-async option to connect to a remote server, but the return from this command can be delayed in adverse cases (e.g. a slow DNS lookup), preventing the event loop from processing other events. This delay is avoided if the \fB::socket\fR command is evaluated in another thread. The Thread package is not part of Tcl but is provided in "Batteries Included" distributions. Instead of the \fB::socket\fR command, the http package uses \fB::http::socket\fR which makes connections in the manner specified by the value of \-threadlevel and the availability of package Thread. +Command \fB::http::geturl\fR uses the Tcl \fB::socket\fR command with +the \fI\-async\fR option to connect to a remote server, but the return from +this command can be delayed in adverse cases (e.g. a slow DNS lookup), +preventing the event loop from processing other events. +This delay is avoided if the \fB::socket\fR command is evaluated in another +thread. The Thread package is not part of Tcl but is provided in +"Batteries Included" distributions. Instead of the \fB::socket\fR command, +the http package uses \fB::http::socket\fR which makes connections in the +manner specified by the value of \fI\-threadlevel\fR and the availability +of package Thread. .PP .SS "WITH TLS (HTTPS)" .PP -The same \-threadlevel configuration applies to both HTTP and HTTPS connections. HTTPS is enabled by using the \fBhttp::register\fR command, typically by specifying the \fB::tls::socket\fR command of the tls package to handle TLS cryptography. The \fB::tls::socket\fR command connects to the remote server by using the command specified by the value of variable \fB::tls::socketCmd\fR, and this value defaults to "::socket". If http::geturl finds that \fB::tls::socketCmd\fR has this value, it replaces it with the value "::http::socket". 
If \fB::tls::socketCmd\fR has a value other than "::socket", i.e. if the script or the Tcl installation has replaced the value "::socket" with the name of a different command, then http does not change the value. The script or installation that modified \fB::tls::socketCmd\fR is responsible for integrating \fB::http::socket\fR into its own replacement command. +The same \fI\-threadlevel\fR configuration applies to both HTTP and HTTPS +connections. +HTTPS is enabled by using the \fBhttp::register\fR command, typically by +specifying the \fB::tls::socket\fR command of the tls package to handle TLS +cryptography. The \fB::tls::socket\fR command connects to the remote server by +using the command specified by the value of variable \fB::tls::socketCmd\fR, and +this value defaults to "::socket". If http::geturl finds +that \fB::tls::socketCmd\fR has this value, it replaces it with the value +"::http::socket". If \fB::tls::socketCmd\fR has a value other than "::socket", +i.e. if the script or the Tcl installation has replaced the value "::socket" +with the name of a different command, then http does not change the value. +The script or installation that modified \fB::tls::socketCmd\fR is responsible +for integrating \fB::http::socket\fR into its own replacement command. .PP .SS "WITH A CHILD INTERPRETER" .PP -The peer thread can transfer the socket only to the main interpreter of the script's thread. Therefore the thread-based \fB::http::socket\fR works with non-zero \-threadlevel values only if the script runs in the main interpreter. A child interpreter must use \-threadlevel 0 unless the parent interpreter has provided alternative facilities. The main parent interpreter may grant full \-threadlevel facilities to a child interpreter, for example by aliasing, to \fB::http::socket\fR in the child, a command that runs \fBhttp::socket\fR in the parent, and then transfers the socket to the child.
+The peer thread can transfer the socket only to the main interpreter of the +script's thread. Therefore the thread-based \fB::http::socket\fR works with +non-zero \fI\-threadlevel\fR values only if the script runs in the main +interpreter. A child interpreter must use \fI\-threadlevel 0\fR unless the +parent interpreter has provided alternative facilities. The main parent +interpreter may grant full \fI\-threadlevel\fR facilities to a child +interpreter, for example by aliasing, to \fB::http::socket\fR in the child, +a command that runs \fBhttp::socket\fR in the parent, and then transfers +the socket to the child. .PP .SH EXAMPLE .PP This example creates a procedure to copy a URL to a file while printing a -progress meter, and prints the meta-data associated with the URL. +progress meter, and prints the response headers associated with the URL. .PP .CS proc httpcopy { url file {chunk 4096} } { @@ -1028,7 +1618,7 @@ proc httpcopy { url file {chunk 4096} } { # This ends the line started by httpCopyProgress puts stderr "" - upvar #0 $token state + upvar 0 $token state set max 0 foreach {name value} $state(meta) { if {[string length $name] > $max} { diff --git a/library/http/http.tcl b/library/http/http.tcl index 3f4da2e..326aede 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -151,13 +151,88 @@ namespace eval http { variable TmpSockCounter 0 variable ThreadCounter 0 - namespace export geturl config reset wait formatQuery quoteString + variable reasonDict [dict create {*}{ + 100 Continue + 101 {Switching Protocols} + 102 Processing + 103 {Early Hints} + 200 OK + 201 Created + 202 Accepted + 203 {Non-Authoritative Information} + 204 {No Content} + 205 {Reset Content} + 206 {Partial Content} + 207 Multi-Status + 208 {Already Reported} + 226 {IM Used} + 300 {Multiple Choices} + 301 {Moved Permanently} + 302 Found + 303 {See Other} + 304 {Not Modified} + 305 {Use Proxy} + 306 (Unused) + 307 {Temporary Redirect} + 308 {Permanent Redirect} + 400 {Bad 
Request} + 401 Unauthorized + 402 {Payment Required} + 403 Forbidden + 404 {Not Found} + 405 {Method Not Allowed} + 406 {Not Acceptable} + 407 {Proxy Authentication Required} + 408 {Request Timeout} + 409 Conflict + 410 Gone + 411 {Length Required} + 412 {Precondition Failed} + 413 {Content Too Large} + 414 {URI Too Long} + 415 {Unsupported Media Type} + 416 {Range Not Satisfiable} + 417 {Expectation Failed} + 418 (Unused) + 421 {Misdirected Request} + 422 {Unprocessable Content} + 423 Locked + 424 {Failed Dependency} + 425 {Too Early} + 426 {Upgrade Required} + 428 {Precondition Required} + 429 {Too Many Requests} + 431 {Request Header Fields Too Large} + 451 {Unavailable For Legal Reasons} + 500 {Internal Server Error} + 501 {Not Implemented} + 502 {Bad Gateway} + 503 {Service Unavailable} + 504 {Gateway Timeout} + 505 {HTTP Version Not Supported} + 506 {Variant Also Negotiates} + 507 {Insufficient Storage} + 508 {Loop Detected} + 510 {Not Extended (OBSOLETED)} + 511 {Network Authentication Required} + }] + + namespace export geturl config reset wait formatQuery postError quoteString namespace export register unregister registerError - # - Useful, but not exported: data, size, status, code, cleanup, error, - # meta, ncode, mapReply, init. Comments suggest that "init" can be used - # for re-initialisation, although the command is undocumented. - # - Not exported, probably should be upper-case initial letter as part - # of the internals: getTextLine, make-transformation-chunked. + namespace export requestLine requestHeaders requestHeaderValue + namespace export responseLine responseHeaders responseHeaderValue + namespace export responseCode responseBody responseInfo reasonPhrase + # - Legacy aliases, were never exported: + # data, code, mapReply, meta, ncode + # - Callable from outside (e.g. 
from TLS) by fully-qualified name, but + # not exported: + # socket + # - Useful, but never exported (and likely to have naming collisions): + # size, status, cleanup, error, init + # Comments suggest that "init" can be used for re-initialisation, + # although the command is undocumented. + # - Never exported, renamed from lower-case names: + # GetTextLine, MakeTransformationChunked. } # http::Log -- @@ -251,6 +326,33 @@ proc http::config {args} { } } +# ------------------------------------------------------------------------------ +# Proc http::reasonPhrase +# ------------------------------------------------------------------------------ +# Command to return the IANA-recommended "reason phrase" for a HTTP Status Code. +# Information obtained from: +# https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml +# +# Arguments: +# code - A valid HTTP Status Code (integer from 100 to 599) +# +# Return Value: the reason phrase +# ------------------------------------------------------------------------------ + +proc http::reasonPhrase {code} { + variable reasonDict + if {![regexp -- {^[1-5][0-9][0-9]$} $code]} { + set msg {argument must be a three-digit integer from 100 to 599} + return -code error $msg + } + if {[dict exists $reasonDict $code]} { + set reason [dict get $reasonDict $code] + } else { + set reason Unassigned + } + return $reason +} + # http::Finish -- # # Clean up the socket and eval close time callbacks @@ -368,7 +470,7 @@ proc http::Finish {token {errormsg ""} {skipCB 0}} { if {[info exists state(-command)] && (!$skipCB) && (![info exists state(done-command-cb)])} { set state(done-command-cb) yes - if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} { + if {[catch {namespace eval :: $state(-command) $token} err] && $errormsg eq ""} { set state(error) [list $err $errorInfo $errorCode] set state(status) error } @@ -899,6 +1001,7 @@ proc http::CreateToken {url args} { -type application/x-www-form-urlencoded -queryprogress 
{} -protocol 1.1 + -guesstype 0 binary 0 state created meta {} @@ -908,12 +1011,18 @@ proc http::CreateToken {url args} { totalsize 0 querylength 0 queryoffset 0 - type text/html + type application/octet-stream body {} status "" http "" + httpResponse {} + responseCode {} + reasonPhrase {} connection keep-alive tid {} + requestHeaders {} + requestLine {} + transfer {} } set state(-keepalive) $defaultKeepalive set state(-strict) $strict @@ -921,6 +1030,7 @@ proc http::CreateToken {url args} { array set type { -binary boolean -blocksize integer + -guesstype boolean -queryblocksize integer -strict boolean -timeout integer @@ -929,7 +1039,7 @@ proc http::CreateToken {url args} { } set state(charset) $defaultCharset set options { - -binary -blocksize -channel -command -handler -headers -keepalive + -binary -blocksize -channel -command -guesstype -handler -headers -keepalive -method -myaddr -progress -protocol -query -queryblocksize -querychannel -queryprogress -strict -timeout -type -validate } @@ -1001,6 +1111,9 @@ proc http::CreateToken {url args} { # Note that the RE actually combines the user and password parts, as # recommended in RFC 3986. Indeed, that RFC states that putting passwords # in URLs is a Really Bad Idea, something with which I would agree utterly. + # RFC 9110 Sec 4.2.4 goes further than this, and deprecates the format + # "user:password@". It is retained here for backward compatibility, + # but its use is not recommended. # # From a validation perspective, we need to ensure that the parts of the # URL that are going to the server are correctly encoded. This is only @@ -1538,37 +1651,18 @@ proc http::OpenSocket {token DoLater} { dict unset socketCoEvent($state(socketinfo)) $token unset -nocomplain state(socketcoro) - set reusing $state(reusing) + if {[catch { + if {$state(reusing)} { + # If ($state(reusing)) is true, then we do not need to create a new + # socket, even if $sockOld is only a placeholder for a socket. 
+ set sock $sockOld + } else { + # set sock in the [catch] below. + set pre [clock milliseconds] + ##Log pre socket opened, - token $token + ##Log $state(openCmd) - token $token + set sock [namespace eval :: $state(openCmd)] - if {$reusing} { - # If ($reusing) is true, then we do not need to create a new socket, - # even if $sockOld is only a placeholder for a socket. - set sock $sockOld - } else { - # set sock in the [catch] below. - set pre [clock milliseconds] - ##Log pre socket opened, - token $token - ##Log $state(openCmd) - token $token - if {[catch {eval $state(openCmd)} sock errdict]} { - # ERROR CASE - # Something went wrong while trying to establish the connection. - # Tidy up after events and such, but DON'T call the command - # callback (if available). - # - When this was inline code in http::geturl, it threw an exception - # from here instead. - # - Now that this code is called from geturl as an idletask and not - # as inline code, it is inappropriate to run cleanup or throw an - # exception. Instead do a normal return, and let Finish report - # the error using token/state and the -command callback. - # Finish also undoes PreparePersistentConnection. - - set state(sock) NONE - set ::errorInfo [dict get $errdict -errorinfo] - set ::errorCode [dict get $errdict -errorcode] - Finish $token $sock - # cleanup $token - return - } else { # Normal return from $state(openCmd) always returns a valid socket. # Initialisation of a new socket. 
##Log post socket opened, - token $token @@ -1581,15 +1675,16 @@ proc http::OpenSocket {token DoLater} { fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) ##Log socket opened, DONE fconfigure - token $token - } - } - - Log "Using $sock for $state(socketinfo) - token $token" \ - [expr {$state(-keepalive)?"keepalive":""}] + } - # Code above has set state(sock) $sock - ConfigureNewSocket $token $sockOld $DoLater + Log "Using $sock for $state(socketinfo) - token $token" \ + [expr {$state(-keepalive)?"keepalive":""}] + # Code above has set state(sock) $sock + ConfigureNewSocket $token $sockOld $DoLater + } result errdict]} { + Finish $token $result + } ##Log Leaving http::OpenSocket coroutine [info coroutine] - token $token return } @@ -1880,6 +1975,30 @@ proc http::ScheduleRequest {token} { } +# ------------------------------------------------------------------------------ +# Proc http::SendHeader +# ------------------------------------------------------------------------------ +# Command to send a request header, and keep a copy in state(requestHeaders) +# for debugging purposes. 
+# +# Arguments: +# token - connection token (name of an array) +# key - header name +# value - header value +# +# Return Value: none +# ------------------------------------------------------------------------------ + +proc http::SendHeader {token key value} { + variable $token + upvar 0 $token state + set tk [namespace tail $token] + set sock $state(sock) + lappend state(requestHeaders) [string tolower $key] $value + puts $sock "$key: $value" + return +} + # http::Connected -- # # Callback used when the connection to the HTTP server is actually @@ -1964,29 +2083,31 @@ proc http::Connected {token proto phost srvurl} { if {[catch { set state(method) $how - puts $sock "$how $srvurl HTTP/$state(-protocol)" + set state(requestHeaders) {} + set state(requestLine) "$how $srvurl HTTP/$state(-protocol)" + puts $sock $state(requestLine) set hostValue [GetFieldValue $state(-headers) Host] if {$hostValue ne {}} { # Allow Host spoofing. [Bug 928154] regexp {^[^:]+} $hostValue state(host) - puts $sock "Host: $hostValue" + SendHeader $token Host $hostValue } elseif {$port == $defport} { # Don't add port in this case, to handle broken servers. [Bug # #504508] set state(host) $host - puts $sock "Host: $host" + SendHeader $token Host $host } else { set state(host) $host - puts $sock "Host: $host:$port" + SendHeader $token Host "$host:$port" } - puts $sock "User-Agent: $http(-useragent)" + SendHeader $token User-Agent $http(-useragent) if {($state(-protocol) > 1.0) && $state(-keepalive)} { # Send this header, because a 1.1 server is not compelled to treat # this as the default. - puts $sock "Connection: keep-alive" + SendHeader $token Connection keep-alive } if {($state(-protocol) > 1.0) && !$state(-keepalive)} { - puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 + SendHeader $token Connection close ;# RFC2616 sec 8.1.2.1 } if {($state(-protocol) < 1.1)} { # RFC7230 A.1 @@ -1995,7 +2116,7 @@ proc http::Connected {token proto phost srvurl} { # Don't leave this to chance. 
# For HTTP/1.0 we have already "set state(connection) close" # and "state(-keepalive) 0". - puts $sock "Connection: close" + SendHeader $token Connection close } # RFC7230 A.1 - "clients are encouraged not to send the # Proxy-Connection header field in any requests" @@ -2021,19 +2142,22 @@ proc http::Connected {token proto phost srvurl} { set state(querylength) $value } if {[string length $key]} { - puts $sock "$key: $value" + SendHeader $token $key $value } } # Allow overriding the Accept header on a per-connection basis. Useful # for working with REST services. [Bug c11a51c482] if {!$accept_types_seen} { - puts $sock "Accept: $state(accept-types)" + SendHeader $token Accept $state(accept-types) } if { (!$accept_encoding_seen) && (![info exists state(-handler)]) && $http(-zip) } { - puts $sock "Accept-Encoding: gzip,deflate,compress" + SendHeader $token Accept-Encoding gzip,deflate + } elseif {!$accept_encoding_seen} { + SendHeader $token Accept-Encoding identity + } else { } if {$isQueryChannel && ($state(querylength) == 0)} { # Try to determine size of data in channel. If we cannot seek, the @@ -2058,7 +2182,7 @@ proc http::Connected {token proto phost srvurl} { set separator "; " } if {$cookies ne ""} { - puts $sock "Cookie: $cookies" + SendHeader $token Cookie $cookies } } @@ -2082,10 +2206,10 @@ proc http::Connected {token proto phost srvurl} { if {$isQuery || $isQueryChannel} { # POST method. 
if {!$content_type_seen} { - puts $sock "Content-Type: $state(-type)" + SendHeader $token Content-Type $state(-type) } if {!$contDone} { - puts $sock "Content-Length: $state(querylength)" + SendHeader $token Content-Length $state(querylength) } puts $sock "" flush $sock @@ -2312,7 +2436,7 @@ proc http::EventGateway {sock token} { # http::reset or http::cleanup, or if the caller set option -channel # but not option -handler: in the last case reading from the socket is # now managed by commands ::http::Copy*, http::ReceiveChunked, and - # http::make-transformation-chunked. + # http::MakeTransformationChunked. # # Catch in case the coroutine has closed the socket. catch {fileevent $sock readable [list http::EventGateway $sock $token]} @@ -2837,7 +2961,7 @@ proc http::ReplayCore {newQueue} { # Code - the HTTP transaction code, e.g., 200 # Size - the size of the URL data -proc http::data {token} { +proc http::responseBody {token} { variable $token upvar 0 $token state return $state(body) @@ -2850,12 +2974,17 @@ proc http::status {token} { upvar 0 $token state return $state(status) } -proc http::code {token} { +proc http::responseLine {token} { variable $token upvar 0 $token state return $state(http) } -proc http::ncode {token} { +proc http::requestLine {token} { + variable $token + upvar 0 $token state + return $state(requestLine) +} +proc http::responseCode {token} { variable $token upvar 0 $token state if {[regexp {[0-9]{3}} $state(http) numeric_code]} { @@ -2869,10 +2998,133 @@ proc http::size {token} { upvar 0 $token state return $state(currentsize) } -proc http::meta {token} { +proc http::requestHeaders {token args} { + set lenny [llength $args] + if {$lenny > 1} { + return -code error {usage: ::http::requestHeaders token ?headerName?} + } else { + return [Meta $token request {*}$args] + } +} +proc http::responseHeaders {token args} { + set lenny [llength $args] + if {$lenny > 1} { + return -code error {usage: ::http::responseHeaders token ?headerName?} + } else { 
+ return [Meta $token response {*}$args] + } +} +proc http::requestHeaderValue {token header} { + Meta $token request $header VALUE +} +proc http::responseHeaderValue {token header} { + Meta $token response $header VALUE +} +proc http::Meta {token who args} { + variable $token + upvar 0 $token state + + if {$who eq {request}} { + set whom requestHeaders + } elseif {$who eq {response}} { + set whom meta + } else { + return -code error {usage: ::http::Meta token request|response ?headerName ?VALUE??} + } + + set header [string tolower [lindex $args 0]] + set how [string tolower [lindex $args 1]] + set lenny [llength $args] + if {$lenny == 0} { + return $state($whom) + } elseif {($lenny > 2) || (($lenny == 2) && ($how ne {value}))} { + return -code error {usage: ::http::Meta token request|response ?headerName ?VALUE??} + } else { + set result {} + set combined {} + foreach {key value} $state($whom) { + if {$key eq $header} { + lappend result $key $value + append combined $value {, } + } + } + if {$lenny == 1} { + return $result + } else { + return [string range $combined 0 end-2] + } + } +} + + +# ------------------------------------------------------------------------------ +# Proc http::responseInfo +# ------------------------------------------------------------------------------ +# Command to return a dictionary of the most useful metadata of a HTTP +# response. +# +# Arguments: +# token - connection token (name of an array) +# +# Return Value: a dict. See man page http(n) for a description of each item. 
+# ------------------------------------------------------------------------------ + +proc http::responseInfo {token} { variable $token upvar 0 $token state - return $state(meta) + set result {} + foreach {key origin name} { + stage STATE state + status STATE status + responseCode STATE responseCode + reasonPhrase STATE reasonPhrase + contentType STATE type + binary STATE binary + redirection RESP location + upgrade STATE upgrade + error ERROR - + postError STATE posterror + method STATE method + charset STATE charset + compression STATE coding + httpRequest STATE -protocol + httpResponse STATE httpResponse + url STATE url + connectionRequest REQ connection + connectionResponse RESP connection + connectionActual STATE connection + transferEncoding STATE transfer + totalPost STATE querylength + currentPost STATE queryoffset + totalSize STATE totalsize + currentSize STATE currentsize + } { + if {$origin eq {STATE}} { + if {[info exists state($name)]} { + dict set result $key $state($name) + } else { + # Should never come here + dict set result $key {} + } + } elseif {$origin eq {REQ}} { + dict set result $key [requestHeaderValue $token $name] + } elseif {$origin eq {RESP}} { + dict set result $key [responseHeaderValue $token $name] + } elseif {$origin eq {ERROR}} { + # Don't flood the dict with data. The command ::http::error is + # available. 
+ if {[info exists state(error)]} { + set msg [lindex $state(error) 0] + } else { + set msg {} + } + dict set result $key $msg + } else { + # Should never come here + dict set result $key {} + } + } + return $result } proc http::error {token} { variable $token @@ -2882,6 +3134,14 @@ proc http::error {token} { } return } +proc http::postError {token} { + variable $token + upvar 0 $token state + if {[info exists state(postErrorFull)]} { + return $state(postErrorFull) + } + return +} # http::cleanup # @@ -3046,11 +3306,13 @@ proc http::Write {token} { set done 1 } } - } err]} { + } err opts]} { # Do not call Finish here, but instead let the read half of the socket # process whatever server reply there is to get. - set state(posterror) $err + set info [dict get $opts -errorinfo] + set code [dict get $opts -code] + set state(postErrorFull) [list $err $info $code] set done 1 } @@ -3066,7 +3328,7 @@ proc http::Write {token} { # Callback to the client after we've completely handled everything. if {[string length $state(-queryprogress)]} { - eval $state(-queryprogress) \ + namespace eval :: $state(-queryprogress) \ [list $token $state(querylength) $state(queryoffset)] } return @@ -3110,11 +3372,14 @@ proc http::Event {sock token} { if {[set d [read $sock]] ne ""} { Log "WARNING: additional data left on closed socket\ - token $token" + } else { } + } else { } Log ^X$tk end of response (token error) - token $token CloseSocket $sock return + } else { } if {$state(state) eq "connecting"} { ##Log - connecting - token $token @@ -3125,6 +3390,7 @@ proc http::Event {sock token} { } { set state(after) [after $state(-timeout) \ [list http::reset $token timeout]] + } else { } if {[catch {gets $sock state(http)} nsl]} { @@ -3136,8 +3402,8 @@ proc http::Event {sock token} { if {[TestForReplay $token read $nsl c]} { return + } else { } - # else: # This is NOT a persistent socket that has been closed since # its last use. 
@@ -3161,6 +3427,7 @@ proc http::Event {sock token} { if {[TestForReplay $token read {} d]} { return + } else { } # else: @@ -3168,6 +3435,7 @@ proc http::Event {sock token} { # last use. # If any other requests are in flight or pipelined/queued, they # will be discarded. + } else { } } elseif {$state(state) eq "header"} { if {[catch {gets $sock line} nhl]} { @@ -3186,6 +3454,20 @@ proc http::Event {sock token} { set state(state) "connecting" continue # This was a "return" in the pre-coroutine code. + } else { + } + + # We have $state(http) so let's split it into its components. + if {[regexp {^HTTP/(\S+) ([0-9]{3}) (.*)$} $state(http) \ + -> httpResponse responseCode reasonPhrase] + } { + set state(httpResponse) $httpResponse + set state(responseCode) $responseCode + set state(reasonPhrase) $reasonPhrase + } else { + set state(httpResponse) $state(http) + set state(responseCode) $state(http) + set state(reasonPhrase) $state(http) } if { ([info exists state(connection)]) @@ -3201,6 +3483,7 @@ proc http::Event {sock token} { # Previous value is $token. It cannot be "pending". set socketWrState($state(socketinfo)) Wready http::NextPipelinedWrite $token + } else { } # Once a "close" has been signaled, the client MUST NOT send any @@ -3231,6 +3514,7 @@ proc http::Event {sock token} { Log Move $tok from socketCoEvent to socketWrQueue and cancel its after idle coro } set socketCoEvent($state(socketinfo)) {} + } else { } if { ($socketRdQueue($state(socketinfo)) ne {}) @@ -3259,6 +3543,7 @@ proc http::Event {sock token} { if {[info exists ${tokenVal}(after)]} { after cancel [set ${tokenVal}(after)] unset ${tokenVal}(after) + } else { } # Tokens in the read queue have no (socketcoro) to # cancel. @@ -3271,6 +3556,7 @@ proc http::Event {sock token} { # Do not allow further connections on this socket (but # geturl can add new requests to the replay). 
set socketClosing($state(socketinfo)) 1 + } else { } set state(state) body @@ -3286,6 +3572,7 @@ proc http::Event {sock token} { && ("keep-alive" ni $state(connection)) } { lappend state(connection) "keep-alive" + } else { } # If doing a HEAD, then we won't get any body @@ -3294,6 +3581,7 @@ proc http::Event {sock token} { set state(state) complete Eot $token return + } else { } # - For non-chunked transfer we may have no body - in this case @@ -3314,7 +3602,7 @@ proc http::Event {sock token} { && ("close" in $state(connection)) ) ) - && (![info exists state(transfer)]) + && ($state(transfer) eq {}) && ($state(totalsize) == 0) } { set msg {body size is 0 and no events likely - complete} @@ -3324,6 +3612,7 @@ proc http::Event {sock token} { set state(state) complete Eot $token return + } else { } # We have to use binary translation to count bytes properly. @@ -3335,10 +3624,12 @@ proc http::Event {sock token} { } { # Turn off conversions for non-text data. set state(binary) 1 + } else { } if {[info exists state(-channel)]} { if {$state(binary) || [llength [ContentEncoding $token]]} { fconfigure $state(-channel) -translation binary + } else { } if {![info exists state(-handler)]} { # Initiate a sequence of background fcopies. @@ -3346,13 +3637,16 @@ proc http::Event {sock token} { rename ${token}--EventCoroutine {} CopyStart $sock $token return + } else { } + } else { } } elseif {$nhl > 0} { # Process header lines. ##Log header - token $token - $line if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { - switch -- [string tolower $key] { + set key [string tolower $key] + switch -- $key { content-type { set state(type) [string trim [string tolower $value]] # Grab the optional charset information. @@ -3379,6 +3673,12 @@ proc http::Event {sock token} { connection { # RFC 7230 Section 6.1 states that a comma-separated # list is an acceptable value. + if {![info exists state(connectionRespFlag)]} { + # This is the first "Connection" response header. 
+ # Scrub the earlier value set by initialisation. + set state(connectionRespFlag) {} + set state(connection) {} + } foreach el [SplitCommaSeparatedFieldValue $value] { lappend state(connection) [string tolower $el] } @@ -3389,18 +3689,21 @@ proc http::Event {sock token} { set-cookie { if {$http(-cookiejar) ne ""} { ParseCookie $token [string trim $value] + } else { } } } lappend state(meta) $key [string trim $value] + } else { } + } else { } } else { # Now reading body ##Log body - token $token if {[catch { if {[info exists state(-handler)]} { - set n [eval $state(-handler) [list $sock $token]] + set n [namespace eval :: $state(-handler) [list $sock $token]] ##Log handler $n - token $token # N.B. the protocol has been set to 1.0 because the -handler # logic is not expected to handle chunked encoding. @@ -3409,6 +3712,7 @@ proc http::Event {sock token} { # We know the transfer is complete only when the server # closes the connection - i.e. eof is not an error. set state(state) complete + } else { } if {![string is integer -strict $n]} { if 1 { @@ -3438,10 +3742,11 @@ proc http::Event {sock token} { set n 0 set state(state) complete } + } else { } } elseif {[info exists state(transfer_final)]} { # This code forgives EOF in place of the final CRLF.
- set line [getTextLine $sock] + set line [GetTextLine $sock] set n [string length $line] set state(state) complete if {$n > 0} { @@ -3464,7 +3769,7 @@ proc http::Event {sock token} { } { ##Log chunked - token $token set size 0 - set hexLenChunk [getTextLine $sock] + set hexLenChunk [GetTextLine $sock] #set ntl [string length $hexLenChunk] if {[string trim $hexLenChunk] ne ""} { scan $hexLenChunk %x size @@ -3477,6 +3782,7 @@ proc http::Event {sock token} { incr state(log_size) [string length $chunk] ##Log chunk $n cumul $state(log_size) -\ token $token + } else { } if {$size != [string length $chunk]} { Log "WARNING: mis-sized chunk:\ @@ -3489,10 +3795,11 @@ proc http::Event {sock token} { set msg {error in chunked encoding - fetch\ terminated} Eot $token $msg + } else { } # CRLF that follows chunk. # If eof, this is handled at the end of this proc. - getTextLine $sock + GetTextLine $sock } else { set n 0 set state(transfer_final) {} @@ -3536,6 +3843,7 @@ proc http::Event {sock token} { append state(body) $block ##Log non-chunk [string length $state(body)] -\ token $token + } else { } } # This calculation uses n from the -handler, chunked, or @@ -3547,6 +3855,7 @@ proc http::Event {sock token} { set t $state(totalsize) ##Log another $n currentsize $c totalsize $t -\ token $token + } else { } # If Content-Length - check for end of data. 
if { @@ -3557,7 +3866,9 @@ proc http::Event {sock token} { token $token set state(state) complete Eot $token + } else { } + } else { } } err]} { Log ^X$tk end of response (error ${err}) - token $token @@ -3565,8 +3876,9 @@ proc http::Event {sock token} { return } else { if {[info exists state(-progress)]} { - eval $state(-progress) \ + namespace eval :: $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] + } else { } } } @@ -3768,7 +4080,7 @@ proc http::ParseCookie {token value} { {*}$http(-cookiejar) storeCookie $realopts } -# http::getTextLine -- +# http::GetTextLine -- # # Get one line with the stream in crlf mode. # Used if Transfer-Encoding is chunked, to read the line that @@ -3782,7 +4094,7 @@ proc http::ParseCookie {token value} { # Results: # The line of text, without trailing newline -proc http::getTextLine {sock} { +proc http::GetTextLine {sock} { set tr [fconfigure $sock -translation] lassign $tr trRead trWrite fconfigure $sock -translation [list crlf $trWrite] @@ -3850,13 +4162,25 @@ proc http::CopyStart {sock token {initial 1}} { upvar 0 $token state if {[info exists state(transfer)] && $state(transfer) eq "chunked"} { foreach coding [ContentEncoding $token] { - lappend state(zlib) [zlib stream $coding] + if {$coding eq {deflateX}} { + # Use the standards-compliant choice. + set coding2 decompress + } else { + set coding2 $coding + } + lappend state(zlib) [zlib stream $coding2] } - make-transformation-chunked $sock [namespace code [list CopyChunk $token]] + MakeTransformationChunked $sock [namespace code [list CopyChunk $token]] } else { if {$initial} { foreach coding [ContentEncoding $token] { - zlib push $coding $sock + if {$coding eq {deflateX}} { + # Use the standards-compliant choice. 
+ set coding2 decompress + } else { + set coding2 $coding + } + zlib push $coding2 $sock } } if {[catch { @@ -3884,7 +4208,7 @@ proc http::CopyChunk {token chunk} { } puts -nonewline $state(-channel) $chunk if {[info exists state(-progress)]} { - eval [linsert $state(-progress) end \ + namespace eval :: [linsert $state(-progress) end \ $token $state(totalsize) $state(currentsize)] } } else { @@ -3892,7 +4216,12 @@ proc http::CopyChunk {token chunk} { if {[info exists state(zlib)]} { set excess "" foreach stream $state(zlib) { - catch {set excess [$stream add -finalize $excess]} + catch { + $stream put -finalize $excess + set excess "" + set overflood "" + while {[set overflood [$stream get]] ne ""} { append excess $overflood } + } } puts -nonewline $state(-channel) $excess foreach stream $state(zlib) { $stream close } @@ -3920,7 +4249,7 @@ proc http::CopyDone {token count {error {}}} { set sock $state(sock) incr state(currentsize) $count if {[info exists state(-progress)]} { - eval $state(-progress) \ + namespace eval :: $state(-progress) \ [list $token $state(totalsize) $state(currentsize)] } # At this point the token may have been reset. @@ -3977,7 +4306,20 @@ proc http::Eot {token {reason {}}} { if {[string length $state(body)] > 0} { if {[catch { foreach coding [ContentEncoding $token] { - set state(body) [zlib $coding $state(body)] + if {$coding eq {deflateX}} { + # First try the standards-compliant choice. + set coding2 decompress + if {[catch {zlib $coding2 $state(body)} result]} { + # If that fails, try the MS non-compliant choice. + set coding2 inflate + set state(body) [zlib $coding2 $state(body)] + } else { + # error {failed at standards-compliant deflate} + set state(body) $result + } + } else { + set state(body) [zlib $coding $state(body)] + } } } err]} { Log "error doing decompression for token $token: $err" @@ -3999,11 +4341,92 @@ proc http::Eot {token {reason {}}} { # Translate text line endings. 
set state(body) [string map {\r\n \n \r \n} $state(body)] } + if {[info exists state(-guesstype)] && $state(-guesstype)} { + GuessType $token + } } Finish $token $reason return } + +# ------------------------------------------------------------------------------ +# Proc http::GuessType +# ------------------------------------------------------------------------------ +# Command to attempt limited analysis of a resource with undetermined +# Content-Type, i.e. "application/octet-stream". This value can be set for two +# reasons: +# (a) by the server, in a Content-Type header +# (b) by http::geturl, as the default value if the server does not supply a +# Content-Type header. +# +# This command converts a resource if: +# (1) it has type application/octet-stream +# (2) it begins with an XML declaration "<?xml name="value" ... ?>" +# (3) one tag is named "encoding" and has a recognised value; or no "encoding" +# tag exists (defaulting to utf-8) +# +# RFC 9110 Sec. 8.3 states: +# "If a Content-Type header field is not present, the recipient MAY either +# assume a media type of "application/octet-stream" ([RFC2046], Section 4.5.1) +# or examine the data to determine its type." +# +# The RFC goes on to describe the pitfalls of "MIME sniffing", including +# possible security risks. +# +# Arguments: +# token - connection token +# +# Return Value: (boolean) true iff a change has been made +# ------------------------------------------------------------------------------ + +proc http::GuessType {token} { + variable $token + upvar 0 $token state + + if {$state(type) ne {application/octet-stream}} { + return 0 + } + + set body $state(body) + # e.g. {<?xml version="1.0" encoding="utf-8"?> ...} + + if {![regexp -nocase -- {^<[?]xml[[:space:]][^>?]*[?]>} $body match]} { + return 0 + } + # e.g. {<?xml version="1.0" encoding="utf-8"?>} + + set contents [regsub -- {[[:space:]]+} $match { }] + set contents [string range [string tolower $contents] 6 end-2] + # e.g.
{version="1.0" encoding="utf-8"} + # without excess whitespace or upper-case letters + + if {![regexp -- {^([^=" ]+="[^"]+" )+$} "$contents "]} { + return 0 + } + # The application/xml default encoding: + set res utf-8 + + set tagList [regexp -all -inline -- {[^=" ]+="[^"]+"} $contents] + foreach tag $tagList { + regexp -- {([^=" ]+)="([^"]+)"} $tag -> name value + if {$name eq {encoding}} { + set res $value + } + } + set enc [CharsetToEncoding $res] + if {$enc eq "binary"} { + return 0 + } + set state(body) [encoding convertfrom $enc $state(body)] + set state(body) [string map {\r\n \n \r \n} $state(body)] + set state(type) application/xml + set state(binary) 0 + set state(charset) $res + return 1 +} + + # http::wait -- # # See documentation for details. @@ -4048,7 +4471,7 @@ proc http::formatQuery {args} { set result "" set sep "" foreach i $args { - append result $sep [mapReply $i] + append result $sep [quoteString $i] if {$sep eq "="} { set sep & } else { @@ -4058,7 +4481,7 @@ proc http::formatQuery {args} { return $result } -# http::mapReply -- +# http::quoteString -- # # Do x-www-urlencoded character mapping # @@ -4068,7 +4491,7 @@ proc http::formatQuery {args} { # Results: # The encoded string -proc http::mapReply {string} { +proc http::quoteString {string} { variable http variable formMap @@ -4079,7 +4502,6 @@ proc http::mapReply {string} { set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } -interp alias {} http::quoteString {} http::mapReply # http::ProxyRequired -- # Default proxy filter. @@ -4147,16 +4569,41 @@ proc http::CharsetToEncoding {charset} { } } + +# ------------------------------------------------------------------------------ +# Proc http::ContentEncoding +# ------------------------------------------------------------------------------ # Return the list of content-encoding transformations we need to do in order. 
+# + # -------------------------------------------------------------------------- + # Options for Accept-Encoding, Content-Encoding: the switch command + # -------------------------------------------------------------------------- + # The symbol deflateX allows http to attempt both versions of "deflate", + # unless there is a -channel - for a -channel, only "decompress" is tried. + # Alternative/extra lines for switch: + # The standards-compliant version of "deflate" can be chosen with: + # deflate { lappend r decompress } + # The Microsoft non-compliant version of "deflate" can be chosen with: + # deflate { lappend r inflate } + # The previously used implementation of "compress", which appears to be + # incorrect and is rarely used by web servers, can be chosen with: + # compress - x-compress { lappend r decompress } + # -------------------------------------------------------------------------- +# +# Arguments: +# token - Connection token. +# +# Return Value: list +# ------------------------------------------------------------------------------ + proc http::ContentEncoding {token} { upvar 0 $token state set r {} if {[info exists state(coding)]} { foreach coding [split $state(coding) ,] { switch -exact -- $coding { - deflate { lappend r inflate } + deflate { lappend r deflateX } gzip - x-gzip { lappend r gunzip } - compress - x-compress { lappend r decompress } identity {} br { return -code error\ @@ -4247,12 +4694,18 @@ proc http::GetFieldValue {headers fieldName} { return $r } -proc http::make-transformation-chunked {chan command} { +proc http::MakeTransformationChunked {chan command} { coroutine [namespace current]::dechunk$chan ::http::ReceiveChunked $chan $command chan event $chan readable [namespace current]::dechunk$chan return } +interp alias {} http::data {} http::responseBody +interp alias {} http::code {} http::responseLine +interp alias {} http::mapReply {} http::quoteString +interp alias {} http::meta {} http::responseHeaders +interp alias {} 
http::metaValue {} http::responseHeaderValue +interp alias {} http::ncode {} http::responseCode # ------------------------------------------------------------------------------ # Proc http::socket @@ -4278,6 +4731,10 @@ proc http::make-transformation-chunked {chan command} { # - The http::socket command is simple, and can easily be replaced with an # alternative command that uses a different technique to open a socket while # entering the event loop. +# - Unexpected behaviour by thread::send -async (Thread 2.8.6). +# An error in thread::send -async causes return of just the error message +# (not the expected 3 elements), and raises a bgerror in the main thread. +# Hence wrap the command with catch as a precaution. # ------------------------------------------------------------------------------ proc http::socket {args} { @@ -4302,8 +4759,11 @@ proc http::socket {args} { set defcmd ::socket set sockargs $args set script " - [list proc ::SockInThread {caller defcmd sockargs} [info body http::SockInThread]] - [list ::SockInThread [thread::id] $defcmd $sockargs] + set code \[catch { + [list proc ::SockInThread {caller defcmd sockargs} [info body ::http::SockInThread]] + [list ::SockInThread [thread::id] $defcmd $sockargs] + } result opts\] + list \$code \$opts \$result " set state(tid) [thread::create] @@ -4325,10 +4785,26 @@ proc http::socket {args} { Log >U Thread End Wait $args -- coro [info coroutine] $varName [set $varName] thread::release $state(tid) set state(tid) {} - lassign [set $varName] catchCode errdict sock + set result [set $varName] unset $varName - dict set errdict -code $catchCode - return -options $errdict $sock + if {(![string is list $result]) || ([llength $result] != 3)} { + return -code error "result from peer thread is not a list of\ + length 3: it is \n$result" + } + lassign $result threadCode threadDict threadResult + if {($threadCode != 0)} { + # This is an error in thread::send. Return the lot. 
+ return -options $threadDict -code error $threadResult + } + + # Now the results of the catch in the peer thread. + lassign $threadResult catchCode errdict sock + + if {($catchCode == 0) && ($sock ni [chan names])} { + return -code error {Transfer of socket from peer thread failed.\ + Check that this script is not running in a child interpreter.} + } + return -options $errdict -code $catchCode $sock } # The commands below are dependencies of http::socket and @@ -4392,7 +4868,7 @@ proc http::SockInThread {caller defcmd sockargs} { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::cwait +# Proc http::cwaiter::cwait # ------------------------------------------------------------------------------ # Command to substitute for vwait, without the ordering issues. # A command that uses cwait must be a coroutine that is launched by an event, @@ -4411,13 +4887,13 @@ proc http::SockInThread {caller defcmd sockargs} { # Return Value: none # ------------------------------------------------------------------------------ -namespace eval ::http::cwaiter { +namespace eval http::cwaiter { namespace export cwait variable log {} variable logOn 0 } -proc ::http::cwaiter::cwait { +proc http::cwaiter::cwait { varName {coroName {}} {timeout {}} {timeoutValue {}} } { set thisCoro [info coroutine] @@ -4448,7 +4924,7 @@ proc ::http::cwaiter::cwait { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::CwaitHelper +# Proc http::cwaiter::CwaitHelper # ------------------------------------------------------------------------------ # Helper command called by the trace set by cwait. # - Ignores the arguments added by trace. @@ -4459,7 +4935,7 @@ proc ::http::cwaiter::cwait { # - Remove the trace immediately. We don't want multiple calls. 
# ------------------------------------------------------------------------------ -proc ::http::cwaiter::CwaitHelper {varName coroName toe args} { +proc http::cwaiter::CwaitHelper {varName coroName toe args} { CoLog "got $varName for $coroName" set cmd [list ::http::cwaiter::CwaitHelper $varName $coroName $toe] trace remove variable $varName write $cmd @@ -4471,12 +4947,12 @@ proc ::http::cwaiter::CwaitHelper {varName coroName toe args} { # ------------------------------------------------------------------------------ -# Proc ::http::cwaiter::LogInit +# Proc http::cwaiter::LogInit # ------------------------------------------------------------------------------ # Call this command to initiate debug logging and clear the log. # ------------------------------------------------------------------------------ -proc ::http::cwaiter::LogInit {} { +proc http::cwaiter::LogInit {} { variable log variable logOn set log {} @@ -4484,12 +4960,12 @@ proc ::http::cwaiter::LogInit {} { return } -proc ::http::cwaiter::LogRead {} { +proc http::cwaiter::LogRead {} { variable log return $log } -proc ::http::cwaiter::CoLog {msg} { +proc http::cwaiter::CoLog {msg} { variable log variable logOn if {$logOn} { @@ -4498,7 +4974,7 @@ proc ::http::cwaiter::CoLog {msg} { return } -namespace eval ::http { +namespace eval http { namespace import ::http::cwaiter::* } diff --git a/tests/http.test b/tests/http.test index 26ba710..e88210a 100644 --- a/tests/http.test +++ b/tests/http.test @@ -145,7 +145,7 @@ test http-2.8 {http::CharsetToEncoding} { test http-3.1 {http::geturl} -returnCodes error -body { http::geturl -bogus flag -} -result {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -handler, -headers, -keepalive, -method, -myaddr, -progress, -protocol, -query, -queryblocksize, -querychannel, -queryprogress, -strict, -timeout, -type, -validate} +} -result {Unknown option flag, can be: -binary, -blocksize, -channel, -command, -guesstype, -handler, -headers, -keepalive, 
-method, -myaddr, -progress, -protocol, -query, -queryblocksize, -querychannel, -queryprogress, -strict, -timeout, -type, -validate} test http-3.2 {http::geturl} -returnCodes error -body { http::geturl http:junk @@ -390,7 +390,7 @@ test http-3.25 {http::meta} -setup { } -cleanup { http::cleanup $token unset -nocomplain m token -} -result {Content-Length Content-Type Date} +} -result {content-length content-type date} test http-3.26 {http::meta} -setup { unset -nocomplain m token } -body { @@ -400,7 +400,7 @@ test http-3.26 {http::meta} -setup { } -cleanup { http::cleanup $token unset -nocomplain m token -} -result {Content-Length Content-Type Date X-Check} +} -result {content-length content-type date x-check} test http-3.27 {http::geturl: -headers override -type} -body { set token [http::geturl $url/headers -type "text/plain" -query dummy \ -headers [list "Content-Type" "text/plain;charset=utf-8"]] @@ -485,7 +485,7 @@ test http-4.1 {http::Event} -body { set token [http::geturl $url -keepalive 0] upvar #0 $token data array set meta $data(meta) - expr {($data(totalsize) == $meta(Content-Length))} + expr {($data(totalsize) == $meta(content-length))} } -cleanup { http::cleanup $token } -result 1 @@ -493,7 +493,7 @@ test http-4.2 {http::Event} -body { set token [http::geturl $url] upvar #0 $token data array set meta $data(meta) - string compare $data(type) [string trim $meta(Content-Type)] + string compare $data(type) [string trim $meta(content-type)] } -cleanup { http::cleanup $token } -result 0 diff --git a/tests/http11.test b/tests/http11.test index 346e334..71ef4c7 100644 --- a/tests/http11.test +++ b/tests/http11.test @@ -51,15 +51,11 @@ proc halt_httpd {} { } proc meta {tok {key ""}} { - set meta [http::meta $tok] - if {$key ne ""} { - if {[dict exists $meta $key]} { - return [dict get $meta $key] - } else { - return "" - } + if {$key eq ""} { + return [http::meta $tok] + } else { + return [http::metaValue $tok $key] } - return $meta } proc state {tok {key ""}} { 
@@ -88,6 +84,8 @@ proc check_crc {tok args} { makeFile "<html><head><title>test</title></head><body><p>this is a test</p>\n[string repeat {<p>This is a tcl test file.</p>} 4192]\n</body></html>" testdoc.html +makeFile "<html><head><title>test</title></head><body><p>this is a test</p>\n[string repeat {<p>This is a tcl test file.</p>} 5000]\n</body></html>" largedoc.html + if {![info exists ThreadLevel]} { if {[catch {package require Thread}] == 0} { set ValueRange {0 1 2} @@ -128,11 +126,12 @@ test http11-1.1 "normal,gzip,non-chunked" -setup { -timeout 10000 -headers {accept-encoding gzip}] http::wait $tok list [http::status $tok] [http::code $tok] [check_crc $tok] \ - [meta $tok content-encoding] [meta $tok transfer-encoding] + [meta $tok content-encoding] [meta $tok transfer-encoding] \ + [http::meta $tok content-encoding] [http::meta $tok transfer-encoding] } -cleanup { http::cleanup $tok halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok gzip {}} +} -result {ok {HTTP/1.1 200 OK} ok gzip {} {content-encoding gzip} {}} test http11-1.2 "normal,deflated,non-chunked" -setup { variable httpd [create_httpd] @@ -147,7 +146,22 @@ test http11-1.2 "normal,deflated,non-chunked" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok deflate {}} -test http11-1.3 "normal,compressed,non-chunked" -setup { +test http11-1.2.1 "normal,deflated,non-chunked,msdeflate" -setup { + variable httpd [create_httpd] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?close=1&msdeflate=1 \ + -timeout 10000 -headers {accept-encoding deflate}] + http::wait $tok + list [http::status $tok] [http::code $tok] [check_crc $tok] \ + [meta $tok content-encoding] [meta $tok transfer-encoding] +} -cleanup { + http::cleanup $tok + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok deflate {}} + +test http11-1.3 "normal,compressed,non-chunked" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. 
variable httpd [create_httpd] } -body { set tok [http::geturl http://localhost:$httpd_port/testdoc.html?close=1 \ @@ -193,11 +207,12 @@ test http11-1.6 "normal, specify 1.1 " -setup { -protocol 1.1 -timeout 10000] http::wait $tok list [http::status $tok] [http::code $tok] [check_crc $tok] \ - [meta $tok connection] [meta $tok transfer-encoding] + [meta $tok connection] [meta $tok transfer-encoding] \ + [http::meta $tok connection] [http::meta $tok transfer-encoding] } -cleanup { http::cleanup $tok halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok close chunked} +} -result {ok {HTTP/1.1 200 OK} ok close chunked {connection close} {transfer-encoding chunked}} test http11-1.7 "normal, 1.1 and keepalive " -setup { variable httpd [create_httpd] @@ -251,7 +266,22 @@ test http11-1.10 "normal,deflate,chunked" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok deflate chunked} -test http11-1.11 "normal,compress,chunked" -setup { +test http11-1.10.1 "normal,deflate,chunked,msdeflate" -setup { + variable httpd [create_httpd] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?msdeflate=1 \ + -timeout 10000 -headers {accept-encoding deflate}] + http::wait $tok + list [http::status $tok] [http::code $tok] [check_crc $tok] \ + [meta $tok content-encoding] [meta $tok transfer-encoding] +} -cleanup { + http::cleanup $tok + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok deflate chunked} + +test http11-1.11 "normal,compress,chunked" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. 
variable httpd [create_httpd] } -body { set tok [http::geturl http://localhost:$httpd_port/testdoc.html \ @@ -343,15 +373,40 @@ test http11-2.1 "-channel, encoding gzip" -setup { http::wait $tok seek $chan 0 set data [read $chan] + set diff [expr {[file size testdoc.html] - [file size testfile.tmp]}] list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ [meta $tok connection] [meta $tok content-encoding]\ - [meta $tok transfer-encoding] + [meta $tok transfer-encoding] -- $diff bytes lost } -cleanup { http::cleanup $tok close $chan removeFile testfile.tmp halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok close gzip chunked} +} -result {ok {HTTP/1.1 200 OK} ok close gzip chunked -- 0 bytes lost} + +# Cf. Bug [3610253] "CopyChunk does not drain decompressor(s)" +# This test failed before the bugfix. +# The pass/fail depended on file size. +test http11-2.1.1 "-channel, encoding gzip" -setup { + variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] + set fileName largedoc.html +} -body { + set tok [http::geturl http://localhost:$httpd_port/$fileName \ + -timeout 5000 -channel $chan -headers {accept-encoding gzip}] + http::wait $tok + seek $chan 0 + set data [read $chan] + set diff [expr {[file size $fileName] - [file size testfile.tmp]}] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding] -- $diff bytes lost +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok close gzip chunked -- 0 bytes lost} test http11-2.2 "-channel, encoding deflate" -setup { variable httpd [create_httpd] @@ -372,7 +427,28 @@ test http11-2.2 "-channel, encoding deflate" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok close deflate chunked} -test http11-2.3 "-channel,encoding compress" -setup { +test http11-2.2.1 "-channel, encoding deflate,msdeflate" -setup { + variable httpd 
[create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?msdeflate=1 \ + -timeout 5000 -channel $chan -headers {accept-encoding deflate}] + http::wait $tok + seek $chan 0 + set data [read $chan] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding] +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok close deflate chunked} + +test http11-2.3 "-channel,encoding compress" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. variable httpd [create_httpd] set chan [open [makeFile {} testfile.tmp] wb+] } -body { @@ -524,7 +600,32 @@ test http11-2.7 "-channel,encoding deflate,non-chunked" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok close deflate {} 0} -test http11-2.8 "-channel,encoding compress,non-chunked" -setup { +test http11-2.7.1 "-channel,encoding deflate,non-chunked,msdeflate" -constraints knownBug -setup { + # Test fails because a -channel can only try one un-deflate algorithm, and the + # compliant "decompress" is tried, not the non-compliant "inflate" of + # the MS browser implementation. 
+ variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?close=1&msdeflate=1 \ + -timeout 5000 -channel $chan -headers {accept-encoding deflate}] + http::wait $tok + seek $chan 0 + set data [read $chan] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding]\ + [expr {[file size testdoc.html]-[file size testfile.tmp]}] +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok close deflate {} 0} + +test http11-2.8 "-channel,encoding compress,non-chunked" -constraints badCompress -setup { + # The Tcl "compress" algorithm appears to be incorrect and has been removed. + # Bug [a13b9d0ce1]. variable httpd [create_httpd] set chan [open [makeFile {} testfile.tmp] wb+] } -body { @@ -585,6 +686,27 @@ test http11-2.10 "-channel,deflate,keepalive" -setup { halt_httpd } -result {ok {HTTP/1.1 200 OK} ok {} deflate chunked 0} +test http11-2.10.1 "-channel,deflate,keepalive,msdeflate" -setup { + variable httpd [create_httpd] + set chan [open [makeFile {} testfile.tmp] wb+] +} -body { + set tok [http::geturl http://localhost:$httpd_port/testdoc.html?msdeflate=1 \ + -timeout 5000 -channel $chan -keepalive 1 \ + -headers {accept-encoding deflate}] + http::wait $tok + seek $chan 0 + set data [read $chan] + list [http::status $tok] [http::code $tok] [check_crc $tok $data]\ + [meta $tok connection] [meta $tok content-encoding]\ + [meta $tok transfer-encoding]\ + [expr {[file size testdoc.html]-[file size testfile.tmp]}] +} -cleanup { + http::cleanup $tok + close $chan + removeFile testfile.tmp + halt_httpd +} -result {ok {HTTP/1.1 200 OK} ok {} deflate chunked 0} + test http11-2.11 "-channel,identity,keepalive" -setup { variable httpd [create_httpd] set chan [open [makeFile {} testfile.tmp] wb+] @@ -623,7 +745,7 @@ test 
http11-2.12 "-channel,negotiate,keepalive" -setup { close $chan removeFile testfile.tmp halt_httpd -} -result {ok {HTTP/1.1 200 OK} ok {} gzip chunked gzip,deflate,compress 0} +} -result {ok {HTTP/1.1 200 OK} ok {} gzip chunked gzip,deflate 0} # ------------------------------------------------------------------------- @@ -938,6 +1060,7 @@ foreach p {create_httpd httpd_read halt_httpd meta check_crc} { if {[llength [info proc $p]]} {rename $p {}} } removeFile testdoc.html +removeFile largedoc.html unset -nocomplain httpd_port httpd p ::tcltest::cleanupTests diff --git a/tests/httpd11.tcl b/tests/httpd11.tcl index d0624f8..55b52fd 100644 --- a/tests/httpd11.tcl +++ b/tests/httpd11.tcl @@ -46,7 +46,7 @@ proc get-chunks {data {compression gzip}} { } set data "" - set chunker [make-chunk-generator $data 512] + set chunker [make-chunk-generator $data 671] while {[string length [set chunk [$chunker]]]} { append data $chunk } @@ -60,7 +60,7 @@ proc blow-chunks {data {ochan stdout} {compression gzip}} { compress { set data [zlib compress $data] } } - set chunker [make-chunk-generator $data 512] + set chunker [make-chunk-generator $data 671] while {[string length [set chunk [$chunker]]]} { puts -nonewline $ochan $chunk } @@ -160,6 +160,12 @@ proc Service {chan addr port} { if {$protocol eq "HTTP/1.1"} { foreach enc [split [dict get? $meta accept-encoding] ,] { set enc [string trim $enc] + # The current implementation of "compress" appears to be + # incorrect (bug [a13b9d0ce1]). Keep it here for + # experimentation only. The tests that use it have the + # constraint "badCompress". The client code in http has + # been removed, but can be restored from comments if + # experimentation is desired. 
if {$enc in {deflate gzip compress}} { set encoding $enc break @@ -171,6 +177,7 @@ proc Service {chan addr port} { } set nosendclose 0 + set msdeflate 0 foreach pair [split $query &] { if {[scan $pair {%[^=]=%s} key val] != 2} {set val ""} switch -exact -- $key { @@ -178,6 +185,7 @@ proc Service {chan addr port} { close {set close 1 ; set transfer 0} transfer {set transfer $val} content-type {set type $val} + msdeflate {set msdeflate $val} } } if {$protocol eq "HTTP/1.1"} { @@ -211,10 +219,23 @@ proc Service {chan addr port} { flush $chan chan configure $chan -buffering full -translation binary + if {$encoding eq {deflate}} { + # When http.tcl uses the correct decoder (bug [a13b9d0ce1]) for + # "accept-encoding deflate", i.e. "zlib decompress", this choice of + # encoding2 allows the tests to pass. It appears to do "deflate" + # correctly, but this has not been verified with a non-Tcl client. + set encoding2 compress + } else { + set encoding2 $encoding + } if {$transfer eq "chunked"} { - blow-chunks $data $chan $encoding - } elseif {$encoding ne "identity"} { - puts -nonewline $chan [zlib $encoding $data] + blow-chunks $data $chan $encoding2 + } elseif {$encoding2 ne "identity" && $msdeflate eq {1}} { + puts -nonewline $chan [string range [zlib $encoding2 $data] 2 end-4] + # Used in some tests of "deflate" to produce the non-RFC-compliant + # Microsoft version of "deflate". + } elseif {$encoding2 ne "identity"} { + puts -nonewline $chan [zlib $encoding2 $data] } else { puts -nonewline $chan $data } |
