diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | library/http/http.tcl | 17 | ||||
-rw-r--r-- | tests/http.test | 16 |
3 files changed, 32 insertions, 6 deletions
@@ -1,3 +1,8 @@ +2013-04-04 Reinhard Max <max@suse.de> + + * library/http/http.tcl (http::geturl): Allow URLs that don't have + a path, but a query, e.g. http://example.com?foo=bar . + 2013-03-22 Venkat Iyer <venkat@comit.com> * library/tzdata/Africa/Cairo: Update to tzdata2013b. * library/tzdata/Africa/Casablanca: diff --git a/library/http/http.tcl b/library/http/http.tcl index ddf066e..57f665a 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -394,13 +394,16 @@ proc http::geturl {url args} { # First, before the colon, is the protocol scheme (e.g. http) # Second, for HTTP-like protocols, is the authority # The authority is preceded by // and lasts up to (but not including) - # the following / and it identifies up to four parts, of which only one, - # the host, is required (if an authority is present at all). All other - # parts of the authority (user name, password, port number) are optional. + # the following / or ? and it identifies up to four parts, of which + # only one, the host, is required (if an authority is present at all). + # All other parts of the authority (user name, password, port number) + # are optional. # Third is the resource name, which is split into two parts at a ? # The first part (from the single "/" up to "?") is the path, and the # second part (from that "?" up to "#") is the query. *HOWEVER*, we do # not need to separate them; we send the whole lot to the server. + # Both path and query are allowed to be missing, including their + # delimiting character. # Fourth is the fragment identifier, which is everything after the first # "#" in the URL. The fragment identifier MUST NOT be sent to the server # and indeed, we don't bother to validate it (it could be an error to @@ -437,7 +440,7 @@ proc http::geturl {url args} { ) (?: : (\d+) )? # <port part of authority> )? - ( / [^\#]*)? # <path> (including query) + ( [/\?] [^\#]*)? # <path> (including query) (?: \# (.*) )? 
# <fragment> $ } @@ -481,6 +484,12 @@ proc http::geturl {url args} { } } if {$srvurl ne ""} { + # RFC 3986 allows empty paths (not even a /), but servers + # return 400 if the path in the HTTP request doesn't start + # with / , so add it here if needed. + if {[string index $srvurl 0] ne "/"} { + set srvurl /$srvurl + } # Check for validity according to RFC 3986, Appendix A set validityRE {(?xi) ^ diff --git a/tests/http.test b/tests/http.test index e2de7d8..7d439b1 100644 --- a/tests/http.test +++ b/tests/http.test @@ -135,6 +135,7 @@ set fullurl http://user:pass@[info hostname]:$port/a/b/c set binurl //[info hostname]:$port/binary set posturl //[info hostname]:$port/post set badposturl //[info hostname]:$port/droppost +set authorityurl //[info hostname]:$port set ipv6url http://\[::1\]:$port/ test http-3.4 {http::geturl} -body { set token [http::geturl $url] @@ -391,7 +392,7 @@ Connection close Content-Type {text/plain;charset=utf-8} Accept-Encoding .* Content-Length 5} -test http-3.29 "http::geturl $ipv6url" -body { +test http-3.29 {http::geturl IPv6 address} -body { # We only want to see if the URL gets parsed correctly. This is # the case if http::geturl succeeds or returns a socket related # error. If the parsing is wrong, we'll get a parse error. @@ -405,7 +406,18 @@ test http-3.29 "http::geturl $ipv6url" -body { } -cleanup { catch { http::cleanup $token } } -result 0 - +test http-3.30 {http::geturl query without path} -body { + set token [http::geturl $authorityurl?var=val] + http::ncode $token +} -cleanup { + catch { http::cleanup $token } +} -result 200 +test http-3.31 {http::geturl fragment without path} -body { + set token [http::geturl "$authorityurl#fragment42"] + http::ncode $token +} -cleanup { + catch { http::cleanup $token } +} -result 200 test http-4.1 {http::Event} -body { set token [http::geturl $url -keepalive 0] upvar #0 $token data |