author    dkf <donal.k.fellows@manchester.ac.uk>  2018-11-06 09:50:29 (GMT)
committer dkf <donal.k.fellows@manchester.ac.uk>  2018-11-06 09:50:29 (GMT)
commit    5429d6aeaf940119a7a7bb0a3d54ae966c234314 (patch)
tree      9a59b97ee71d187032f6bd56024ee9147d6f0039 /library
parent    d739e9b6a832caa5729f3d77aaf3017b0c1fb867 (diff)
parent    1c2958d4552db77b82218a55830b9175f3be7888 (diff)
Implement TIP 406
Diffstat (limited to 'library')
-rw-r--r--  library/http/cookiejar.tcl                745
-rw-r--r--  library/http/effective_tld_names.txt.gz   bin 0 -> 39188 bytes
-rw-r--r--  library/http/http.tcl                     121
-rw-r--r--  library/http/idna.tcl                     292
-rw-r--r--  library/http/pkgIndex.tcl                   2
5 files changed, 1159 insertions(+), 1 deletion(-)
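
For orientation, a minimal usage sketch of the feature added by this commit; the database filename and request URL are illustrative, not taken from the patch:

    package require http 2.9
    package require cookiejar

    # Create a cookie jar backed by an SQLite file and plug it into http.
    set jar [http::cookiejar new cookies.sqlite3]
    http::config -cookiejar $jar

    # Later requests automatically send matching cookies and record any
    # Set-Cookie headers in the jar.
    set tok [http::geturl https://www.example.com/]
    http::cleanup $tok
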
diff --git a/library/http/cookiejar.tcl b/library/http/cookiejar.tcl
new file mode 100644
index 0000000..309ca7a
--- /dev/null
+++ b/library/http/cookiejar.tcl
@@ -0,0 +1,745 @@
+# cookiejar.tcl --
+#
+# Implementation of an HTTP cookie storage engine using SQLite. The
+# implementation is done as a TclOO class, and relies on the punycode
+# encoder and decoder provided by the tcl::idna package.
+#
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
+
+# Dependencies
+package require Tcl 8.6
+package require http 2.8.4
+package require sqlite3
+package require tcl::idna 1.0
+
+#
+# Configuration for the cookiejar package, plus basic support procedures.
+#
+
+# This is the class that we are creating
+if {![llength [info commands ::http::cookiejar]]} {
+ ::oo::class create ::http::cookiejar
+}
+
+namespace eval [info object namespace ::http::cookiejar] {
+ proc setInt {*var val} {
+ upvar 1 ${*var} var
+ if {[catch {incr dummy $val} msg]} {
+ return -code error $msg
+ }
+ set var $val
+ }
+ proc setInterval {trigger *var val} {
+ upvar 1 ${*var} var
+ if {![string is integer -strict $val] || $val < 1} {
+ return -code error "expected positive integer but got \"$val\""
+ }
+ set var $val
+ {*}$trigger
+ }
+ proc setBool {*var val} {
+ upvar 1 ${*var} var
+ if {[catch {if {$val} {}} msg]} {
+ return -code error $msg
+ }
+ set var [expr {!!$val}]
+ }
+
+ proc setLog {*var val} {
+ upvar 1 ${*var} var
+ set var [::tcl::prefix match -message "log level" \
+ {debug info warn error} $val]
+ }
+
+ # Keep this in sync with pkgIndex.tcl and with the install directories in
+ # Makefiles
+ variable version 0.1
+
+ variable domainlist \
+ http://publicsuffix.org/list/effective_tld_names.dat
+ variable domainfile \
+ [file join [file dirname [info script]] effective_tld_names.txt.gz]
+ # The domain list URL above is linked to from http://publicsuffix.org/list/
+ variable loglevel info
+ variable vacuumtrigger 200
+ variable retainlimit 100
+ variable offline false
+ variable purgeinterval 60000
+ variable refreshinterval 10000000
+ variable domaincache {}
+
+ # Some support procedures, none particularly useful in general
+ namespace eval support {
+ # Set up a logger if the http package isn't actually loaded yet.
+ if {![llength [info commands ::http::Log]]} {
+ proc ::http::Log args {
+ # Do nothing by default...
+ }
+ }
+
+ namespace export *
+ proc locn {secure domain path {key ""}} {
+ if {$key eq ""} {
+ format "%s://%s%s" [expr {$secure?"https":"http"}] \
+ [::tcl::idna encode $domain] $path
+ } else {
+ format "%s://%s%s?%s" \
+ [expr {$secure?"https":"http"}] [::tcl::idna encode $domain] \
+ $path $key
+ }
+ }
+ proc splitDomain domain {
+ set pieces [split $domain "."]
+ for {set i [llength $pieces]} {[incr i -1] >= 0} {} {
+ lappend result [join [lrange $pieces $i end] "."]
+ }
+ return $result
+ }
+ proc splitPath path {
+ set pieces [split [string trimleft $path "/"] "/"]
+ for {set j -1} {$j < [llength $pieces]} {incr j} {
+ lappend result /[join [lrange $pieces 0 $j] "/"]
+ }
+ return $result
+ }
+ proc isoNow {} {
+ set ms [clock milliseconds]
+ set ts [expr {$ms / 1000}]
+ set ms [format %03d [expr {$ms % 1000}]]
+ clock format $ts -format "%Y%m%dT%H%M%S.${ms}Z" -gmt 1
+ }
+ proc log {level msg args} {
+ namespace upvar [info object namespace ::http::cookiejar] \
+ loglevel loglevel
+ set who [uplevel 1 self class]
+ set mth [uplevel 1 self method]
+ set map {debug 0 info 1 warn 2 error 3}
+ if {[string map $map $level] >= [string map $map $loglevel]} {
+ set msg [format $msg {*}$args]
+ set LVL [string toupper $level]
+ ::http::Log "[isoNow] $LVL $who $mth - $msg"
+ }
+ }
+ }
+}
+
+# Now we have enough information to provide the package.
+package provide cookiejar \
+ [set [info object namespace ::http::cookiejar]::version]
+
+# The implementation of the cookiejar package
+::oo::define ::http::cookiejar {
+ self {
+ method configure {{optionName "\u0000\u0000"} {optionValue "\u0000\u0000"}} {
+ set tbl {
+ -domainfile {domainfile set}
+ -domainlist {domainlist set}
+ -domainrefresh {refreshinterval setInterval}
+ -loglevel {loglevel setLog}
+ -offline {offline setBool}
+ -purgeold {purgeinterval setInterval}
+ -retain {retainlimit setInt}
+ -vacuumtrigger {vacuumtrigger setInt}
+ }
+ dict lappend tbl -domainrefresh [namespace code {
+ my IntervalTrigger PostponeRefresh
+ }]
+ dict lappend tbl -purgeold [namespace code {
+ my IntervalTrigger PostponePurge
+ }]
+ if {$optionName eq "\u0000\u0000"} {
+ return [dict keys $tbl]
+ }
+ set opt [::tcl::prefix match -message "option" \
+ [dict keys $tbl] $optionName]
+ set setter [lassign [dict get $tbl $opt] varname]
+ namespace upvar [namespace current] $varname var
+ if {$optionValue ne "\u0000\u0000"} {
+ {*}$setter var $optionValue
+ }
+ return $var
+ }
+
+ method IntervalTrigger {method} {
+ # TODO: handle subclassing
+ foreach obj [info class instances [self]] {
+ [info object namespace $obj]::my $method
+ }
+ }
+ }
+
+ variable purgeTimer deletions refreshTimer
+ constructor {{path ""}} {
+ namespace import [info object namespace [self class]]::support::*
+
+ if {$path eq ""} {
+ sqlite3 [namespace current]::db :memory:
+ set storeorigin "constructed cookie store in memory"
+ } else {
+ sqlite3 [namespace current]::db $path
+ db timeout 500
+ set storeorigin "loaded cookie store from $path"
+ }
+
+ set deletions 0
+ db transaction {
+ db eval {
+ --;# Store the persistent cookies in this table.
+ --;# Deletion policy: once they expire, or if explicitly
+ --;# killed.
+ CREATE TABLE IF NOT EXISTS persistentCookies (
+ id INTEGER PRIMARY KEY,
+ secure INTEGER NOT NULL,
+ domain TEXT NOT NULL COLLATE NOCASE,
+ path TEXT NOT NULL,
+ key TEXT NOT NULL,
+ value TEXT NOT NULL,
+ originonly INTEGER NOT NULL,
+ expiry INTEGER NOT NULL,
+ lastuse INTEGER NOT NULL,
+ creation INTEGER NOT NULL);
+ CREATE UNIQUE INDEX IF NOT EXISTS persistentUnique
+ ON persistentCookies (domain, path, key);
+ CREATE INDEX IF NOT EXISTS persistentLookup
+ ON persistentCookies (domain, path);
+
+ --;# Store the session cookies in this table.
+ --;# Deletion policy: at cookiejar instance deletion, if
+ --;# explicitly killed, or if the number of session cookies is
+ --;# too large and the cookie has not been used recently.
+ CREATE TEMP TABLE sessionCookies (
+ id INTEGER PRIMARY KEY,
+ secure INTEGER NOT NULL,
+ domain TEXT NOT NULL COLLATE NOCASE,
+ path TEXT NOT NULL,
+ key TEXT NOT NULL,
+ originonly INTEGER NOT NULL,
+ value TEXT NOT NULL,
+ lastuse INTEGER NOT NULL,
+ creation INTEGER NOT NULL);
+ CREATE UNIQUE INDEX sessionUnique
+ ON sessionCookies (domain, path, key);
+ CREATE INDEX sessionLookup ON sessionCookies (domain, path);
+
+ --;# View to allow for simple looking up of a cookie.
+ --;# Deletion policy: NOT SUPPORTED via this view.
+ CREATE TEMP VIEW cookies AS
+ SELECT id, domain, (
+ CASE originonly WHEN 1 THEN path ELSE '.' || path END
+ ) AS path, key, value, secure, 1 AS persistent
+ FROM persistentCookies
+ UNION
+ SELECT id, domain, (
+ CASE originonly WHEN 1 THEN path ELSE '.' || path END
+ ) AS path, key, value, secure, 0 AS persistent
+ FROM sessionCookies;
+
+ --;# Encoded domain permission policy; if forbidden is 1, no
+ --;# cookie may be ever set for the domain, and if forbidden
+ --;# is 0, cookies *may* be created for the domain (overriding
+ --;# the forbiddenSuper table).
+ --;# Deletion policy: normally not modified.
+ CREATE TABLE IF NOT EXISTS domains (
+ domain TEXT PRIMARY KEY NOT NULL,
+ forbidden INTEGER NOT NULL);
+
+ --;# Domains that may not have a cookie defined for direct
+ --;# child domains of them.
+ --;# Deletion policy: normally not modified.
+ CREATE TABLE IF NOT EXISTS forbiddenSuper (
+ domain TEXT PRIMARY KEY);
+
+ --;# When we last retrieved the domain list.
+ CREATE TABLE IF NOT EXISTS domainCacheMetadata (
+ id INTEGER PRIMARY KEY,
+ retrievalDate INTEGER,
+ installDate INTEGER);
+ }
+
+ set cookieCount "no"
+ db eval {
+ SELECT COUNT(*) AS cookieCount FROM persistentCookies
+ }
+ log info "%s with %s entries" $storeorigin $cookieCount
+
+ my PostponePurge
+
+ if {$path ne ""} {
+ if {[db exists {SELECT 1 FROM domains}]} {
+ my RefreshDomains
+ } else {
+ my InitDomainList
+ my PostponeRefresh
+ }
+ } else {
+ set data [my GetDomainListOffline metadata]
+ my InstallDomainData $data $metadata
+ my PostponeRefresh
+ }
+ }
+ }
+
+ method PostponePurge {} {
+ namespace upvar [info object namespace [self class]] \
+ purgeinterval interval
+ catch {after cancel $purgeTimer}
+ set purgeTimer [after $interval [namespace code {my PurgeCookies}]]
+ }
+
+ method PostponeRefresh {} {
+ namespace upvar [info object namespace [self class]] \
+ refreshinterval interval
+ catch {after cancel $refreshTimer}
+ set refreshTimer [after $interval [namespace code {my RefreshDomains}]]
+ }
+
+ method RefreshDomains {} {
+ # TODO: domain list refresh policy
+ my PostponeRefresh
+ }
+
+ method HttpGet {url {timeout 0} {maxRedirects 5}} {
+ for {set r 0} {$r < $maxRedirects} {incr r} {
+ set tok [::http::geturl $url -timeout $timeout]
+ try {
+ if {[::http::status $tok] eq "timeout"} {
+ return -code error "connection timed out"
+ } elseif {[::http::ncode $tok] == 200} {
+ return [::http::data $tok]
+ } elseif {[::http::ncode $tok] >= 400} {
+ return -code error [::http::error $tok]
+ } elseif {[dict exists [::http::meta $tok] Location]} {
+ set url [dict get [::http::meta $tok] Location]
+ continue
+ }
+ return -code error \
+ "unexpected state: [::http::code $tok]"
+ } finally {
+ ::http::cleanup $tok
+ }
+ }
+ return -code error "too many redirects"
+ }
+ method GetDomainListOnline {metaVar} {
+ upvar 1 $metaVar meta
+ namespace upvar [info object namespace [self class]] \
+ domainlist url domaincache cache
+ lassign $cache when data
+ if {$when > [clock seconds] - 3600} {
+ log debug "using cached value created at %s" \
+ [clock format $when -format {%Y%m%dT%H%M%SZ} -gmt 1]
+ dict set meta retrievalDate $when
+ return $data
+ }
+ log debug "loading domain list from %s" $url
+ try {
+ set when [clock seconds]
+ set data [my HttpGet $url]
+ set cache [list $when $data]
+ # TODO: Should we use the Last-Modified header instead?
+ dict set meta retrievalDate $when
+ return $data
+ } on error msg {
+ log error "failed to fetch list of forbidden cookie domains from %s: %s" \
+ $url $msg
+ return {}
+ }
+ }
+ method GetDomainListOffline {metaVar} {
+ upvar 1 $metaVar meta
+ namespace upvar [info object namespace [self class]] \
+ domainfile filename
+ log debug "loading domain list from %s" $filename
+ try {
+ set f [open $filename]
+ try {
+ if {[string match *.gz $filename]} {
+ zlib push gunzip $f
+ }
+ fconfigure $f -encoding utf-8
+ dict set meta retrievalDate [file mtime $filename]
+ return [read $f]
+ } finally {
+ close $f
+ }
+ } on error {msg opt} {
+ log error "failed to read list of forbidden cookie domains from %s: %s" \
+ $filename $msg
+ return -options $opt $msg
+ }
+ }
+ method InitDomainList {} {
+ namespace upvar [info object namespace [self class]] \
+ offline offline
+ if {!$offline} {
+ try {
+ set data [my GetDomainListOnline metadata]
+ if {[string length $data]} {
+ my InstallDomainData $data $metadata
+ return
+ }
+ } on error {} {
+ log warn "attempting to fall back to built in version"
+ }
+ }
+ set data [my GetDomainListOffline metadata]
+ my InstallDomainData $data $metadata
+ }
+
+ method InstallDomainData {data meta} {
+ set n [db total_changes]
+ db transaction {
+ foreach line [split $data "\n"] {
+ if {[string trim $line] eq ""} {
+ continue
+ } elseif {[string match //* $line]} {
+ continue
+ } elseif {[string match !* $line]} {
+ set line [string range $line 1 end]
+ set idna [string tolower [::tcl::idna encode $line]]
+ set utf [::tcl::idna decode [string tolower $line]]
+ db eval {
+ INSERT OR REPLACE INTO domains (domain, forbidden)
+ VALUES ($utf, 0);
+ }
+ if {$idna ne $utf} {
+ db eval {
+ INSERT OR REPLACE INTO domains (domain, forbidden)
+ VALUES ($idna, 0);
+ }
+ }
+ } else {
+ if {[string match {\*.*} $line]} {
+ set line [string range $line 2 end]
+ set idna [string tolower [::tcl::idna encode $line]]
+ set utf [::tcl::idna decode [string tolower $line]]
+ db eval {
+ INSERT OR REPLACE INTO forbiddenSuper (domain)
+ VALUES ($utf);
+ }
+ if {$idna ne $utf} {
+ db eval {
+ INSERT OR REPLACE INTO forbiddenSuper (domain)
+ VALUES ($idna);
+ }
+ }
+ } else {
+ set idna [string tolower [::tcl::idna encode $line]]
+ set utf [::tcl::idna decode [string tolower $line]]
+ }
+ db eval {
+ INSERT OR REPLACE INTO domains (domain, forbidden)
+ VALUES ($utf, 1);
+ }
+ if {$idna ne $utf} {
+ db eval {
+ INSERT OR REPLACE INTO domains (domain, forbidden)
+ VALUES ($idna, 1);
+ }
+ }
+ }
+ if {$utf ne [::tcl::idna decode [string tolower $idna]]} {
+ log warn "mismatch in IDNA handling for %s (%d, %s, %s)" \
+ $idna $line $utf [::tcl::idna decode $idna]
+ }
+ }
+
+ dict with meta {
+ set installDate [clock seconds]
+ db eval {
+ INSERT OR REPLACE INTO domainCacheMetadata
+ (id, retrievalDate, installDate)
+ VALUES (1, $retrievalDate, $installDate);
+ }
+ }
+ }
+ set n [expr {[db total_changes] - $n}]
+ log info "constructed domain info with %d entries" $n
+ }
+
+ # This forces a rebuild of the domain data, loading it from the
+ # configured source: the online list unless -offline is set, otherwise
+ # the bundled file (see InitDomainList).
+ method forceLoadDomainData {} {
+ db transaction {
+ db eval {
+ DELETE FROM domains;
+ DELETE FROM forbiddenSuper;
+ INSERT OR REPLACE INTO domainCacheMetadata
+ (id, retrievalDate, installDate)
+ VALUES (1, -1, -1);
+ }
+ my InitDomainList
+ }
+ }
+
+ destructor {
+ catch {
+ after cancel $purgeTimer
+ }
+ catch {
+ after cancel $refreshTimer
+ }
+ catch {
+ db close
+ }
+ return
+ }
+
+ method GetCookiesForHostAndPath {listVar secure host path fullhost} {
+ upvar 1 $listVar result
+ log debug "check for cookies for %s" [locn $secure $host $path]
+ set exact [expr {$host eq $fullhost}]
+ set now [clock seconds]
+ db eval {
+ SELECT id, key, value FROM persistentCookies
+ WHERE domain = $host AND path = $path AND secure <= $secure
+ AND (NOT originonly OR domain = $fullhost)
+ AND originonly = $exact
+ } {
+ lappend result $key $value
+ db eval {
+ UPDATE persistentCookies SET lastuse = $now WHERE id = $id
+ }
+ }
+ db eval {
+ SELECT id, key, value FROM sessionCookies
+ WHERE domain = $host AND path = $path AND secure <= $secure
+ AND (NOT originonly OR domain = $fullhost)
+ AND originonly = $exact
+ } {
+ lappend result $key $value
+ db eval {
+ UPDATE sessionCookies SET lastuse = $now WHERE id = $id
+ }
+ }
+ }
+
+ method getCookies {proto host path} {
+ set result {}
+ set paths [splitPath $path]
+ if {[regexp {[^0-9.]} $host]} {
+ set domains [splitDomain [string tolower [::tcl::idna encode $host]]]
+ } else {
+ # Ugh, it's a numeric domain! Restrict it to just itself...
+ set domains [list $host]
+ }
+ set secure [string equal -nocase $proto "https"]
+ # Open question: how to move these manipulations into the database
+ # engine (if that's where they *should* be).
+ #
+ # Suggestion from kbk:
+ #LENGTH(theColumn) <= LENGTH($queryStr) AND
+ #SUBSTR(theColumn, LENGTH($queryStr) LENGTH(theColumn)+1) = $queryStr
+ #
+ # However, we instead do most of the work in Tcl because that lets us
+ # do the splitting exactly right, and it's far easier to work with
+ # strings in Tcl than in SQL.
+ db transaction {
+ foreach domain $domains {
+ foreach p $paths {
+ my GetCookiesForHostAndPath result $secure $domain $p $host
+ }
+ }
+ return $result
+ }
+ }
+
+ method BadDomain options {
+ if {![dict exists $options domain]} {
+ log error "no domain present in options"
+ return 0
+ }
+ dict with options {}
+ if {$domain ne $origin} {
+ log debug "cookie domain varies from origin (%s, %s)" \
+ $domain $origin
+ if {[string match .* $domain]} {
+ set dotd $domain
+ } else {
+ set dotd .$domain
+ }
+ if {![string equal -length [string length $dotd] \
+ [string reverse $dotd] [string reverse $origin]]} {
+ log warn "bad cookie: domain not suffix of origin"
+ return 1
+ }
+ }
+ if {![regexp {[^0-9.]} $domain]} {
+ if {$domain eq $origin} {
+ # May set for itself
+ return 0
+ }
+ log warn "bad cookie: for a numeric address"
+ return 1
+ }
+ db eval {
+ SELECT forbidden FROM domains WHERE domain = $domain
+ } {
+ if {$forbidden} {
+ log warn "bad cookie: for a forbidden address"
+ }
+ return $forbidden
+ }
+ if {[regexp {^[^.]+\.(.+)$} $domain -> super] && [db exists {
+ SELECT 1 FROM forbiddenSuper WHERE domain = $super
+ }]} then {
+ log warn "bad cookie: for a forbidden address"
+ return 1
+ }
+ return 0
+ }
+
+ # A defined extension point to allow users to easily impose extra policies
+ # on whether to accept cookies from a particular domain and path.
+ method policyAllow {operation domain path} {
+ return true
+ }
+
+ method storeCookie {options} {
+ db transaction {
+ if {[my BadDomain $options]} {
+ return
+ }
+ set now [clock seconds]
+ set persistent [dict exists $options expires]
+ dict with options {}
+ if {!$persistent} {
+ if {![my policyAllow session $domain $path]} {
+ log warn "bad cookie: $domain prohibited by user policy"
+ return
+ }
+ db eval {
+ INSERT OR REPLACE INTO sessionCookies (
+ secure, domain, path, key, value, originonly, creation,
+ lastuse)
+ VALUES ($secure, $domain, $path, $key, $value, $hostonly,
+ $now, $now);
+ DELETE FROM persistentCookies
+ WHERE domain = $domain AND path = $path AND key = $key
+ AND secure <= $secure AND originonly = $hostonly
+ }
+ incr deletions [db changes]
+ log debug "defined session cookie for %s" \
+ [locn $secure $domain $path $key]
+ } elseif {$expires < $now} {
+ if {![my policyAllow delete $domain $path]} {
+ log warn "bad cookie: $domain prohibited by user policy"
+ return
+ }
+ db eval {
+ DELETE FROM persistentCookies
+ WHERE domain = $domain AND path = $path AND key = $key
+ AND secure <= $secure AND originonly = $hostonly
+ }
+ set del [db changes]
+ db eval {
+ DELETE FROM sessionCookies
+ WHERE domain = $domain AND path = $path AND key = $key
+ AND secure <= $secure AND originonly = $hostonly
+ }
+ incr deletions [incr del [db changes]]
+ log debug "deleted %d cookies for %s" \
+ $del [locn $secure $domain $path $key]
+ } else {
+ if {![my policyAllow set $domain $path]} {
+ log warn "bad cookie: $domain prohibited by user policy"
+ return
+ }
+ db eval {
+ INSERT OR REPLACE INTO persistentCookies (
+ secure, domain, path, key, value, originonly, expiry,
+ creation, lastuse)
+ VALUES ($secure, $domain, $path, $key, $value, $hostonly,
+ $expires, $now, $now);
+ DELETE FROM sessionCookies
+ WHERE domain = $domain AND path = $path AND key = $key
+ AND secure <= $secure AND originonly = $hostonly
+ }
+ incr deletions [db changes]
+ log debug "defined persistent cookie for %s, expires at %s" \
+ [locn $secure $domain $path $key] \
+ [clock format $expires]
+ }
+ }
+ }
+
+ method PurgeCookies {} {
+ namespace upvar [info object namespace [self class]] \
+ vacuumtrigger trigger retainlimit retain
+ my PostponePurge
+ set now [clock seconds]
+ log debug "purging cookies that expired before %s" [clock format $now]
+ db transaction {
+ db eval {
+ DELETE FROM persistentCookies WHERE expiry < $now
+ }
+ incr deletions [db changes]
+ db eval {
+ DELETE FROM persistentCookies WHERE id IN (
+ SELECT id FROM persistentCookies ORDER BY lastuse ASC
+ LIMIT -1 OFFSET $retain)
+ }
+ incr deletions [db changes]
+ db eval {
+ DELETE FROM sessionCookies WHERE id IN (
+ SELECT id FROM sessionCookies ORDER BY lastuse
+ LIMIT -1 OFFSET $retain)
+ }
+ incr deletions [db changes]
+ }
+
+ # Once we've deleted a fair bit, vacuum the database. Must be done
+ # outside a transaction.
+ if {$deletions > $trigger} {
+ set deletions 0
+ log debug "vacuuming cookie database"
+ catch {
+ db eval {
+ VACUUM
+ }
+ }
+ }
+ }
+
+ forward Database db
+
+ method lookup {{host ""} {key ""}} {
+ set host [string tolower [::tcl::idna encode $host]]
+ db transaction {
+ if {$host eq ""} {
+ set result {}
+ db eval {
+ SELECT DISTINCT domain FROM cookies
+ ORDER BY domain
+ } {
+ lappend result [::tcl::idna decode [string tolower $domain]]
+ }
+ return $result
+ } elseif {$key eq ""} {
+ set result {}
+ db eval {
+ SELECT DISTINCT key FROM cookies
+ WHERE domain = $host
+ ORDER BY key
+ } {
+ lappend result $key
+ }
+ return $result
+ } else {
+ db eval {
+ SELECT value FROM cookies
+ WHERE domain = $host AND key = $key
+ LIMIT 1
+ } {
+ return $value
+ }
+ return -code error "no such key for that host"
+ }
+ }
+ }
+}
+
+# Local variables:
+# mode: tcl
+# fill-column: 78
+# End:
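
For reference, a hedged sketch of the class-level configure and per-instance lookup API defined in cookiejar.tcl above; the option values and hostnames are examples only:

    package require cookiejar

    # Class-wide options (handled by the configure method in the "self"
    # block); these affect all jars.
    http::cookiejar configure -loglevel debug
    http::cookiejar configure -offline true
    http::cookiejar configure -retain 200

    # Per-instance inspection: no arguments lists known domains, one
    # argument lists cookie keys for that domain, two return a value.
    set jar [http::cookiejar new]        ;# in-memory store
    puts [$jar lookup]
    puts [$jar lookup example.com]
    # $jar lookup example.com sessionid  ;# returns the value, or errors
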
diff --git a/library/http/effective_tld_names.txt.gz b/library/http/effective_tld_names.txt.gz
new file mode 100644
index 0000000..9ce2b69
--- /dev/null
+++ b/library/http/effective_tld_names.txt.gz
Binary files differ
diff --git a/library/http/http.tcl b/library/http/http.tcl
index f82bced..7236bae 100644
--- a/library/http/http.tcl
+++ b/library/http/http.tcl
@@ -20,6 +20,7 @@ namespace eval http {
if {![info exists http]} {
array set http {
-accept */*
+ -cookiejar {}
-pipeline 1
-postfresh 0
-proxyhost {}
@@ -127,6 +128,18 @@ namespace eval http {
set defaultKeepalive 0
}
+ # Regular expression used to parse cookies
+ variable CookieRE {(?x) # EXPANDED SYNTAX
+ \s* # Ignore leading spaces
+ ([^][\u0000- ()<>@,;:\\""/?={}\u007f-\uffff]+) # Match the name
+ = # LITERAL: Equal sign
+ ([!\u0023-+\u002D-:<-\u005B\u005D-~]*) # Match the value
+ (?:
+ \s* ; \s* # LITERAL: semicolon
+ ([^\u0000]+) # Match the options
+ )?
+ }
+
namespace export geturl config reset wait formatQuery quoteString
namespace export register unregister registerError
# - Useful, but not exported: data, size, status, code, cleanup, error,
@@ -892,8 +905,12 @@ proc http::geturl {url args} {
}
return -code error "Illegal characters in URL path"
}
+ if {![regexp {^[^?#]+} $srvurl state(path)]} {
+ set state(path) /
+ }
} else {
set srvurl /
+ set state(path) /
}
if {$proto eq ""} {
set proto http
@@ -1354,12 +1371,16 @@ proc http::Connected {token proto phost srvurl} {
puts $sock "$how $srvurl HTTP/$state(-protocol)"
if {[dict exists $state(-headers) Host]} {
# Allow Host spoofing. [Bug 928154]
- puts $sock "Host: [dict get $state(-headers) Host]"
+ set hostHdr [dict get $state(-headers) Host]
+ regexp {^[^:]+} $hostHdr state(host)
+ puts $sock "Host: $hostHdr"
} elseif {$port == $defport} {
# Don't add port in this case, to handle broken servers. [Bug
# #504508]
+ set state(host) $host
puts $sock "Host: $host"
} else {
+ set state(host) $host
puts $sock "Host: $host:$port"
}
puts $sock "User-Agent: $http(-useragent)"
@@ -1421,6 +1442,22 @@ proc http::Connected {token proto phost srvurl} {
seek $state(-querychannel) $start
}
+ # Note that we don't do Cookie2; that's much nastier and not normally
+ # observed in practice either. It also doesn't fix the multitude of
+ # bugs in the basic cookie spec.
+ if {$http(-cookiejar) ne ""} {
+ set cookies ""
+ set separator ""
+ foreach {key value} [{*}$http(-cookiejar) \
+ getCookies $proto $host $state(path)] {
+ append cookies $separator $key = $value
+ set separator "; "
+ }
+ if {$cookies ne ""} {
+ puts $sock "Cookie: $cookies"
+ }
+ }
+
# Flush the request header and set up the fileevent that will either
# push the POST data or read the response.
#
@@ -2693,6 +2730,11 @@ proc http::Event {sock token} {
set state(connection) \
[string trim [string tolower $value]]
}
+ set-cookie {
+ if {$http(-cookiejar) ne ""} {
+ ParseCookie $token [string trim $value]
+ }
+ }
}
lappend state(meta) $key [string trim $value]
}
@@ -2990,6 +3032,83 @@ proc http::IsBinaryContentType {type} {
return true
}
+proc http::ParseCookie {token value} {
+ variable http
+ variable CookieRE
+ variable $token
+ upvar 0 $token state
+
+ if {![regexp $CookieRE $value -> cookiename cookieval opts]} {
+ # Bad cookie! No biscuit!
+ return
+ }
+
+ # Convert the options into a list before feeding into the cookie store;
+ # ugly, but quite easy.
+ set realopts {hostonly 1 path / secure 0 httponly 0}
+ dict set realopts origin $state(host)
+ dict set realopts domain $state(host)
+ foreach option [split [regsub -all {;\s+} $opts \u0000] \u0000] {
+ regexp {^(.*?)(?:=(.*))?$} $option -> optname optval
+ switch -exact -- [string tolower $optname] {
+ expires {
+ if {[catch {
+ #Sun, 06 Nov 1994 08:49:37 GMT
+ dict set realopts expires \
+ [clock scan $optval -format "%a, %d %b %Y %T %Z"]
+ }] && [catch {
+ # Google does this one
+ #Mon, 01-Jan-1990 00:00:00 GMT
+ dict set realopts expires \
+ [clock scan $optval -format "%a, %d-%b-%Y %T %Z"]
+ }] && [catch {
+ # This is in the RFC, but it is also in the original
+ # Netscape cookie spec, now online at:
+ # <URL:http://curl.haxx.se/rfc/cookie_spec.html>
+ #Sunday, 06-Nov-94 08:49:37 GMT
+ dict set realopts expires \
+ [clock scan $optval -format "%A, %d-%b-%y %T %Z"]
+ }]} {catch {
+ #Sun Nov 6 08:49:37 1994
+ dict set realopts expires \
+ [clock scan $optval -gmt 1 -format "%a %b %d %T %Y"]
+ }}
+ }
+ max-age {
+ # Normalize
+ if {[string is integer -strict $optval]} {
+ dict set realopts expires [expr {[clock seconds] + $optval}]
+ }
+ }
+ domain {
+ # From the domain-matches definition [RFC 2109, section 2]:
+ # Host A's name domain-matches host B's if [...]
+ # A is a FQDN string and has the form NB, where N is a
+ # non-empty name string, B has the form .B', and B' is a
+ # FQDN string. (So, x.y.com domain-matches .y.com but
+ # not y.com.)
+ if {$optval ne "" && ![string match *. $optval]} {
+ dict set realopts domain [string trimleft $optval "."]
+ dict set realopts hostonly [expr {
+ ! [string match .* $optval]
+ }]
+ }
+ }
+ path {
+ if {[string match /* $optval]} {
+ dict set realopts path $optval
+ }
+ }
+ secure - httponly {
+ dict set realopts [string tolower $optname] 1
+ }
+ }
+ }
+ dict set realopts key $cookiename
+ dict set realopts value $cookieval
+ {*}$http(-cookiejar) storeCookie $realopts
+}
+
# http::getTextLine --
#
# Get one line with the stream in crlf mode.
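
To make the Set-Cookie handling above concrete, here is a hypothetical illustration (not part of the patch) of the options dictionary that ParseCookie hands to the configured jar's storeCookie method; the header value and hostnames are invented:

    # A response from www.example.com carrying the header
    #   Set-Cookie: SID=31d4d96e407aad42; Path=/; Domain=.example.com; Secure
    # is parsed into roughly this dict before the storeCookie call:
    set realopts {
        hostonly 0
        path     /
        secure   1
        httponly 0
        origin   www.example.com
        domain   example.com
        key      SID
        value    31d4d96e407aad42
    }
    # There is no "expires" or "max-age" attribute, so the jar records this
    # as a session cookie (see storeCookie in cookiejar.tcl).
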
diff --git a/library/http/idna.tcl b/library/http/idna.tcl
new file mode 100644
index 0000000..2a7d289
--- /dev/null
+++ b/library/http/idna.tcl
@@ -0,0 +1,292 @@
+# idna.tcl --
+#
+# Implementation of IDNA (Internationalized Domain Names for
+# Applications) encoding/decoding system, built on a punycode engine
+# developed directly from the code in RFC 3492, Appendix C (with
+# substantial modifications).
+#
+# This implementation includes code from that RFC, translated to Tcl; the
+# other parts are:
+# Copyright (c) 2014 Donal K. Fellows
+#
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
+
+namespace eval ::tcl::idna {
+ namespace ensemble create -command puny -map {
+ encode punyencode
+ decode punydecode
+ }
+ namespace ensemble create -command ::tcl::idna -map {
+ encode IDNAencode
+ decode IDNAdecode
+ puny puny
+ version {::apply {{} {package present tcl::idna} ::}}
+ }
+
+ proc IDNAencode hostname {
+ set parts {}
+ # Split term from RFC 3490, Sec 3.1
+ foreach part [split $hostname "\u002E\u3002\uFF0E\uFF61"] {
+ if {[regexp {[^-A-Za-z0-9]} $part]} {
+ if {[regexp {[^-A-Za-z0-9\u00a1-\uffff]} $part ch]} {
+ scan $ch %c c
+ if {$ch < "!" || $ch > "~"} {
+ set ch [format "\\u%04x" $c]
+ }
+ throw [list IDNA INVALID_NAME_CHARACTER $ch] \
+ "bad character \"$ch\" in DNS name"
+ }
+ set part xn--[punyencode $part]
+ # Length restriction from RFC 5890, Sec 2.3.1
+ if {[string length $part] > 63} {
+ throw [list IDNA OVERLONG_PART $part] \
+ "hostname part too long"
+ }
+ }
+ lappend parts $part
+ }
+ return [join $parts .]
+ }
+ proc IDNAdecode hostname {
+ set parts {}
+ # Split term from RFC 3490, Sec 3.1
+ foreach part [split $hostname "\u002E\u3002\uFF0E\uFF61"] {
+ if {[string match -nocase "xn--*" $part]} {
+ set part [punydecode [string range $part 4 end]]
+ }
+ lappend parts $part
+ }
+ return [join $parts .]
+ }
+
+ variable digits [split "abcdefghijklmnopqrstuvwxyz0123456789" ""]
+ # Bootstring parameters for Punycode
+ variable base 36
+ variable tmin 1
+ variable tmax 26
+ variable skew 38
+ variable damp 700
+ variable initial_bias 72
+ variable initial_n 0x80
+
+ variable max_codepoint 0x10FFFF
+
+ proc adapt {delta first numchars} {
+ variable base
+ variable tmin
+ variable tmax
+ variable damp
+ variable skew
+
+ set delta [expr {$delta / ($first ? $damp : 2)}]
+ incr delta [expr {$delta / $numchars}]
+ set k 0
+ while {$delta > ($base - $tmin) * $tmax / 2} {
+ set delta [expr {$delta / ($base-$tmin)}]
+ incr k $base
+ }
+ return [expr {$k + ($base-$tmin+1) * $delta / ($delta+$skew)}]
+ }
+
+ # Main punycode encoding function
+ proc punyencode {string {case ""}} {
+ variable digits
+ variable tmin
+ variable tmax
+ variable base
+ variable initial_n
+ variable initial_bias
+
+ if {![string is boolean $case]} {
+ return -code error "\"$case\" must be boolean"
+ }
+
+ set in {}
+ foreach char [set string [split $string ""]] {
+ scan $char "%c" ch
+ lappend in $ch
+ }
+ set output {}
+
+ # Initialize the state:
+ set n $initial_n
+ set delta 0
+ set bias $initial_bias
+
+ # Handle the basic code points:
+ foreach ch $string {
+ if {$ch < "\u0080"} {
+ if {$case eq ""} {
+ append output $ch
+ } elseif {[string is true $case]} {
+ append output [string toupper $ch]
+ } elseif {[string is false $case]} {
+ append output [string tolower $ch]
+ }
+ }
+ }
+
+ set b [string length $output]
+
+ # h is the number of code points that have been handled, b is the
+ # number of basic code points.
+
+ if {$b > 0} {
+ append output "-"
+ }
+
+ # Main encoding loop:
+
+ for {set h $b} {$h < [llength $in]} {incr delta; incr n} {
+ # All non-basic code points < n have been handled already. Find
+ # the next larger one:
+
+ set m inf
+ foreach ch $in {
+ if {$ch >= $n && $ch < $m} {
+ set m $ch
+ }
+ }
+
+ # Increase delta enough to advance the decoder's <n,i> state to
+ # <m,0>, but guard against overflow:
+
+ if {$m-$n > (0xffffffff-$delta)/($h+1)} {
+ throw {PUNYCODE OVERFLOW} "overflow in delta computation"
+ }
+ incr delta [expr {($m-$n) * ($h+1)}]
+ set n $m
+
+ foreach ch $in {
+ if {$ch < $n && ([incr delta] & 0xffffffff) == 0} {
+ throw {PUNYCODE OVERFLOW} "overflow in delta computation"
+ }
+
+ if {$ch != $n} {
+ continue
+ }
+
+ # Represent delta as a generalized variable-length integer:
+
+ for {set q $delta; set k $base} true {incr k $base} {
+ set t [expr {min(max($k-$bias, $tmin), $tmax)}]
+ if {$q < $t} {
+ break
+ }
+ append output \
+ [lindex $digits [expr {$t + ($q-$t)%($base-$t)}]]
+ set q [expr {($q-$t) / ($base-$t)}]
+ }
+
+ append output [lindex $digits $q]
+ set bias [adapt $delta [expr {$h==$b}] [expr {$h+1}]]
+ set delta 0
+ incr h
+ }
+ }
+
+ return $output
+ }
+
+ # Main punycode decode function
+ proc punydecode {string {case ""}} {
+ variable tmin
+ variable tmax
+ variable base
+ variable initial_n
+ variable initial_bias
+ variable max_codepoint
+
+ if {![string is boolean $case]} {
+ return -code error "\"$case\" must be boolean"
+ }
+
+ # Initialize the state:
+
+ set n $initial_n
+ set i 0
+ set first 1
+ set bias $initial_bias
+
+ # Split the string into the "real" ASCII characters and the ones to
+ # feed into the main decoder. Note that we don't need to check the
+ # result of [regexp] because that RE will technically match any string
+ # at all.
+
+ regexp {^(?:(.*)-)?([^-]*)$} $string -> pre post
+ if {[string is true -strict $case]} {
+ set pre [string toupper $pre]
+ } elseif {[string is false -strict $case]} {
+ set pre [string tolower $pre]
+ }
+ set output [split $pre ""]
+ set out [llength $output]
+
+ # Main decoding loop:
+
+ for {set in 0} {$in < [string length $post]} {incr in} {
+ # Decode a generalized variable-length integer into delta, which
+ # gets added to i. The overflow checking is easier if we increase
+ # i as we go, then subtract off its starting value at the end to
+ # obtain delta.
+
+ for {set oldi $i; set w 1; set k $base} 1 {incr in} {
+ if {[set ch [string index $post $in]] eq ""} {
+ throw {PUNYCODE BAD_INPUT LENGTH} "exceeded input data"
+ }
+ if {[string match -nocase {[a-z]} $ch]} {
+ scan [string toupper $ch] %c digit
+ incr digit -65
+ } elseif {[string match {[0-9]} $ch]} {
+ set digit [expr {$ch + 26}]
+ } else {
+ throw {PUNYCODE BAD_INPUT CHAR} \
+ "bad decode character \"$ch\""
+ }
+ incr i [expr {$digit * $w}]
+ set t [expr {min(max($tmin, $k-$bias), $tmax)}]
+ if {$digit < $t} {
+ set bias [adapt [expr {$i-$oldi}] $first [incr out]]
+ set first 0
+ break
+ }
+ if {[set w [expr {$w * ($base - $t)}]] > 0x7fffffff} {
+ throw {PUNYCODE OVERFLOW} \
+ "excessively large integer computed in digit decode"
+ }
+ incr k $base
+ }
+
+ # i was supposed to wrap around from out+1 to 0, incrementing n
+ # each time, so we'll fix that now:
+
+ if {[incr n [expr {$i / $out}]] > 0x7fffffff} {
+ throw {PUNYCODE OVERFLOW} \
+ "excessively large integer computed in character choice"
+ } elseif {$n > $max_codepoint} {
+ if {$n >= 0x00d800 && $n < 0x00e000} {
+ # Bare surrogate?!
+ throw {PUNYCODE NON_BMP} \
+ [format "unsupported character U+%06x" $n]
+ }
+ throw {PUNYCODE NON_UNICODE} "bad codepoint $n"
+ }
+ set i [expr {$i % $out}]
+
+ # Insert n at position i of the output:
+
+ set output [linsert $output $i [format "%c" $n]]
+ incr i
+ }
+
+ return [join $output ""]
+ }
+}
+
+package provide tcl::idna 1.0
+
+# Local variables:
+# mode: tcl
+# fill-column: 78
+# End:
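
A brief sketch of the tcl::idna ensemble defined above; the hostname is illustrative:

    package require tcl::idna

    # ASCII-compatible encoding of an internationalized domain name...
    puts [tcl::idna encode bücher.example]          ;# xn--bcher-kva.example
    # ...and decoding back to the Unicode form.
    puts [tcl::idna decode xn--bcher-kva.example]   ;# bücher.example
    # The underlying punycode engine is exposed as a sub-ensemble.
    puts [tcl::idna puny encode bücher]             ;# bcher-kva
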
diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl
index 4f74635..3bc111f 100644
--- a/library/http/pkgIndex.tcl
+++ b/library/http/pkgIndex.tcl
@@ -1,2 +1,4 @@
if {![package vsatisfies [package provide Tcl] 8.6-]} {return}
package ifneeded http 2.9.0 [list tclPkgSetup $dir http 2.9.0 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}]
+package ifneeded cookiejar 0.1 [list source [file join $dir cookiejar.tcl]]
+package ifneeded tcl::idna 1.0 [list source [file join $dir idna.tcl]]