# http.tcl --
#
#	Client-side HTTP for GET, POST, and HEAD commands.
#	These routines can be used in untrusted code that uses 
#	the Safesock security policy.  These procedures use a 
#	callback interface to avoid using vwait, which is not 
#	defined in the safe base.
#
# See the file "license.terms" for information on usage and
# redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# RCS: @(#) $Id: http.tcl,v 1.47 2003/09/29 10:01:33 dkf Exp $

# Rough version history:
# 1.0	Old http_get interface
# 2.0	http:: namespace and http::geturl
# 2.1	Added callbacks to handle arriving data, and timeouts
# 2.2	Added ability to fetch into a channel
# 2.3	Added SSL support, and ability to post from a channel
#	This version also cleans up error cases and eliminates the
#	"ioerror" status in favor of raising an error
# 2.4	Added -binary option to http::geturl and charset element
#	to the state array.

package require Tcl 8.2
# keep this in sync with pkgIndex.tcl
# and with the install directories in Makefiles
package provide http 2.4.4

namespace eval http {
    variable http
    array set http {
	-accept */*
	-proxyhost {}
	-proxyport {}
	-proxyfilter http::ProxyRequired
    }
    set http(-useragent) "Tcl http client package [package provide http]"

    proc init {} {
	variable formMap
	variable alphanumeric a-zA-Z0-9
	for {set i 0} {$i <= 256} {incr i} {
	    set c [format %c $i]
	    if {![string match \[$alphanumeric\] $c]} {
		set formMap($c) %[format %.2x $i]
	    }
	}
	# These are handled specially
	array set formMap { " " + \n %0d%0a }
    }
    init

    variable urlTypes
    array set urlTypes {
	http	{80 ::socket}
    }

    variable encodings [string tolower [encoding names]]
    # This can be changed, but iso8859-1 is the RFC standard.
    variable defaultCharset "iso8859-1"

    namespace export geturl config reset wait formatQuery register unregister
    # Useful, but not exported: data size status code
}

# http::register --
#
#     See documentation for details.
#
# Arguments:
#     proto           URL protocol prefix, e.g. https
#     port            Default port for protocol
#     command         Command to use to create socket
# Results:
#     list of port and command that was registered.

proc http::register {proto port command} {
    variable urlTypes
    set urlTypes($proto) [list $port $command]
}

# http::unregister --
#
#     Unregisters URL protocol handler
#
# Arguments:
#     proto           URL protocol prefix, e.g. https
# Results:
#     list of port and command that was unregistered.

proc http::unregister {proto} {
    variable urlTypes
    if {![info exists urlTypes($proto)]} {
	return -code error "unsupported url type \"$proto\""
    }
    set old $urlTypes($proto)
    unset urlTypes($proto)
    return $old
}

# http::config --
#
#	See documentation for details.
#
# Arguments:
#	args		Options parsed by the procedure.
# Results:
#        TODO

proc http::config {args} {
    variable http
    set options [lsort [array names http -*]]
    set usage [join $options ", "]
    if {[llength $args] == 0} {
	set result {}
	foreach name $options {
	    lappend result $name $http($name)
	}
	return $result
    }
    set options [string map {- ""} $options]
    set pat ^-([join $options |])$
    if {[llength $args] == 1} {
	set flag [lindex $args 0]
	if {[regexp -- $pat $flag]} {
	    return $http($flag)
	} else {
	    return -code error "Unknown option $flag, must be: $usage"
	}
    } else {
	foreach {flag value} $args {
	    if {[regexp -- $pat $flag]} {
		set http($flag) $value
	    } else {
		return -code error "Unknown option $flag, must be: $usage"
	    }
	}
    }
}

# http::Finish --
#
#	Clean up the socket and eval close time callbacks
#
# Arguments:
#	token	    Connection token.
#	errormsg    (optional) If set, forces status to error.
#       skipCB      (optional) If set, don't call the -command callback.  This
#                   is useful when geturl wants to throw an exception instead
#                   of calling the callback.  That way, the same error isn't
#                   reported to two places.
#
# Side Effects:
#        Closes the socket

proc http::Finish { token {errormsg ""} {skipCB 0}} {
    variable $token
    upvar 0 $token state
    global errorInfo errorCode
    if {[string length $errormsg] != 0} {
	set state(error) [list $errormsg $errorInfo $errorCode]
	set state(status) error
    }
    catch {close $state(sock)}
    catch {after cancel $state(after)}
    if {[info exists state(-command)] && !$skipCB} {
	if {[catch {eval $state(-command) {$token}} err]} {
	    if {[string length $errormsg] == 0} {
		set state(error) [list $err $errorInfo $errorCode]
		set state(status) error
	    }
	}
	if {[info exists state(-command)]} {
	    # Command callback may already have unset our state
	    unset state(-command)
	}
    }
}

# http::reset --
#
#	See documentation for details.
#
# Arguments:
#	token	Connection token.
#	why	Status info.
#
# Side Effects:
#       See Finish

proc http::reset { token {why reset} } {
    variable $token
    upvar 0 $token state
    set state(status) $why
    catch {fileevent $state(sock) readable {}}
    catch {fileevent $state(sock) writable {}}
    Finish $token
    if {[info exists state(error)]} {
	set errorlist $state(error)
	unset state
	eval ::error $errorlist
    }
}

# http::geturl --
#
#	Establishes a connection to a remote url via http.
#
# Arguments:
#       url		The http URL to goget.
#       args		Option value pairs. Valid options include:
#				-blocksize, -validate, -headers, -timeout
# Results:
#	Returns a token for this connection.
#	This token is the name of an array that the caller should
#	unset to garbage collect the state.

proc http::geturl { url args } {
    variable http
    variable urlTypes
    variable defaultCharset

    # Initialize the state variable, an array.  We'll return the
    # name of this array as the token for the transaction.

    if {![info exists http(uid)]} {
	set http(uid) 0
    }
    set token [namespace current]::[incr http(uid)]
    variable $token
    upvar 0 $token state
    reset $token

    # Process command options.

    array set state {
	-binary		false
	-blocksize 	8192
	-queryblocksize 8192
	-validate 	false
	-headers 	{}
	-timeout 	0
	-type           application/x-www-form-urlencoded
	-queryprogress	{}
	state		header
	meta		{}
	coding		{}
	currentsize	0
	totalsize	0
	querylength	0
	queryoffset	0
        type            text/html
        body            {}
	status		""
	http            ""
    }
    # These flags have their types verified [Bug 811170]
    array set type {
	-binary		boolean
	-blocksize	integer
	-queryblocksize integer
	-validate	boolean
	-timeout	integer
    }	
    set state(charset)	$defaultCharset
    set options {-binary -blocksize -channel -command -handler -headers \
	    -progress -query -queryblocksize -querychannel -queryprogress\
	    -validate -timeout -type}
    set usage [join $options ", "]
    set options [string map {- ""} $options]
    set pat ^-([join $options |])$
    foreach {flag value} $args {
	if {[regexp $pat $flag]} {
	    # Validate numbers and booleans
	    if {[info exists type($flag)] && \
		    ![string is $type($flag) -strict $value]} {
		unset $token
		return -code error "Bad value for $flag ($value), must be $type($flag)"
	    }
	    set state($flag) $value
	} else {
	    unset $token
	    return -code error "Unknown option $flag, can be: $usage"
	}
    }

    # Make sure -query and -querychannel aren't both specified

    set isQueryChannel [info exists state(-querychannel)]
    set isQuery [info exists state(-query)]
    if {$isQuery && $isQueryChannel} {
	unset $token
	return -code error "Can't combine -query and -querychannel options!"
    }

    # Validate URL, determine the server host and port, and check proxy case
    # Recognize user:pass@host URLs also, although we do not do anything
    # with that info yet.

    set exp {^(([^:]*)://)?([^@]+@)?([^/:]+)(:([0-9]+))?(/.*)?$}
    if {![regexp -nocase $exp $url x prefix proto user host y port srvurl]} {
	unset $token
	return -code error "Unsupported URL: $url"
    }
    if {[string length $proto] == 0} {
	set proto http
	set url ${proto}://$url
    }
    if {![info exists urlTypes($proto)]} {
	unset $token
	return -code error "Unsupported URL type \"$proto\""
    }
    set defport [lindex $urlTypes($proto) 0]
    set defcmd [lindex $urlTypes($proto) 1]

    if {[string length $port] == 0} {
	set port $defport
    }
    if {[string length $srvurl] == 0} {
	set srvurl /
    }
    if {[string length $proto] == 0} {
	set url http://$url
    }
    set state(url) $url
    if {![catch {$http(-proxyfilter) $host} proxy]} {
	set phost [lindex $proxy 0]
	set pport [lindex $proxy 1]
    }

    # If a timeout is specified we set up the after event
    # and arrange for an asynchronous socket connection.

    if {$state(-timeout) > 0} {
	set state(after) [after $state(-timeout) \
		[list http::reset $token timeout]]
	set async -async
    } else {
	set async ""
    }

    # If we are using the proxy, we must pass in the full URL that
    # includes the server name.

    if {[info exists phost] && [string length $phost]} {
	set srvurl $url
	set conStat [catch {eval $defcmd $async {$phost $pport}} s]
    } else {
	set conStat [catch {eval $defcmd $async {$host $port}} s]
    }
    if {$conStat} {

	# something went wrong while trying to establish the connection
	# Clean up after events and such, but DON'T call the command callback
	# (if available) because we're going to throw an exception from here
	# instead.
	Finish $token "" 1
	cleanup $token
	return -code error $s
    }
    set state(sock) $s

    # Wait for the connection to complete

    if {$state(-timeout) > 0} {
	fileevent $s writable [list http::Connect $token]
	http::wait $token

	if {[string equal $state(status) "error"]} {
	    # something went wrong while trying to establish the connection
	    # Clean up after events and such, but DON'T call the command
	    # callback (if available) because we're going to throw an 
	    # exception from here instead.
	    set err [lindex $state(error) 0]
	    cleanup $token
	    return -code error $err
	} elseif {![string equal $state(status) "connect"]} {
	    # Likely to be connection timeout
	    return $token
	}
	set state(status) ""
    }

    # Send data in cr-lf format, but accept any line terminators

    fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize)

    # The following is disallowed in safe interpreters, but the socket
    # is already in non-blocking mode in that case.

    catch {fconfigure $s -blocking off}
    set how GET
    if {$isQuery} {
	set state(querylength) [string length $state(-query)]
	if {$state(querylength) > 0} {
	    set how POST
	    set contDone 0
	} else {
	    # there's no query data
	    unset state(-query)
	    set isQuery 0
	}
    } elseif {$state(-validate)} {
	set how HEAD
    } elseif {$isQueryChannel} {
	set how POST
	# The query channel must be blocking for the async Write to
	# work properly.
	fconfigure $state(-querychannel) -blocking 1 -translation binary
	set contDone 0
    }

    if {[catch {
	puts $s "$how $srvurl HTTP/1.0"
	puts $s "Accept: $http(-accept)"
	if {$port == $defport} {
	    # Don't add port in this case, to handle broken servers.
	    # [Bug #504508]
	    puts $s "Host: $host"
	} else {
	    puts $s "Host: $host:$port"
	}
	puts $s "User-Agent: $http(-useragent)"
	foreach {key value} $state(-headers) {
	    set value [string map [list \n "" \r ""] $value]
	    set key [string trim $key]
	    if {[string equal $key "Content-Length"]} {
		set contDone 1
		set state(querylength) $value
	    }
	    if {[string length $key]} {
		puts $s "$key: $value"
	    }
	}
	if {$isQueryChannel && $state(querylength) == 0} {
	    # Try to determine size of data in channel
	    # If we cannot seek, the surrounding catch will trap us

	    set start [tell $state(-querychannel)]
	    seek $state(-querychannel) 0 end
	    set state(querylength) \
		    [expr {[tell $state(-querychannel)] - $start}]
	    seek $state(-querychannel) $start
	}

	# Flush the request header and set up the fileevent that will
	# either push the POST data or read the response.
	#
	# fileevent note:
	#
	# It is possible to have both the read and write fileevents active
	# at this point.  The only scenario it seems to affect is a server
	# that closes the connection without reading the POST data.
	# (e.g., early versions TclHttpd in various error cases).
	# Depending on the platform, the client may or may not be able to
	# get the response from the server because of the error it will
	# get trying to write the post data.  Having both fileevents active
	# changes the timing and the behavior, but no two platforms
	# (among Solaris, Linux, and NT)  behave the same, and none 
	# behave all that well in any case.  Servers should always read thier
	# POST data if they expect the client to read their response.
		
	if {$isQuery || $isQueryChannel} {
	    puts $s "Content-Type: $state(-type)"
	    if {!$contDone} {
		puts $s "Content-Length: $state(querylength)"
	    }
	    puts $s ""
	    fconfigure $s -translation {auto binary}
	    fileevent $s writable [list http::Write $token]
	} else {
	    puts $s ""
	    flush $s
	    fileevent $s readable [list http::Event $token]
	}

	if {! [info exists state(-command)]} {

	    # geturl does EVERYTHING asynchronously, so if the user
	    # calls it synchronously, we just do a wait here.

	    wait $token
	    if {[string equal $state(status) "error"]} {
		# Something went wrong, so throw the exception, and the
		# enclosing catch will do cleanup.
		return -code error [lindex $state(error) 0]
	    }		
	}
    } err]} {
	# The socket probably was never connected,
	# or the connection dropped later.

	# Clean up after events and such, but DON'T call the command callback
	# (if available) because we're going to throw an exception from here
	# instead.
	
	# if state(status) is error, it means someone's already called Finish
	# to do the above-described clean up.
	if {[string equal $state(status) "error"]} {
	    Finish $token $err 1
	}
	cleanup $token
	return -code error $err
    }

    return $token
}

# Data access functions:
# Data - the URL data
# Status - the transaction status: ok, reset, eof, timeout
# Code - the HTTP transaction code, e.g., 200
# Size - the size of the URL data

proc http::data {token} {
    variable $token
    upvar 0 $token state
    return $state(body)
}
proc http::status {token} {
    variable $token
    upvar 0 $token state
    return $state(status)
}
proc http::code {token} {
    variable $token
    upvar 0 $token state
    return $state(http)
}
proc http::ncode {token} {
    variable $token
    upvar 0 $token state
    if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
	return $numeric_code
    } else {
	return $state(http)
    }
}
proc http::size {token} {
    variable $token
    upvar 0 $token state
    return $state(currentsize)
}

proc http::error {token} {
    variable $token
    upvar 0 $token state
    if {[info exists state(error)]} {
	return $state(error)
    }
    return ""
}

# http::cleanup
#
#	Garbage collect the state associated with a transaction
#
# Arguments
#	token	The token returned from http::geturl
#
# Side Effects
#	unsets the state array

proc http::cleanup {token} {
    variable $token
    upvar 0 $token state
    if {[info exists state]} {
	unset state
    }
}

# http::Connect
#
#	This callback is made when an asyncronous connection completes.
#
# Arguments
#	token	The token returned from http::geturl
#
# Side Effects
#	Sets the status of the connection, which unblocks
# 	the waiting geturl call

proc http::Connect {token} {
    variable $token
    upvar 0 $token state
    global errorInfo errorCode
    if {[eof $state(sock)] ||
	[string length [fconfigure $state(sock) -error]]} {
	    Finish $token "connect failed [fconfigure $state(sock) -error]" 1
    } else {
	set state(status) connect
	fileevent $state(sock) writable {}
    }
    return
}

# http::Write
#
#	Write POST query data to the socket
#
# Arguments
#	token	The token for the connection
#
# Side Effects
#	Write the socket and handle callbacks.

proc http::Write {token} {
    variable $token
    upvar 0 $token state
    set s $state(sock)
    
    # Output a block.  Tcl will buffer this if the socket blocks
    
    set done 0
    if {[catch {
	
	# Catch I/O errors on dead sockets

	if {[info exists state(-query)]} {
	    
	    # Chop up large query strings so queryprogress callback
	    # can give smooth feedback

	    puts -nonewline $s \
		    [string range $state(-query) $state(queryoffset) \
		    [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
	    incr state(queryoffset) $state(-queryblocksize)
	    if {$state(queryoffset) >= $state(querylength)} {
		set state(queryoffset) $state(querylength)
		set done 1
	    }
	} else {
	    
	    # Copy blocks from the query channel

	    set outStr [read $state(-querychannel) $state(-queryblocksize)]
	    puts -nonewline $s $outStr
	    incr state(queryoffset) [string length $outStr]
	    if {[eof $state(-querychannel)]} {
		set done 1
	    }
	}
    } err]} {
	# Do not call Finish here, but instead let the read half of
	# the socket process whatever server reply there is to get.

	set state(posterror) $err
	set done 1
    }
    if {$done} {
	catch {flush $s}
	fileevent $s writable {}
	fileevent $s readable [list http::Event $token]
    }

    # Callback to the client after we've completely handled everything

    if {[string length $state(-queryprogress)]} {
	eval $state(-queryprogress) [list $token $state(querylength)\
		$state(queryoffset)]
    }
}

# http::Event
#
#	Handle input on the socket
#
# Arguments
#	token	The token returned from http::geturl
#
# Side Effects
#	Read the socket and handle callbacks.

proc http::Event {token} {
    variable $token
    upvar 0 $token state
    set s $state(sock)

     if {[eof $s]} {
	Eof $token
	return
    }
    if {[string equal $state(state) "header"]} {
	if {[catch {gets $s line} n]} {
	    Finish $token $n
	} elseif {$n == 0} {
	    variable encodings
	    set state(state) body
	    if {$state(-binary) || ![string match -nocase text* $state(type)]
		    || [string match *gzip* $state(coding)]
		    || [string match *compress* $state(coding)]} {
		# Turn off conversions for non-text data
		fconfigure $s -translation binary
		if {[info exists state(-channel)]} {
		    fconfigure $state(-channel) -translation binary
		}
	    } else {
		# If we are getting text, set the incoming channel's
		# encoding correctly.  iso8859-1 is the RFC default, but
		# this could be any IANA charset.  However, we only know
		# how to convert what we have encodings for.
		set idx [lsearch -exact $encodings \
			[string tolower $state(charset)]]
		if {$idx >= 0} {
		    fconfigure $s -encoding [lindex $encodings $idx]
		}
	    }
	    if {[info exists state(-channel)] && \
		    ![info exists state(-handler)]} {
		# Initiate a sequence of background fcopies
		fileevent $s readable {}
		CopyStart $s $token
	    }
	} elseif {$n > 0} {
	    if {[regexp -nocase {^content-type:(.+)$} $line x type]} {
		set state(type) [string trim $type]
		# grab the optional charset information
		regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset)
	    }
	    if {[regexp -nocase {^content-length:(.+)$} $line x length]} {
		set state(totalsize) [string trim $length]
	    }
	    if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} {
		set state(coding) [string trim $coding]
	    }
	    if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
		lappend state(meta) $key [string trim $value]
	    } elseif {[string match HTTP* $line]} {
		set state(http) $line
	    }
	}
    } else {
	if {[catch {
	    if {[info exists state(-handler)]} {
		set n [eval $state(-handler) {$s $token}]
	    } else {
		set block [read $s $state(-blocksize)]
		set n [string length $block]
		if {$n >= 0} {
		    append state(body) $block
		}
	    }
	    if {$n >= 0} {
		incr state(currentsize) $n
	    }
	} err]} {
	    Finish $token $err
	} else {
	    if {[info exists state(-progress)]} {
		eval $state(-progress) \
			{$token $state(totalsize) $state(currentsize)}
	    }
	}
    }
}

# http::CopyStart
#
#	Error handling wrapper around fcopy
#
# Arguments
#	s	The socket to copy from
#	token	The token returned from http::geturl
#
# Side Effects
#	This closes the connection upon error

proc http::CopyStart {s token} {
    variable $token
    upvar 0 $token state
    if {[catch {
	fcopy $s $state(-channel) -size $state(-blocksize) -command \
	    [list http::CopyDone $token]
    } err]} {
	Finish $token $err
    }
}

# http::CopyDone
#
#	fcopy completion callback
#
# Arguments
#	token	The token returned from http::geturl
#	count	The amount transfered
#
# Side Effects
#	Invokes callbacks

proc http::CopyDone {token count {error {}}} {
    variable $token
    upvar 0 $token state
    set s $state(sock)
    incr state(currentsize) $count
    if {[info exists state(-progress)]} {
	eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
    }
    # At this point the token may have been reset
    if {[string length $error]} {
	Finish $token $error
    } elseif {[catch {eof $s} iseof] || $iseof} {
	Eof $token
    } else {
	CopyStart $s $token
    }
}

# http::Eof
#
#	Handle eof on the socket
#
# Arguments
#	token	The token returned from http::geturl
#
# Side Effects
#	Clean up the socket

proc http::Eof {token} {
    variable $token
    upvar 0 $token state
    if {[string equal $state(state) "header"]} {
	# Premature eof
	set state(status) eof
    } else {
	set state(status) ok
    }
    set state(state) eof
    Finish $token
}

# http::wait --
#
#	See documentation for details.
#
# Arguments:
#	token	Connection token.
#
# Results:
#        The status after the wait.

proc http::wait {token} {
    variable $token
    upvar 0 $token state

    if {![info exists state(status)] || [string length $state(status)] == 0} {
	# We must wait on the original variable name, not the upvar alias
	vwait $token\(status)
    }

    return $state(status)
}

# http::formatQuery --
#
#	See documentation for details.
#	Call http::formatQuery with an even number of arguments, where 
#	the first is a name, the second is a value, the third is another 
#	name, and so on.
#
# Arguments:
#	args	A list of name-value pairs.
#
# Results:
#        TODO

proc http::formatQuery {args} {
    set result ""
    set sep ""
    foreach i $args {
	append result $sep [mapReply $i]
	if {[string equal $sep "="]} {
	    set sep &
	} else {
	    set sep =
	}
    }
    return $result
}

# http::mapReply --
#
#	Do x-www-urlencoded character mapping
#
# Arguments:
#	string	The string the needs to be encoded
#
# Results:
#       The encoded string

proc http::mapReply {string} {
    variable formMap
    variable alphanumeric

    # The spec says: "non-alphanumeric characters are replaced by '%HH'"
    # 1 leave alphanumerics characters alone
    # 2 Convert every other character to an array lookup
    # 3 Escape constructs that are "special" to the tcl parser
    # 4 "subst" the result, doing all the array substitutions

    regsub -all \[^$alphanumeric\] $string {$formMap(&)} string
    regsub -all {[][{})\\]\)} $string {\\&} string
    return [subst -nocommand $string]
}

# http::ProxyRequired --
#	Default proxy filter. 
#
# Arguments:
#	host	The destination host
#
# Results:
#       The current proxy settings

proc http::ProxyRequired {host} {
    variable http
    if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
	if {![info exists http(-proxyport)] || \
		![string length $http(-proxyport)]} {
	    set http(-proxyport) 8080
	}
	return [list $http(-proxyhost) $http(-proxyport)]
    }
}