1 files changed, 292 insertions, 0 deletions
diff --git a/library/http/idna.tcl b/library/http/idna.tcl
new file mode 100644
index 0000000..2a7d289
--- /dev/null
+++ b/library/http/idna.tcl
@@ -0,0 +1,292 @@
+# idna.tcl --
+#
+#	Implementation of IDNA (Internationalized Domain Names for
+#	Applications) encoding/decoding system, built on a punycode engine
+#	developed directly from the code in RFC 3492, Appendix C (with
+#	substantial modifications).
+#
+# This implementation includes code from that RFC, translated to Tcl; the
+# other parts are:
+# Copyright (c) 2014 Donal K. Fellows
+#
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
+
+namespace eval ::tcl::idna {
+    # Two command ensembles expose this namespace: [puny] wraps the raw
+    # punycode codec; [::tcl::idna] is the hostname-level entry point.
+    namespace ensemble create -command puny \
+	-map {encode punyencode decode punydecode}
+    namespace ensemble create -command ::tcl::idna -map {
+	encode	IDNAencode
+	decode	IDNAdecode
+	puny	puny
+	version	{::apply {{} {package present tcl::idna} ::}}
+    }
+
+    proc IDNAencode hostname {
+	# Convert a hostname to ASCII: each label with a character other than
+	# an ASCII letter, digit or "-" becomes "xn--" + its punycode form.
+	set segs {}
+	# Label separators are the ones listed in RFC 3490, Sec 3.1
+	foreach seg [split $hostname "\u002E\u3002\uFF0E\uFF61"] {
+	    if {[regexp {[^-A-Za-z0-9\u00a1-\uffff]} $seg bad]} {
+		scan $bad %c code
+		if {$code < 0x21 || $code > 0x7e} {
+		    set bad [format "\\u%04x" $code]
+		}
+		throw [list IDNA INVALID_NAME_CHARACTER $bad] \
+		    "bad character \"$bad\" in DNS name"
+	    } elseif {[regexp {[^-A-Za-z0-9]} $seg]} {
+		# Length restriction from RFC 5890, Sec 2.3.1
+		if {[string length [set seg xn--[punyencode $seg]]] > 63} {
+		    throw [list IDNA OVERLONG_PART $seg] \
+			"hostname part too long"
+		}
+	    }
+	    lappend segs $seg
+	}
+	return [join $segs .]
+    }
+    proc IDNAdecode hostname {
+	# Inverse of IDNAencode: punycode-decode each label that starts with
+	# "xn--" (any case); separators are those of RFC 3490, Sec 3.1.
+	return [join [lmap seg [split $hostname "\u002E\u3002\uFF0E\uFF61"] {
+	    if {[string match -nocase "xn--*" $seg]} {
+		punydecode [string range $seg 4 end]
+	    } else {
+		set seg
+	    }
+	}] .]
+    }
+
+    # Digit alphabet: the list index of each character is its digit value.
+    variable digits [split abcdefghijklmnopqrstuvwxyz0123456789 {}]
+
+    # Bootstring parameters for Punycode (RFC 3492, Sec 5)
+    variable base 36 tmin 1 tmax 26
+    variable skew 38 damp 700
+    variable initial_bias 72
+    variable initial_n 0x80
+
+    # Upper bound on the code points that punydecode will produce
+    variable max_codepoint 0x10FFFF
+
+    proc adapt {delta first numchars} {
+	# Bias adaptation, after RFC 3492, Sec 6.1. Computes the bias to use
+	# for the next delta, given the delta just processed, whether it was
+	# the first one ($first), and the count of code points so far.
+	foreach param {base tmin tmax damp skew} {
+	    variable $param
+	}
+	set span [expr {$base - $tmin}]
+
+	set delta [expr {$first ? $delta / $damp : $delta / 2}]
+	incr delta [expr {$delta / $numchars}]
+	for {set k 0} {$delta > $span * $tmax / 2} {incr k $base} {
+	    set delta [expr {$delta / $span}]
+	}
+	return [expr {$k + ($span + 1) * $delta / ($delta + $skew)}]
+    }
+
+    # Punycode encoder, following the procedure of RFC 3492, Sec 6.3.
+    #	string	Unicode text to encode.
+    #	case	Optional boolean: true upper-cases, false lower-cases the
+    #		basic (ASCII) characters; empty (default) leaves them alone.
+    # Returns the encoded text. Throws {PUNYCODE OVERFLOW} when a delta would
+    # not fit in 32 bits, and raises an error if $case is not a boolean.
+    proc punyencode {string {case ""}} {
+	foreach param {digits tmin tmax base initial_n initial_bias} {
+	    variable $param
+	}
+	if {![string is boolean $case]} {
+	    return -code error "\"$case\" must be boolean"
+	}
+
+	# One pass over the input: record every code point, and gather the
+	# basic code points (those below U+0080) in their original order.
+	set codepoints {}
+	set basic {}
+	foreach char [split $string ""] {
+	    scan $char "%c" cp
+	    lappend codepoints $cp
+	    if {$char < "\u0080"} {
+		append basic $char
+	    }
+	}
+
+	# The basic code points open the output, recased if requested:
+	if {$case eq ""} {
+	    set output $basic
+	} elseif {[string is true $case]} {
+	    set output [string toupper $basic]
+	} else {
+	    set output [string tolower $basic]
+	}
+
+	# b is the number of basic code points; a delimiter follows them
+	# whenever there are any.
+	set b [string length $output]
+	if {$b > 0} {
+	    append output "-"
+	}
+
+	# Initialize the state; h is the number of code points handled.
+	set n $initial_n
+	set delta 0
+	set bias $initial_bias
+	set h $b
+	set total [llength $codepoints]
+
+	# Main encoding loop:
+	while {$h < $total} {
+	    # All non-basic code points < n have been handled already. Find
+	    # the smallest one that is still pending:
+	    set m inf
+	    foreach cp $codepoints {
+		if {$cp >= $n && $cp < $m} {
+		    set m $cp
+		}
+	    }
+
+	    # Increase delta enough to advance the decoder's <n,i> state to
+	    # <m,0>, but guard against overflow:
+	    if {$m-$n > (0xffffffff-$delta)/($h+1)} {
+		throw {PUNYCODE OVERFLOW} "overflow in delta computation"
+	    }
+	    incr delta [expr {($m-$n) * ($h+1)}]
+	    set n $m
+
+	    foreach cp $codepoints {
+		if {$cp < $n} {
+		    # Code points below n only contribute to delta.
+		    if {([incr delta] & 0xffffffff) == 0} {
+			throw {PUNYCODE OVERFLOW} \
+			    "overflow in delta computation"
+		    }
+		} elseif {$cp == $n} {
+		    # Emit delta as a generalized variable-length integer:
+		    set q $delta
+		    set k $base
+		    while {1} {
+			set t [expr {min(max($k-$bias, $tmin), $tmax)}]
+			if {$q < $t} break
+			set d [expr {$t + ($q-$t) % ($base-$t)}]
+			append output [lindex $digits $d]
+			set q [expr {($q-$t) / ($base-$t)}]
+			incr k $base
+		    }
+		    append output [lindex $digits $q]
+		    # One more code point handled; retune bias for the next.
+		    set bias [adapt $delta [expr {$h == $b}] [incr h]]
+		    set delta 0
+		}
+	    }
+	    incr delta
+	    incr n
+	}
+
+	return $output
+    }
+
+    # Punycode decoder, following the procedure of RFC 3492, Sec 6.2.
+    #	string	Punycode text to decode (without any "xn--" prefix).
+    #	case	Optional boolean: true upper-cases, false lower-cases the
+    #		literal (pre-delimiter) characters; empty leaves them alone.
+    # Returns the decoded text. Throws PUNYCODE errors for truncated input
+    # (BAD_INPUT LENGTH), characters that are not digits (BAD_INPUT CHAR),
+    # oversized integers (OVERFLOW), code points beyond max_codepoint
+    # (NON_UNICODE) and surrogate code points (NON_BMP).
+    proc punydecode {string {case ""}} {
+	foreach param {tmin tmax base initial_n initial_bias max_codepoint} {
+	    variable $param
+	}
+	if {![string is boolean $case]} {
+	    return -code error "\"$case\" must be boolean"
+	}
+
+	# Initialize the state:
+	set n $initial_n
+	set i 0
+	set first 1
+	set bias $initial_bias
+
+	# Split the string into the "real" ASCII characters and the ones to
+	# feed into the main decoder. Note that we don't need to check the
+	# result of [regexp] because that RE will technically match any string
+	# at all.
+	regexp {^(?:(.*)-)?([^-]*)$} $string -> pre post
+	if {[string is true -strict $case]} {
+	    set pre [string toupper $pre]
+	} elseif {[string is false -strict $case]} {
+	    set pre [string tolower $pre]
+	}
+	set output [split $pre ""]
+	set out [llength $output]
+
+	# Main decoding loop:
+	for {set in 0} {$in < [string length $post]} {incr in} {
+	    # Decode a generalized variable-length integer into delta, which
+	    # gets added to i. The overflow checking is easier if we increase
+	    # i as we go, then subtract off its starting value at the end to
+	    # obtain delta.
+	    for {set oldi $i; set w 1; set k $base} 1 {incr in} {
+		if {[set ch [string index $post $in]] eq ""} {
+		    throw {PUNYCODE BAD_INPUT LENGTH} "exceeded input data"
+		}
+		# Only ASCII may be a digit. A -nocase match is avoided since
+		# case folding can map other characters (e.g. U+212A, the
+		# Kelvin sign) into the a-z range.
+		if {[string match {[a-zA-Z]} $ch]} {
+		    scan [string toupper $ch] %c digit
+		    incr digit -65
+		} elseif {[string match {[0-9]} $ch]} {
+		    set digit [expr {$ch + 26}]
+		} else {
+		    throw {PUNYCODE BAD_INPUT CHAR} \
+			    "bad decode character \"$ch\""
+		}
+		incr i [expr {$digit * $w}]
+		set t [expr {min(max($tmin, $k-$bias), $tmax)}]
+		if {$digit < $t} {
+		    set bias [adapt [expr {$i-$oldi}] $first [incr out]]
+		    set first 0
+		    break
+		}
+		if {[set w [expr {$w * ($base - $t)}]] > 0x7fffffff} {
+		    throw {PUNYCODE OVERFLOW} \
+			"excessively large integer computed in digit decode"
+		}
+		incr k $base
+	    }
+
+	    # i was supposed to wrap around from out+1 to 0, incrementing n
+	    # each time, so we'll fix that now:
+	    if {[incr n [expr {$i / $out}]] > 0x7fffffff} {
+		throw {PUNYCODE OVERFLOW} \
+		    "excessively large integer computed in character choice"
+	    } elseif {$n > $max_codepoint} {
+		throw {PUNYCODE NON_UNICODE} "bad codepoint $n"
+	    } elseif {$n >= 0x00d800 && $n < 0x00e000} {
+		# Bare surrogate?! This needs a branch of its own: no value
+		# is both a surrogate and greater than max_codepoint.
+		throw {PUNYCODE NON_BMP} \
+		    [format "unsupported character U+%06x" $n]
+	    }
+	    set i [expr {$i % $out}]
+
+	    # Insert n at position i of the output:
+	    set output [linsert $output $i [format "%c" $n]]
+	    incr i
+	}
+
+	return [join $output ""]
+    }
+}
+
+package provide tcl::idna 1.0 ;# version reported by [::tcl::idna version]
+
+# Local variables:
+# mode: tcl
+# fill-column: 78
+# End: