summaryrefslogtreecommitdiffstats
path: root/library/http/idna.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'library/http/idna.tcl')
-rw-r--r--library/http/idna.tcl292
1 files changed, 292 insertions, 0 deletions
diff --git a/library/http/idna.tcl b/library/http/idna.tcl
new file mode 100644
index 0000000..2a7d289
--- /dev/null
+++ b/library/http/idna.tcl
@@ -0,0 +1,292 @@
+# cookiejar.tcl --
+#
+# Implementation of IDNA (Internationalized Domain Names for
+# Applications) encoding/decoding system, built on a punycode engine
+# developed directly from the code in RFC 3492, Appendix C (with
+# substantial modifications).
+#
+# This implementation includes code from that RFC, translated to Tcl; the
+# other parts are:
+# Copyright (c) 2014 Donal K. Fellows
+#
+# See the file "license.terms" for information on usage and redistribution of
+# this file, and for a DISCLAIMER OF ALL WARRANTIES.
+
+namespace eval ::tcl::idna {
+ namespace ensemble create -command puny -map {
+ encode punyencode
+ decode punydecode
+ }
+ namespace ensemble create -command ::tcl::idna -map {
+ encode IDNAencode
+ decode IDNAdecode
+ puny puny
+ version {::apply {{} {package present tcl::idna} ::}}
+ }
+
+ proc IDNAencode hostname {
+ set parts {}
+ # Split term from RFC 3490, Sec 3.1
+ foreach part [split $hostname "\u002E\u3002\uFF0E\uFF61"] {
+ if {[regexp {[^-A-Za-z0-9]} $part]} {
+ if {[regexp {[^-A-Za-z0-9\u00a1-\uffff]} $part ch]} {
+ scan $ch %c c
+ if {$ch < "!" || $ch > "~"} {
+ set ch [format "\\u%04x" $c]
+ }
+ throw [list IDNA INVALID_NAME_CHARACTER $ch] \
+ "bad character \"$ch\" in DNS name"
+ }
+ set part xn--[punyencode $part]
+ # Length restriction from RFC 5890, Sec 2.3.1
+ if {[string length $part] > 63} {
+ throw [list IDNA OVERLONG_PART $part] \
+ "hostname part too long"
+ }
+ }
+ lappend parts $part
+ }
+ return [join $parts .]
+ }
+ proc IDNAdecode hostname {
+ set parts {}
+ # Split term from RFC 3490, Sec 3.1
+ foreach part [split $hostname "\u002E\u3002\uFF0E\uFF61"] {
+ if {[string match -nocase "xn--*" $part]} {
+ set part [punydecode [string range $part 4 end]]
+ }
+ lappend parts $part
+ }
+ return [join $parts .]
+ }
+
+ variable digits [split "abcdefghijklmnopqrstuvwxyz0123456789" ""]
+ # Bootstring parameters for Punycode
+ variable base 36
+ variable tmin 1
+ variable tmax 26
+ variable skew 38
+ variable damp 700
+ variable initial_bias 72
+ variable initial_n 0x80
+
+ variable max_codepoint 0x10FFFF
+
+ proc adapt {delta first numchars} {
+ variable base
+ variable tmin
+ variable tmax
+ variable damp
+ variable skew
+
+ set delta [expr {$delta / ($first ? $damp : 2)}]
+ incr delta [expr {$delta / $numchars}]
+ set k 0
+ while {$delta > ($base - $tmin) * $tmax / 2} {
+ set delta [expr {$delta / ($base-$tmin)}]
+ incr k $base
+ }
+ return [expr {$k + ($base-$tmin+1) * $delta / ($delta+$skew)}]
+ }
+
+ # Main punycode encoding function
+ proc punyencode {string {case ""}} {
+ variable digits
+ variable tmin
+ variable tmax
+ variable base
+ variable initial_n
+ variable initial_bias
+
+ if {![string is boolean $case]} {
+ return -code error "\"$case\" must be boolean"
+ }
+
+ set in {}
+ foreach char [set string [split $string ""]] {
+ scan $char "%c" ch
+ lappend in $ch
+ }
+ set output {}
+
+ # Initialize the state:
+ set n $initial_n
+ set delta 0
+ set bias $initial_bias
+
+ # Handle the basic code points:
+ foreach ch $string {
+ if {$ch < "\u0080"} {
+ if {$case eq ""} {
+ append output $ch
+ } elseif {[string is true $case]} {
+ append output [string toupper $ch]
+ } elseif {[string is false $case]} {
+ append output [string tolower $ch]
+ }
+ }
+ }
+
+ set b [string length $output]
+
+ # h is the number of code points that have been handled, b is the
+ # number of basic code points.
+
+ if {$b > 0} {
+ append output "-"
+ }
+
+ # Main encoding loop:
+
+ for {set h $b} {$h < [llength $in]} {incr delta; incr n} {
+ # All non-basic code points < n have been handled already. Find
+ # the next larger one:
+
+ set m inf
+ foreach ch $in {
+ if {$ch >= $n && $ch < $m} {
+ set m $ch
+ }
+ }
+
+ # Increase delta enough to advance the decoder's <n,i> state to
+ # <m,0>, but guard against overflow:
+
+ if {$m-$n > (0xffffffff-$delta)/($h+1)} {
+ throw {PUNYCODE OVERFLOW} "overflow in delta computation"
+ }
+ incr delta [expr {($m-$n) * ($h+1)}]
+ set n $m
+
+ foreach ch $in {
+ if {$ch < $n && ([incr delta] & 0xffffffff) == 0} {
+ throw {PUNYCODE OVERFLOW} "overflow in delta computation"
+ }
+
+ if {$ch != $n} {
+ continue
+ }
+
+ # Represent delta as a generalized variable-length integer:
+
+ for {set q $delta; set k $base} true {incr k $base} {
+ set t [expr {min(max($k-$bias, $tmin), $tmax)}]
+ if {$q < $t} {
+ break
+ }
+ append output \
+ [lindex $digits [expr {$t + ($q-$t)%($base-$t)}]]
+ set q [expr {($q-$t) / ($base-$t)}]
+ }
+
+ append output [lindex $digits $q]
+ set bias [adapt $delta [expr {$h==$b}] [expr {$h+1}]]
+ set delta 0
+ incr h
+ }
+ }
+
+ return $output
+ }
+
+ # Main punycode decode function
+ proc punydecode {string {case ""}} {
+ variable tmin
+ variable tmax
+ variable base
+ variable initial_n
+ variable initial_bias
+ variable max_codepoint
+
+ if {![string is boolean $case]} {
+ return -code error "\"$case\" must be boolean"
+ }
+
+ # Initialize the state:
+
+ set n $initial_n
+ set i 0
+ set first 1
+ set bias $initial_bias
+
+ # Split the string into the "real" ASCII characters and the ones to
+ # feed into the main decoder. Note that we don't need to check the
+ # result of [regexp] because that RE will technically match any string
+ # at all.
+
+ regexp {^(?:(.*)-)?([^-]*)$} $string -> pre post
+ if {[string is true -strict $case]} {
+ set pre [string toupper $pre]
+ } elseif {[string is false -strict $case]} {
+ set pre [string tolower $pre]
+ }
+ set output [split $pre ""]
+ set out [llength $output]
+
+ # Main decoding loop:
+
+ for {set in 0} {$in < [string length $post]} {incr in} {
+ # Decode a generalized variable-length integer into delta, which
+ # gets added to i. The overflow checking is easier if we increase
+ # i as we go, then subtract off its starting value at the end to
+ # obtain delta.
+
+ for {set oldi $i; set w 1; set k $base} 1 {incr in} {
+ if {[set ch [string index $post $in]] eq ""} {
+ throw {PUNYCODE BAD_INPUT LENGTH} "exceeded input data"
+ }
+ if {[string match -nocase {[a-z]} $ch]} {
+ scan [string toupper $ch] %c digit
+ incr digit -65
+ } elseif {[string match {[0-9]} $ch]} {
+ set digit [expr {$ch + 26}]
+ } else {
+ throw {PUNYCODE BAD_INPUT CHAR} \
+ "bad decode character \"$ch\""
+ }
+ incr i [expr {$digit * $w}]
+ set t [expr {min(max($tmin, $k-$bias), $tmax)}]
+ if {$digit < $t} {
+ set bias [adapt [expr {$i-$oldi}] $first [incr out]]
+ set first 0
+ break
+ }
+ if {[set w [expr {$w * ($base - $t)}]] > 0x7fffffff} {
+ throw {PUNYCODE OVERFLOW} \
+ "excessively large integer computed in digit decode"
+ }
+ incr k $base
+ }
+
+ # i was supposed to wrap around from out+1 to 0, incrementing n
+ # each time, so we'll fix that now:
+
+ if {[incr n [expr {$i / $out}]] > 0x7fffffff} {
+ throw {PUNYCODE OVERFLOW} \
+ "excessively large integer computed in character choice"
+ } elseif {$n > $max_codepoint} {
+ if {$n >= 0x00d800 && $n < 0x00e000} {
+ # Bare surrogate?!
+ throw {PUNYCODE NON_BMP} \
+ [format "unsupported character U+%06x" $n]
+ }
+ throw {PUNYCODE NON_UNICODE} "bad codepoint $n"
+ }
+ set i [expr {$i % $out}]
+
+ # Insert n at position i of the output:
+
+ set output [linsert $output $i [format "%c" $n]]
+ incr i
+ }
+
+ return [join $output ""]
+ }
+}
+
+package provide tcl::idna 1.0
+
+# Local variables:
+# mode: tcl
+# fill-column: 78
+# End: