summaryrefslogtreecommitdiffstats
path: root/tcllib/modules/math/kruskal.tcl
blob: a08083bcb0a5e53170720e6e575b9f888d668bbe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# kruskal.tcl --

#     Procedures related to ranking and the Kruskal-Wallis test

#


# test-Kruskal-Wallis --

#     Perform a one-way analysis of variance according

#     to Kruskal-Wallis

#

# Arguments:

#     confidence    Confidence level (between 0 and 1)

#     args          Two or more lists of data

#

# Result:

#     0 if the medians of the groups differ, 1 if they

#     are the same (accept the null hypothesis)

#

proc ::math::statistics::test-Kruskal-Wallis {confidence args} {

    foreach {H p} [eval analyse-Kruskal-Wallis $args] {break}

    expr {$p < 1.0 - $confidence}
}

# analyse-Kruskal-Wallis --

#     Perform a one-way analysis of variance according

#     to Kruskal-Wallis and return the details

#

# Arguments:

#     args          Two or more lists of data

#

# Result:

#     Kruskal-Wallis statistic H and the probability p

#     that this value occurs if the

#

proc ::math::statistics::analyse-Kruskal-Wallis {args} {

    set setCount [llength $args]

    #

    # Rank the data with respect to the whole set

    #

    set rankList [eval group-rank $args]

    set length [llength $rankList]

    #

    # Re-establish original sets of values, but using the ranks

    #

    foreach item $rankList {
        lappend rankValues([lindex $item 0]) [lindex $item 2]
    }

    #

    # Now compute H

    #

    set H 0
    for {set i 0} {$i < $setCount} {incr i} {
        set total [expr [join $rankValues($i) +]]
        set count [llength $rankValues($i)]
        set H [expr {$H + pow($total,2)/double($count)}]
    }
    set H [expr {$H*(12.0/($length*($length + 1))) - (3*($length + 1))}]
    incr setCount -1
    set p [expr {1 - [::math::statistics::cdf-chisquare $setCount $H]}]
    return [list $H $p]
}

# group-rank --

#     Rank groups of data with respect to the whole set

#

# Arguments:

#     args          Two or more lists of data

#

# Result:

#     List of ranking data: for each data item, the group-ID,

#     the value and the rank (may be a fraction, in case of ties)

#

proc ::math::statistics::group-rank {args} {

    set index 0
    set rankList [list]
    set setCount [llength $args]
    #

    # Read lists of values

    #

    foreach item $args {
        set values($index) [lindex $args $index]
        #

        # Prepare ranking with rank=0

        #

        foreach value $values($index) {
            lappend rankList [list $index $value 0]
        }
        incr index 1
    }
    #

    # Sort the values

    #

    set rankList [lsort -real -index 1 $rankList]
    #

    # Assign the ranks (disregarding ties)

    #

    set length [llength $rankList]
    for {set i 0} {$i < $length} {incr i} {
        lset rankList $i 2 [expr {$i + 1}]
    }
    #

    # Value of the previous list element

    #

    set prevValue {}

    #

    # List of indices of list elements having the same value (ties)

    #

    set equalIndex [list]

    #

    # Test for ties and re-assign mean ranks for tied values

    #

    for {set i 0} {$i < $length} {incr i} {
        set value [lindex $rankList $i 1]
        if {($value != $prevValue) && ($i > 0) && ([llength $equalIndex] > 0)} {
            #

            # We are still missing the first tied value

            #

            set j [lindex $equalIndex 0]
            incr j -1
            set equalIndex [linsert $equalIndex 0 $j]

            #

            # Re-assign rank as mean rank of tied values

            #

            set firstRank [lindex $rankList [lindex $equalIndex 0] 2]
            set lastRank  [lindex $rankList [lindex $equalIndex end] 2]
            set newRank   [expr {($firstRank+$lastRank)/2.0}]
            foreach j $equalIndex {
                lset rankList $j 2 $newRank
            }

            #

            # Clear list of equal elements

            #

            set equalIndex [list]
        } elseif {$value == $prevValue} {
            #

            # Remember index of equal value element

            #

            lappend equalIndex $i
        }
        set prevValue $value
    }

    return $rankList
}