blob: da55ad3790abf57a91c7f0d28a781b0e9de5fdec (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
#!/usr/bin/env tclsh
## -*- tcl -*-
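# Make the specified column of CSV data unique: a record is dropped when its
# value in that column equals that of the previously written record.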
package require csv
package require cmdline
# ----------------------------------------------------
# csvuniq ?-sep sepchar? column file.in|- file.out|-
#
# Argument processing and checks.
#
# Options:
#   -sep sepchar   separator character (default: ",")
#
# Arguments:
#   column         column to make unique; must be an integer >= 0
#   file.in        path of the input file, or "-" for stdin
#   file.out       path of the output file, or "-" for stdout
#
# On success the following variables are set for the processing section:
#   sepChar, uniCol, in (input channel), out (output channel)
#
# On a malformed command line the usage message is written to stderr and
# the script exits with -1. If a file cannot be opened, the reason is
# written to stderr and the script exits with 1.

set sepChar ,
set usage "Usage: $argv0 ?-sep sepchar? column file.in|- file.out|-"

# getopt consumes recognized options from the front of argv; it returns
# a value > 0 per option found, 0 when options are exhausted and < 0 on
# an option error.
while {[set ok [cmdline::getopt argv {sep.arg} opt val]] > 0} {
    switch -exact -- $opt {
        sep {set sepChar $val}
    }
}

if {($ok < 0) || ([llength $argv] != 3)} {
    puts stderr $usage
    exit -1
}

foreach {uniCol in out} $argv break

# -strict makes the integer test reject the empty string explicitly,
# instead of relying on the numeric comparison below to catch it.
if {
    ![string is integer -strict $uniCol] ||
    ($uniCol < 0) ||
    ![string compare $in ""] ||
    ![string compare $out ""]
} {
    puts stderr $usage
    exit -1
}

# Replace the file names by channels. "-" selects the standard channel.
# A failing open is reported as a one-line message, not a stack trace.
if {![string compare $in -]} {
    set in stdin
} elseif {[catch {open $in r} chan]} {
    puts stderr "$argv0: $chan"
    exit 1
} else {
    set in $chan
}

if {![string compare $out -]} {
    set out stdout
} elseif {[catch {open $out w} chan]} {
    puts stderr "$argv0: $chan"
    exit 1
} else {
    set out $chan
}
# ----------------------------------------------------
# Actual processing. Inputs prepared by the argument handling:
#
# in      - channel the records are read from
# out     - channel the surviving records are written to
# sepChar - separator character
# uniCol  - column to make unique
#
# Each input line is split into fields. A record is written (re-joined
# with the same separator) when it is the first one read, or when its
# value in column uniCol differs from that of the last written record.

set havePrev 0 ; # becomes 1 once the first record has been written
set prevKey  ""

while {![eof $in]} {
    if {[gets $in record] < 0} {
        continue
    }

    set fields [::csv::split $record $sepChar]
    set key    [lindex $fields $uniCol]

    if {$havePrev && ![string compare $prevKey $key]} {
        # Same value as the previous written record, drop this one.
        continue
    }

    set havePrev 1
    set prevKey  $key
    puts $out [::csv::join $fields $sepChar]
}

exit ; # automatically closes the channels
|