blob: 979738612437f317b80d8dee6b73afa71eacf42a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
#!/usr/bin/env tclsh
## -*- tcl -*-
# Perform a diff on two CSV files.
# The result is a CSV file
package require csv
package require cmdline
# ----------------------------------------------------
# csvdiff ?-n? ?-sep sepchar? ?-key LIST? file1 file2
#
# Argument processing and checks.
#
#   -n            Write line numbers as part of the output records.
#   -sep sepchar  Separator character used to split and join the
#                 records. Defaults to a comma.
#   -key LIST     Columns which make up the key of a record. LIST is a
#                 comma separated list of index specifications of the
#                 form "n", "-m", "n-" or "n-m" (whitespace is accepted
#                 as separator too). Defaults to "0-", i.e. column 0
#                 up to the end of the record.
#
# On success the following variables are defined:
#
#   sepChar       - separator character
#   lineout       - boolean flag, see below
#   keySpec       - list of {from to} index pairs usable by lrange
#   fileA, fileB  - the paths given on the command line
#   inA, inB      - channels opened for reading fileA and fileB

set sepChar ,
set usage "Usage: $argv0 ?-n? ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"
set keySpec "0-"

# lineout = boolean flag, indicates whether line numbers have to be
# written as part of the output (1) or not (0). Defaults to 0.
set lineout 0

while {[set ok [cmdline::getopt argv {sep.arg key.arg n} opt val]] > 0} {
    switch -exact -- $opt {
        sep {set sepChar $val}
        key {set keySpec $val}
        n   {set lineout 1}
    }
}

# A negative result of getopt signals an option error. Exactly two
# file arguments have to remain after the options were removed.
if {($ok < 0) || ([llength $argv] != 2)} {
    puts stderr $usage
    exit -1
}
foreach {fileA fileB} $argv break

# Translate the key specification into a list of {from to} pairs.
#
# The specification is split explicitly, on commas as documented in
# the usage message and on whitespace to keep accepting Tcl style
# lists. Using split (instead of treating the raw option value as a
# list) also means that arbitrary user input cannot cause a list
# syntax error here.
#
# All patterns are anchored so that an item is accepted only if it
# is an index specification as a whole; the bounds are taken from
# the regexp captures.
set idx [list]
foreach i [split $keySpec ", \t\n"] {
    if {[string length $i] == 0} {
        # Artifact of adjacent separators, nothing to translate.
        continue
    }
    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i match f t]} {
        # n-m : closed range
        lappend idx [list $f $t]
    } elseif {[regexp -- {^([0-9]+)-$} $i match f]} {
        # n- : from column n to the end of the record
        lappend idx [list $f end]
    } elseif {[regexp -- {^-([0-9]+)$} $i match t]} {
        # -m : from the first column up to column m
        lappend idx [list 0 $t]
    } elseif {[regexp -- {^[0-9]+$} $i]} {
        # n : single column
        lappend idx [list $i $i]
    } else {
        puts stderr $usage
        exit -1
    }
}

# A specification without any index is as invalid as a malformed one.
if {[llength $idx] == 0} {
    puts stderr $usage
    exit -1
}
set keySpec $idx

set inA [open $fileA r]
set inB [open $fileB r]
# ----------------------------------------------------
# Actual processing, uses the following information from the
# commandline:
#
# inA     - channel for input A (file1)
# inB     - channel for input B (file2)
# sepChar - separator character
# keySpec - list of {from to} column ranges making up the key of a record
# lineout - boolean flag, true if line numbers are written to the output
#
# We read file2 completely and then go through the records of
# file1. For any record we don't find we write a "deleted" record. If
# we find the matching record we mark it as seen in the internal
# storage. In a second sweep through the stored records we write
# "added" records for the data not marked, as that was not in file1
# but is in file2.
# keyof --
#
#	Computes the key of a record.
#
# Arguments:
#	data	The record, a list of column values.
#
# Results:
#	A list containing the values of all columns selected by the
#	global keySpec, a list of {from to} index pairs. The ranges are
#	processed in the order they are listed in; indices outside of
#	the record contribute nothing.

proc keyof {data} {
    global keySpec

    set result [list]
    foreach range $keySpec {
        set from [lindex $range 0]
        set to   [lindex $range 1]
        foreach column [lrange $data $from $to] {
            lappend result $column
        }
    }
    return $result
}
# First pass: load file2 (channel inB) completely.
#
#   order - the records of file2, in the order they were read
#   map   - array, maps the key of a record to the number of the
#           line it was (last) seen at in file2. Line numbers start
#           at 1, so all values stored here are positive.
#
# A key found more than once is reported on stderr, the later line
# number replaces the earlier one in the map.

array set map {}
set order   [list]
set linenum 0

# gets returns a negative value when no further line is available.
while {[gets $inB line] >= 0} {
    incr linenum
    set data [::csv::split $line $sepChar]
    set key  [keyof $data]

    if {[info exists map($key)]} {
        puts stderr "warning: $key occurs multiple times in $fileB (lines $linenum and $map($key))"
    }

    set map($key) $linenum
    lappend order $data
}
close $inB
# Second pass: go through the records of file1 (channel inA).
#
# A record whose key is known from file2 is marked as seen by
# replacing its entry in map with the negated line number of the
# record in file1. A negative entry therefore means "key was already
# matched", and finding one signals a duplicate key in file1.
#
# A record whose key is not known is written to stdout as a "deleted"
# record, i.e. prefixed with "-" and, if requested, its line number in
# file1.

set linenum 0
if {$lineout} {
    # lmap: key -> line number of the record in file1.
    array set lmap {}
}
while {![eof $inA]} {
    if {[gets $inA line] < 0} {
        continue
    }
    incr linenum
    set data [::csv::split $line $sepChar]
    set key [keyof $data]

    if {$lineout} {set lmap($key) $linenum}

    if {[info exists map($key)]} {
        if {$map($key) < 0} {
            # The stored value is the negated number of the line the
            # key was matched at before, undo the negation for display.
            puts stderr "warning: $key occurs multiple times in $fileA (lines $linenum and [expr {-$map($key)}])"
        } else {
            set map($key) [expr {-$linenum}]
        }
        continue
    }

    if {$lineout} {
        puts stdout [::csv::join [linsert $data 0 - $linenum] $sepChar]
    } else {
        puts stdout [::csv::join [linsert $data 0 -] $sepChar]
    }
}
close $inA
# Final sweep over the records of file2, in their original order.
#
# An entry of map which is still positive was never matched by a
# record of file1. Such records are written to stdout as "added"
# records, i.e. prefixed with "+" and, if requested, a line number.

foreach data $order {
    set key [keyof $data]
    if {$map($key) > 0} {
        if {$lineout} {
            # The key of an added record never occurred in file1, so
            # lmap (filled while reading file1) has no entry for it.
            # The line number available for the record is the one
            # stored in map, i.e. its location in file2.
            puts stdout [::csv::join [linsert $data 0 + $map($key)] $sepChar]
        } else {
            puts stdout [::csv::join [linsert $data 0 +] $sepChar]
        }
    }
}
exit
|