tcllib/examples/csv/csvdiff


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

#!/usr/bin/env tclsh
## -*- tcl -*-
# Perform a diff on two CSV files.
# The result is a CSV file

package require csv
package require cmdline

# ----------------------------------------------------
# csvdiff ?-n? ?-sep sepchar? ?-key LIST? file1 file2
#
# Argument processing and checks.

set sepChar ,
set usage   "Usage: $argv0 ?-n? ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"
set keySpec "0-"

# lineout = boolean flag, indicates whether line numbers have to be
# written as part of the output (1) or not (0). Defaults to 0.

set lineout 0
while {[set ok [cmdline::getopt argv {sep.arg key.arg n} opt val]] > 0} {
    #puts stderr "= $opt $val"
    switch -exact -- $opt {
	sep   {set sepChar $val}
	key   {set keySpec $val}
	n     {set lineout 1}
    }
}

# A negative result signals an option error; besides the options
# exactly two file names have to be left on the command line.
if {($ok < 0) || ([llength $argv] != 2)} {
    puts stderr $usage
    exit -1
}

foreach {fileA fileB} $argv break

# The key specification is documented as a comma separated list
# (LIST=idx,...), so it is taken apart explicitly. Whitespace is
# accepted as a separator too, so that a specification written as a
# plain Tcl list keeps working. Empty pieces (e.g. from "0,,2") are
# dropped.

set keyItems [list]
foreach i [split $keySpec ", \t"] {
    if {[string length $i] > 0} {
	lappend keyItems $i
    }
}
if {[llength $keyItems] == 0} {
    puts stderr $usage
    exit -1
}

# Translate every item into a {from to} pair of 'lrange' indices:
#
#   n-m -> {n m}     n- -> {n end}     -m -> {0 m}     n -> {n n}
#
# The patterns are anchored so that a malformed item is rejected here,
# with the usage message, instead of failing later inside 'lrange'.

set idx [list]
foreach i $keyItems {
    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i -> f t]} {
	lappend idx [list $f $t]
    } elseif {[regexp -- {^([0-9]+)-$} $i -> f]} {
	lappend idx [list $f end]
    } elseif {[regexp -- {^-([0-9]+)$} $i -> t]} {
	lappend idx [list 0 $t]
    } elseif {[regexp -- {^[0-9]+$} $i]} {
	lappend idx [list $i $i]
    } else {
	puts stderr $usage
	exit -1
    }
}
set keySpec $idx


# Open both inputs for reading ('open' defaults to read-only access).
set inA [open $fileA]
set inB [open $fileB]

# ----------------------------------------------------
# Actual processing, uses the following information from the
# commandline:
#
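# inA     - channel for input A
# inB     - channel for input B
# sepChar - separator character
# keySpec - list of {from to} index ranges selecting the key columns
# lineout - boolean flag, true if line numbers are written to the output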

# We read file2 completely and then go through the records of
# file1. For any record we don't find we write a "deleted" record. If
# we find the matching record we remove it from the internal
# storage. In a second sweep through the internal array we write
# "added" records for the remaining data as that was not in file1 but
# is in file2.

# keyof --
#
#	Extracts the key of a record.
#
# Arguments:
#	data	The record, a list of column values.
#
# Results:
#	A list holding the values of all columns selected by the
#	global 'keySpec' (a list of {from to} index pairs), in the
#	order of the ranges in that list.

proc keyof {data} {
    set key [list]
    foreach range $::keySpec {
	set from [lindex $range 0]
	set to   [lindex $range 1]
	foreach field [lrange $data $from $to] {
	    lappend key $field
	}
    }
    return $key
}


# Load file2 completely.
#
# order - all records of file2, in file order.
# map   - key -> line number (in file2) of the last record having
#         that key. A key seen more than once triggers a warning.

array set map {}
set order   [list]
set linenum 0
while {[gets $inB line] >= 0} {
    incr linenum
    set record [::csv::split $line $sepChar]
    set key    [keyof $record]

    if {[info exists map($key)]} {
	puts stderr "warning: $key occurs multiple times in $fileB (lines $linenum and $map($key))"
    }
    lappend order $record
    set map($key) $linenum
}
close $inB

# Go through file1 record by record.
#
# - A key which is also present in file2 is marked as matched by
#   replacing its entry in 'map' with the negated line number of the
#   file1 record. Running into an already negative entry means that
#   the key was seen before in file1, which is reported as a warning.
# - A record whose key is unknown to file2 is written out as a
#   "deleted" record: a leading "-" column, followed by the line
#   number if requested via 'lineout', followed by the record.

set linenum 0

if {$lineout} {
    # key -> line number of the record in file1.
    array set lmap {}
}

while {[gets $inA line] >= 0} {
    incr linenum
    set  data [::csv::split $line $sepChar]
    set  key  [keyof $data]

    if {$lineout} {set lmap($key) $linenum}

    if {[info exists map($key)]} {
	if {$map($key) < 0} {
	    # Fix: the closing parenthesis of the message was missing.
	    puts stderr "warning: $key occurs multiple times in $fileA (lines $linenum and [expr {-$map($key)}])"
	} else {
	    set map($key) [expr {-$linenum}]
	}
	continue
    }

    if {$lineout} {
	puts stdout [::csv::join [linsert $data 0 - $linenum] $sepChar]
    } else {
	puts stdout [::csv::join [linsert $data 0 -] $sepChar]
    }
}
close $inA

# Second sweep: every record of file2 whose entry in 'map' is still
# positive was never matched by a record of file1. It is written out
# as an "added" record: a leading "+" column, followed by the line
# number if requested via 'lineout', followed by the record.
#
# The line number reported is the one of the record in file2. The
# records were appended to 'order' one per line read, so the position
# in 'order' (counted from 1) is that line number. 'lmap' cannot be
# used here: it is filled only for keys found in file1, and the
# records handled here are exactly those whose key is not in file1.

set addedLine 0
foreach data $order {
    incr addedLine
    set key [keyof $data]
    if {$map($key) > 0} {
	if {$lineout} {
	    puts stdout [::csv::join [linsert $data 0 + $addedLine] $sepChar]
	} else {
	    puts stdout [::csv::join [linsert $data 0 +] $sepChar]
	}
    }
}

exit