summaryrefslogtreecommitdiffstats
path: root/tcllib/examples/csv/csvdiff
diff options
context:
space:
mode:
authorWilliam Joye <wjoye@cfa.harvard.edu>2016-10-27 19:39:39 (GMT)
committerWilliam Joye <wjoye@cfa.harvard.edu>2016-10-27 19:39:39 (GMT)
commitea28451286d3ea4a772fa174483f9a7a66bb1ab3 (patch)
tree6ee9d8a7848333a7ceeee3b13d492e40225f8b86 /tcllib/examples/csv/csvdiff
parentb5ca09bae0d6a1edce939eea03594dd56383f2c8 (diff)
parent7c621da28f07e449ad90c387344f07a453927569 (diff)
downloadblt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.zip
blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.tar.gz
blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.tar.bz2
Merge commit '7c621da28f07e449ad90c387344f07a453927569' as 'tcllib'
Diffstat (limited to 'tcllib/examples/csv/csvdiff')
-rwxr-xr-xtcllib/examples/csv/csvdiff162
1 files changed, 162 insertions, 0 deletions
diff --git a/tcllib/examples/csv/csvdiff b/tcllib/examples/csv/csvdiff
new file mode 100755
index 0000000..9797386
--- /dev/null
+++ b/tcllib/examples/csv/csvdiff
@@ -0,0 +1,162 @@
+#!/usr/bin/env tclsh
+## -*- tcl -*-
+# Perform a diff on two CSV files.
+# The result is a CSV file
+
+package require csv
+package require cmdline
+
+# ----------------------------------------------------
+# csvdiff ?-n? ?-sep sepchar? ?-key LIST? file1 file2
+#
+# Argument processing and checks.
+
+# Defaults: comma as separator, the whole record ("0-") as key, and no
+# line numbers in the output (lineout is a boolean flag, 1 = write them).
+
+set usage "Usage: $argv0 ?-n? ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"
+set sepChar ,
+set keySpec "0-"
+set lineout 0
+
+# Keep pulling options off argv for as long as getopt reports a hit.
+while 1 {
+    set ok [cmdline::getopt argv {sep.arg key.arg n} opt val]
+    if {$ok <= 0} break
+    switch -exact -- $opt {
+        n   {set lineout 1}
+        key {set keySpec $val}
+        sep {set sepChar $val}
+    }
+}
+# A negative result is an option error; exactly two file names must remain.
+if {($ok < 0) || ([llength $argv] != 2)} {puts stderr $usage; exit -1}
+
+foreach {fileA fileB} $argv break
+
+
+# Turn the -key argument into a list of {from to} index pairs usable
+# with lrange. The usage text documents LIST as comma-separated
+# (idx,...); whitespace-separated specs keep working as before.
+
+set idx [list]
+foreach i [string map {, " "} $keySpec] {
+    # Patterns are anchored so that malformed specs such as "1-2-3"
+    # or "x5" are rejected here instead of reaching lrange.
+    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i -> f t]} {
+        lappend idx [list $f $t]
+    } elseif {[regexp -- {^([0-9]+)-$} $i -> f]} {
+        lappend idx [list $f end]
+    } elseif {[regexp -- {^-([0-9]+)$} $i -> t]} {
+        lappend idx [list 0 $t]
+    } elseif {[regexp -- {^[0-9]+$} $i]} {
+        lappend idx [list $i $i]
+    } else {
+        puts stderr $usage
+        exit -1
+    }
+}
+
+# An empty specification (e.g. -key "" or -key ,) selects no column.
+if {[llength $idx] == 0} {
+    puts stderr $usage
+    exit -1
+}
+set keySpec $idx
+
+
+set inA [open $fileA r]
+set inB [open $fileB r]
+
+# ----------------------------------------------------
+# Actual processing, uses the following information from the
+# commandline:
+#
+# inA - channel for input A
+# inB - channel for input B
+# sepChar - separator character
+
+# We read file2 completely and then go through the records of
+# file1. For any record we don't find in file2 we write a "deleted"
+# record. If we find the matching record we mark it as seen in the
+# internal storage. In a second sweep through the stored records we
+# write "added" records for the unmarked data, as that was not in
+# file1 but is in file2.
+
+# keyof -- returns the list of fields of record 'data' selected by the
+# {from to} ranges stored in the global keySpec.
+proc keyof {data} {
+    global keySpec
+    set key [list]
+    foreach range $keySpec {
+        foreach {from to} $range break
+        foreach field [lrange $data $from $to] {lappend key $field}
+    }
+    return $key
+}
+
+
+# Pass 1: read file B completely. 'order' collects its records in
+# file order, and map(key) holds the line number on which the key was
+# seen last; a repeated key only triggers a warning.
+set order [list]
+array set map {}
+set linenum 0
+while {[gets $inB line] >= 0} {
+    incr linenum
+    set data [::csv::split $line $sepChar]
+    set key [keyof $data]
+
+    if {[info exists map($key)]} {
+        puts stderr "warning: $key occurs multiple times in $fileB (lines $linenum and $map($key))"
+    }
+    set map($key) $linenum
+    lappend order $data
+}
+close $inB
+
+# Pass 2: walk file A. A key known from B is marked as matched by
+# negating its map entry to -(line in A); an already negative entry
+# means the key occurred before in A. Records unknown to B are
+# reported as deleted ("-").
+
+set linenum 0
+while {[gets $inA line] >= 0} {
+    incr linenum
+    set data [::csv::split $line $sepChar]
+    set key [keyof $data]
+
+    if {[info exists map($key)]} {
+        if {$map($key) < 0} {
+            puts stderr "warning: $key occurs multiple times\
+                    in $fileA (lines $linenum and [expr {-$map($key)}])"
+        } else {
+            set map($key) [expr {-$linenum}]
+        }
+        continue
+    }
+
+    if {$lineout} {
+        puts stdout [::csv::join [linsert $data 0 - $linenum] $sepChar]
+    } else {
+        puts stdout [::csv::join [linsert $data 0 -] $sepChar]
+    }
+}
+close $inA
+
+# Pass 3: a map entry that is still positive was never matched by a
+# record of A, so the corresponding records of B are reported as added
+# ("+"). With -n the line number written is the one stored in map,
+# i.e. the line in file B (the last one if the key is repeated there).
+
+foreach data $order {
+    set key [keyof $data]
+    if {$map($key) > 0} {
+        if {$lineout} {
+            puts stdout [::csv::join [linsert $data 0 + $map($key)] $sepChar]
+        } else {
+            puts stdout [::csv::join [linsert $data 0 +] $sepChar]
+        }
+    }
+}
+
+exit