Merge commit '7c621da28f07e449ad90c387344f07a453927569' as 'tcllib'

author: William Joye <wjoye@cfa.harvard.edu> 2016-10-27 19:39:39 (GMT)
committer: William Joye <wjoye@cfa.harvard.edu> 2016-10-27 19:39:39 (GMT)
commit: ea28451286d3ea4a772fa174483f9a7a66bb1ab3 (patch)
tree: 6ee9d8a7848333a7ceeee3b13d492e40225f8b86 /tcllib/examples/csv/csvdiff
parent: b5ca09bae0d6a1edce939eea03594dd56383f2c8 (diff)
parent: 7c621da28f07e449ad90c387344f07a453927569 (diff)
download: blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.zip
blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.tar.gz
blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.tar.bz2
1 files changed, 162 insertions, 0 deletions
diff --git a/tcllib/examples/csv/csvdiff b/tcllib/examples/csv/csvdiff
new file mode 100755
index 0000000..9797386
--- /dev/null
+++ b/tcllib/examples/csv/csvdiff
@@ -0,0 +1,162 @@
+#!/usr/bin/env tclsh
+## -*- tcl -*-
+# Perform a diff on two CSV files.
+# The result is a CSV file
+
+package require csv
+package require cmdline
+
+# ----------------------------------------------------
+# csvdiff ?-n? ?-sep sepchar? ?-key LIST? file1 file2
+#
+# Command line handling. Establishes the defaults, lets the options
+# override them, and insists on exactly two remaining arguments,
+# which become fileA and fileB.
+
+set usage   "Usage: $argv0 ?-n? ?-sep sepchar? ?-key LIST? file1 file2\n\tLIST=idx,...\n\tidx in \{n, -m, n-, n-m\}"
+set sepChar ,
+set keySpec "0-"
+
+# lineout is 1 when -n was given and 0 otherwise (the default). It
+# controls whether line numbers become part of the output.
+
+set lineout 0
+
+# Keep asking cmdline::getopt for the next option for as long as it
+# reports a positive status; each recognized option replaces the
+# matching default.
+
+while {[set status [cmdline::getopt argv {sep.arg key.arg n} opt val]] > 0} {
+    #puts stderr "= $opt $val"
+    if {[string equal $opt sep]} {
+	set sepChar $val
+    } elseif {[string equal $opt key]} {
+	set keySpec $val
+    } elseif {[string equal $opt n]} {
+	set lineout 1
+    }
+}
+
+# A negative final status, or anything but two arguments left over,
+# is answered with the usage message and a failure exit.
+
+if {($status < 0) || ([llength $argv] != 2)} {
+    puts stderr $usage
+    exit -1
+}
+
+set fileA [lindex $argv 0]
+set fileB [lindex $argv 1]
+
+
+# Translate the -key specification into a list of {from to} index
+# pairs, the form keyof hands to lrange.
+#
+# The usage text documents the specification as a comma separated
+# list (LIST=idx,...), so the string is split on commas. Whitespace is
+# accepted as a separator as well, which keeps whitespace separated
+# specifications working.
+#
+# Every element has to match one of the documented forms completely:
+#
+#	n-m -> {n m}	n- -> {n end}	-m -> {0 m}	n -> {n n}
+#
+# The patterns are anchored so that an element which merely contains
+# a valid form (like "1x") is rejected here instead of failing later
+# as a bad lrange index. Any invalid element, and a specification
+# without a single element, prints the usage message and aborts.
+
+set idx [list]
+foreach i [split $keySpec ", \t\n"] {
+    if {[string length $i] == 0} {
+	# Empty piece between two adjacent separators, as in "0, 2".
+	continue
+    }
+    if {[regexp -- {^([0-9]+)-([0-9]+)$} $i -> f t]} {
+	lappend idx [list $f $t]
+    } elseif {[regexp -- {^([0-9]+)-$} $i -> f]} {
+	lappend idx [list $f end]
+    } elseif {[regexp -- {^-([0-9]+)$} $i -> t]} {
+	lappend idx [list 0 $t]
+    } elseif {[regexp -- {^[0-9]+$} $i]} {
+	lappend idx [list $i $i]
+    } else {
+	puts stderr $usage
+	exit -1
+    }
+}
+if {[llength $idx] == 0} {
+    puts stderr $usage
+    exit -1
+}
+set keySpec $idx
+
+
+# Open both inputs for reading. inA and inB hold the channels the
+# processing below reads from and closes.
+set inA [open $fileA r]
+set inB [open $fileB r]
+
+# ----------------------------------------------------
+# Actual processing, uses the following information from the
+# commandline:
+#
+# inA     - channel for input A
+# inB     - channel for input B
+# sepChar - separator character
+
+# We read file2 completely and then go through the records of
+# file1. For any record we don't find we write a "deleted" record. If
+# we find the matching record we mark it in the internal array by
+# negating its stored line number. In a second sweep through the
+# records of file2 we write "added" records for the unmarked ones, as
+# that data was not in file1 but is in file2.
+
+# keyof --
+#
+#	Extracts the key of a record.
+#
+# Arguments:
+#	data	The record, a list of field values.
+#
+# Results:
+#	A list with the fields of data selected by the global keySpec,
+#	which is read as a list of {from to} index pairs. The selected
+#	ranges are concatenated in the order keySpec lists them.
+
+proc keyof {data} {
+    set fields [list]
+    foreach range $::keySpec {
+	set from [lindex $range 0]
+	set to   [lindex $range 1]
+	foreach field [lrange $data $from $to] {
+	    lappend fields $field
+	}
+    }
+    return $fields
+}
+
+
+
+# First pass, over file2 (channel inB), which is closed afterwards.
+#
+# order - the parsed records of file2, in file order
+# map   - key of a record -> line number of that key in file2. When a
+#         key repeats a warning goes to stderr and the later line
+#         number replaces the earlier one.
+
+set linenum 0
+set order [list]
+array set map {}
+while {[gets $inB line] >= 0} {
+    incr linenum
+    set data [::csv::split $line $sepChar]
+    set key  [keyof $data]
+
+    if {[info exists map($key)]} {
+	puts stderr "warning: $key occurs multiple times in $fileB (lines $linenum and $map($key))"
+    }
+    lappend order $data
+    set map($key) $linenum
+}
+close $inB
+
+# Second pass, over file1 (channel inA), which is closed afterwards.
+#
+# A record whose key is present in map (i.e. was seen in file2) is
+# marked as matched by storing the negated file1 line number in
+# map. An entry that is already negative was matched by an earlier
+# file1 line, which is reported as a duplicate key. Matched records
+# produce no output.
+#
+# A record whose key is not in map is written as a "deleted" (-)
+# record, preceded by its file1 line number when lineout is set.
+
+set linenum 0
+
+# With line numbers requested, lmap records key -> line number in
+# file1.
+if {$lineout} {
+    array set lmap {}
+}
+
+while {![eof $inA]} {
+    if {[gets $inA line] < 0} {
+	continue
+    }
+    incr linenum
+    set  data [::csv::split $line $sepChar]
+    set  key  [keyof $data]
+
+    if {$lineout} {set lmap($key) $linenum}
+
+    if {[info exists map($key)]} {
+	if {$map($key) < 0} {
+	    # The stored value is the negated line number of the
+	    # earlier occurrence; flip the sign for the message.
+	    puts stderr "warning: $key occurs multiple times in $fileA (lines $linenum and [expr {-$map($key)}])"
+	} else {
+	    set map($key) [expr {-$linenum}]
+	}
+	continue
+    }
+
+    if {$lineout} {
+	puts stdout [::csv::join [linsert $data 0 - $linenum] $sepChar]
+    } else {
+	puts stdout [::csv::join [linsert $data 0 -] $sepChar]
+    }
+}
+close $inA
+
+# Final sweep over the records of file2, in file order. A map entry
+# that is still positive means the key was never found in file1, so
+# the record is written as an "added" (+) record.
+#
+# With line numbers requested the number is taken from map, i.e. it
+# is the file2 line the key was (last) seen on. lmap cannot be used
+# here: it is filled from the keys of file1 only, and the records
+# written by this loop are exactly those whose key file1 does not
+# have, so reading lmap for them raises an error.
+
+foreach data $order {
+    set key [keyof $data]
+    if {$map($key) > 0} {
+	if {$lineout} {
+	    puts stdout [::csv::join [linsert $data 0 + $map($key)] $sepChar]
+	} else {
+	    puts stdout [::csv::join [linsert $data 0 +] $sepChar]
+	}
+    }
+}
+
+exit
author	William Joye <wjoye@cfa.harvard.edu>	2016-10-27 19:39:39 (GMT)
committer	William Joye <wjoye@cfa.harvard.edu>	2016-10-27 19:39:39 (GMT)
commit	ea28451286d3ea4a772fa174483f9a7a66bb1ab3 (patch)
tree	6ee9d8a7848333a7ceeee3b13d492e40225f8b86 /tcllib/examples/csv/csvdiff
parent	b5ca09bae0d6a1edce939eea03594dd56383f2c8 (diff)
parent	7c621da28f07e449ad90c387344f07a453927569 (diff)
download	blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.zip blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.tar.gz blt-ea28451286d3ea4a772fa174483f9a7a66bb1ab3.tar.bz2