#! /usr/bin/env bash

# Give textual printout showing diffs in file $2 relative to $3 (which are to be .csv files)
# (Entries in $2 can use awk regular expression syntax.)
# Argument $1 is a comma-separated list of the column headers whose values together form a line's unique ID.
# Shown are changes by unique ID as well as deletions and additions of unique IDs.
# Each csv converted to a "simple" csv by csv2scsv to deal with quoted strings containing commas in a field.
# Prints output in a tri-partite .txt format for deletions, additions, and changes.
# Note also the use of TEMP below, which is not otherwise documented here.

# $var:t    ${var##*/}
# $var:h    ${var%/*}
# $var:e    ${var##*.}
# $var:r    ${var%.*}

# Set debug to a non-zero value to keep the intermediate add/del/change files
# (their directory is reported on stderr) instead of deleting them on exit.
debug=0

# Scratch directory and the three intermediate files; assigned by main.
tmpdir=
addfile=
delfile=
changefile=

#######################################
# Remove the scratch directory created by main, unless debug is non-zero.
# Globals:   tmpdir (read), debug (read)
# Arguments: none
# Outputs:   in debug mode, the scratch directory path on stderr
#######################################
cleanup() {
  [[ -n "${tmpdir:-}" && -d "$tmpdir" ]] || return 0
  if (( debug )); then
    printf 'csvdiff: keeping intermediate files in %s\n' "$tmpdir" >&2
  else
    rm -rf -- "${tmpdir:?}"
  fi
}

#######################################
# Report rows deleted, added and changed in the new csv relative to the old.
# Both inputs are first normalised by csv2scsv; the three result files are
# rendered with csv2txt.
# Globals:   tmpdir, addfile, delfile, changefile (written)
# Arguments: $1 - comma separated column headers forming the unique row ID
#            $2 - new .csv file
#            $3 - old .csv file
# Outputs:   "Deletions:", "Additions:" and "Changes:" sections on stdout
# Returns:   2 on missing arguments, 1 if no scratch directory can be made,
#            otherwise the status of the final csv2txt call
#######################################
main() {
  if (( $# < 3 )); then
    printf 'Usage: %s ID_COLUMNS NEW.csv OLD.csv\n' "${0##*/}" >&2
    return 2
  fi
  local idcols=$1 newcsv=$2 oldcsv=$3

  # mktemp gives an unpredictable, private directory; the old fixed
  # /tmp/csvdiff.*.$$.txt names could be pre-created by another user.
  tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/csvdiff.XXXXXX") || {
    printf 'csvdiff: cannot create a temporary directory\n' >&2
    return 1
  }
  trap cleanup EXIT

  addfile=$tmpdir/csvdiff.add.txt
  delfile=$tmpdir/csvdiff.del.txt
  changefile=$tmpdir/csvdiff.change.txt
  # Pre-create all three so csv2txt always gets an existing file, even when
  # an input is empty and awk never writes to one of them.
  : >"$addfile"
  : >"$delfile"
  : >"$changefile"

  # The new file is streamed first, then a marker line, then the old file.
  # Values reach awk through -v rather than being spliced into the program.
  {
    csv2scsv "$newcsv"
    echo "fileseparator"
    csv2scsv "$oldcsv"
  } | awk -F, \
    -v idcols="$idcols" \
    -v addfile="$addfile" \
    -v delfile="$delfile" \
    -v changefile="$changefile" '
    # Unique ID of the current record: the values of the ID columns that
    # exist in this file, joined with SUBSEP so that ("ab","c") and
    # ("a","bc") stay distinct. colnum maps a header name to its position.
    function line_id(colnum,    k, id) {
      id = ""
      for (k = 1; k <= nid; k++)
        if (colnum[idfields[k]] > 0)
          id = id SUBSEP $(colnum[idfields[k]])
      return id
    }

    BEGIN { state = "new"; nid = split(idcols, idfields, ",") }

    # Checked before the header rule so that an empty new file (marker on
    # line 1) is not mistaken for a header.
    $0 == "fileseparator" { state = "oldheader"; next }

    # Header of the new file: heads the additions report.
    NR == 1 {
      print > addfile
      for (i = 1; i <= NF; i++) newcolnum[$i] = i
      next
    }

    # Header of the old file: heads the deletions and changes reports.
    state == "oldheader" {
      print > delfile
      print > changefile
      for (i = 1; i <= NF; i++) { oldcolname[i] = $i; oldcolnum[$i] = i }
      state = "old"
      next
    }

    # Rows of the new file: remember the whole line and each field by ID,
    # and the order in which IDs first appear.
    state == "new" {
      id = line_id(newcolnum)
      if (!(id in newline)) neworder[++nnew] = id
      newline[id] = $0
      for (i = 1; i <= NF; i++) newfield[id, i] = $i
      next
    }

    # Rows of the old file: classify as deleted, unchanged or changed.
    state == "old" {
      id = line_id(oldcolnum)
      oldexists[id] = 1
      if (!(id in newline)) { print > delfile; next }
      if (newline[id] == $0) next

      # One slot per old column: the new value where it differs (X when the
      # new value is empty), nothing otherwise. New values containing TEMP
      # are never reported as changes.
      replace = ""
      for (i = 1; i <= NF; i++) {
        newitem = newfield[id, newcolnum[oldcolname[i]]]
        if (newitem != $i && newitem !~ /TEMP/)
          replace = replace (newitem == "" ? "X" : newitem)
        replace = replace ","
      }
      # Emit the old row followed by the replacement row, but only when at
      # least one slot is filled.
      if (replace !~ /^,*$/) { print > changefile; print replace > changefile }
      next
    }

    # Additions: IDs seen in the new file but never in the old one, in the
    # order they appeared in the new file.
    END {
      for (n = 1; n <= nnew; n++)
        if (!(neworder[n] in oldexists)) print newline[neworder[n]] > addfile
    }
  '

  echo Deletions:
  csv2txt "$delfile"
  echo Additions:
  csv2txt "$addfile"
  echo Changes:
  csv2txt "$changefile"
}

main "$@"
