#!/usr/bin/perl
# Original code: mergetbl,v 2.6 1993/08/24 14:52:45 hobbs
#
# Revision string shown at the bottom of the help text.
$RCS_ID = q{$Id: nsq-merge,v 1.0 1998/05/14 11:08:19 carlos Exp $} ;

# Reduce the program name to its basename for use in messages.
$0 =~ s{.*/}{} ;

# Default marker that flags a row for deletion (see the -d option).
$DSTG = '..DEL..' ;
# Help text printed by the -help option.  The here-document is interpolated
# when this assignment runs, i.e. before the command line is parsed, so $0 is
# the program basename, $DSTG is the default delete string and $RCS_ID is the
# revision string set above.
$HelpInfo = <<EOH ;

			NoSQL operator: $0

Usage:  $0  [options]  < old_table  column  ...  merge_table

Options:
    -add     Add option. Add rows where the key column(s) match.
    -d       Delete option. Delete rows where the key column(s) match and
	     the data value in the delete column is equal to the delete
	     string, "$DSTG" (without the quotes) by default.
    -dSTG    Like the delete option above but use 'STG' as the delete string.
    -help    Print this help info.
    -n       Strip header from output.
    -rev     Reverse option. The tables are sorted in reverse order.
    -sub     Subset case. The structure of merge_tbl is a subset of the
	     structure of old_table.

This operator merges and/or deletes rows of 'old_table' based on data values
in 'merge_table' in the specified key column(s).  Both tables should be sorted
on the specified key column(s).

In the normal case, one or more rows in 'merge_table' either replace one or
more existing rows in 'old_table' if the key column(s) match, or are inserted
in order if the key column(s) do NOT match.

If the delete option is specified on the command line, one or more existing
rows in 'old_table' will be deleted if there is a key column(s) match and the
data in the delete column is equal to the delete string, "$DSTG" (without
the quotes) by default.  The delete column is the first non-key column in
'merge_table'.

If the add option is specified on the command line, all rows from 'old_table'
and 'merge_table' will be put into the new table, in the proper order.

Both tables must have the same data structure, except for the '-sub' case.
In all cases the key columns must be in the same order and location in the
two tables.

The header for the new rdbtable is taken from 'merge_table', also except for
the '-sub' case, thus allowing for a minor change of header documentation to
be made.

This operator writes an rdbtable via STDOUT.  Options may be abbreviated.

$RCS_ID

			----------------------
NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.
This program comes with ABSOLUTELY NO WARRANTY; for details
refer to the GNU General Public License.

You should have received a copy of the GNU General Public License
along with this program;  if not, write to the Free Software
Foundation, Inc., 59 Temple Place Suite 330, Boston, MA 02111-1307
USA.
			----------------------

EOH
    # -sub: definition not clear for multiple key values in old_table.

# Consume leading command-line options (everything up to the first argument
# that does not start with '-').  Unknown options abort with a usage hint.
while ( @ARGV && $ARGV[0] =~ /^-/ ){			# Get args
    my $opt = shift( @ARGV ) ;
    unless( &parse_option( $opt ) ){
        die "\nBad arg: $opt\n", "For help type \"$0 -help\".\n" ;
    }
}

# parse_option( $opt )
#   Apply a single option string to the option globals ($ADD, $DEL, $DSTG,
#   $HTI, $NHDR, $REV, $SUB, $XBUG).  Options may be abbreviated, so only the
#   leading characters are examined.  Returns 1 if the option was recognised,
#   0 otherwise.  The help option prints $HelpInfo and exits with status 1.
#
#   Every pattern is anchored at the start of the argument: unanchored
#   patterns would let the text after "-d" (the user's delete string) be
#   mistaken for another option, e.g. "-dX-all" being read as "-add".
sub parse_option {
    my( $opt ) = @_ ;
    if( $opt =~ /^-a/ ){ $ADD++ ; return 1 ; }
    if( $opt =~ /^-d(.*)/ ){
        $DEL++ ;
        # length() rather than truth, so that "-d0" selects "0".
        $DSTG = $1 if length( $1 ) ;
        return 1 ;
    }
    if( $opt =~ /^-ht/ ){ $HTI++ ; return 1 ; }	# must precede the -h test
    if( $opt =~ /^-h/ ){ print $HelpInfo ; exit 1 ; }
    if( $opt =~ /^-n/ ){ $NHDR++ ; return 1 ; }
    if( $opt =~ /^-r/ ){ $REV++ ; return 1 ; }
    if( $opt =~ /^-s/ ){ $SUB++ ; return 1 ; }
    if( $opt =~ /^-x/ ){ $XBUG++ ; return 1 ; }
    return 0 ;
}
# After option parsing at least one key column name and the merge_table
# file name must remain on the command line.
die "\n$0: Not enough info given.\n", "For help type \"$0 -help\".\n"
    unless @ARGV >= 2 ;
# -add and -d are mutually exclusive.
die "\n$0: Can't use both add and delete options.\n",
    "For help type \"$0 -help\".\n" if $DEL && $ADD ;
# The last argument is the merge table; what is left in @ARGV afterwards
# are the key column names.
$mgtbl = pop(@ARGV) ;
# Three-argument open: the name is always treated as a plain file to read,
# never as a mode prefix or a pipe specification.
open( MT, '<', $mgtbl ) || die "\nCan't open merge_tbl: $mgtbl.\n" ;
# Read the old_tbl header from STDIN: optional comment lines, then the
# column-name line (-> @CN, $NC), then the column-definition line (-> @CD).
# In the -sub case the header lines are echoed to the output as they are
# read, unless -n was given.
while( <STDIN> ){					# read old_tbl header
    print if $SUB && ! $NHDR ;
    next if /^\s*#/ ;			# comment line
    chop ;
    unless( $lln++ ){			# first non-comment line: col names
        @CN = split( /\t/, $_ ) ;
        $NC = @CN ;
        next ;
    }
    @CD = split( /\t/, $_ ) ;		# second one: col definitions
    for my $def ( @CD ){		# keep only 1st word of each definition
        $def = ( $def =~ /(\S+)/ ) ? $1 : undef ;
    }
    last ;
}
# Read the merge_tbl header from MT: optional comment lines, the column-name
# line (-> @MCN) and the column-definition line, which is read but not kept.
# Outside the -sub case these header lines are echoed to the output as they
# are read, unless -n was given.
while( <MT> ){						# read merge_tbl header
    print if ! $SUB && ! $NHDR ;
    next if /^\s*#/ ;			# comment line
    chop ;
    last if $mln++ ;			# 2nd non-comment line: definitions, unused
    @MCN = split( /\t/, $_ ) ;		# 1st non-comment line: col names
}
# Except in the -sub case both tables must have the same number of columns.
unless( $SUB ){
    if( @CN != @MCN ){
        die "\nDifferent column count in merge_tbl, old_tbl.\n" ; }
}

# Resolve every key column name given on the command line:
#   @KEY     index of each key column in old_tbl
#   @numcmp  parallel to @KEY; 1 if the column definition contains 'N'
#            (compared numerically), else 0 (compared as a string)
# A key column must exist in old_tbl and sit at the same index in merge_tbl.
for $col (@ARGV){
    for( $k=$i=0 ; $i < @CN ; $i++ ){
        next unless $col eq $CN[$i] ;
        $k++ ;
        push( @KEY, $i ) ;
        push( @numcmp, ( $CD[$i] =~ /N/i ? 1 : 0 ) ) ;
        unless( $MCN[$i] eq $CN[$i] ){	# chk key col same both tbls
            die "Key column not same in tables: $col\n" ; }
    }
    die "\nColumn name no match: $col\n" unless $k ;
}

# The delete column is the first column that is not a key column.  It is
# computed once, after ALL keys are known: deriving it while looking at a
# single key name would pick another key column whenever more than one key
# is given (e.g. keys at indexes 0 and 1 would yield 0 instead of 2).
$delx = &first_non_key_index( scalar( @CN ), @KEY ) ;

# first_non_key_index( $ncols, @keys )
#   Return the lowest column index in 0 .. $ncols-1 that does not appear in
#   @keys, or -1 if every column is a key column.
sub first_non_key_index {
    my( $ncols, @keys ) = @_ ;
    my %is_key ;
    @is_key{ @keys } = () ;
    for my $ndx ( 0 .. $ncols - 1 ){
        return $ndx unless exists $is_key{$ndx} ;
    }
    return -1 ;
}
# -sub case only: build %STX (Subset Translation Index), which maps the
# index of each non-key merge_tbl column to the index of the old_tbl column
# carrying the same name.  A merge_tbl column unknown to old_tbl is fatal.
if( $SUB ){				# gen %STX (Subset Translation Index)
    # Names of the key columns; these are left out of the translation.
    my %is_key_name = map { ( $CN[$_] => 1 ) } @KEY ;

    # old_tbl column name -> index; walking backwards lets the lowest index
    # win if a name occurs more than once.
    my %old_index ;
    for my $pos ( reverse 0 .. $#CN ){
        $old_index{ $CN[$pos] } = $pos ;
    }

    for my $mcol ( 0 .. $#MCN ){
        my $name = $MCN[$mcol] ;
        next if $is_key_name{$name} ;
        die "\nMerge column name no match: $name\n"
            unless exists $old_index{$name} ;
        $STX{$mcol} = $old_index{$name} ;
    }
}
# Prime both inputs: $a/@F hold the current old_tbl row, $b/@G the current
# merge_tbl row.
&read_old ; &read_merge ;

# Main merge loop.  Each pass emits or replaces rows according to how the
# keys of the two current rows compare, until either input is exhausted;
# the remainder of the other input is then flushed and the program exits.
while( 1 ){						# main loop
    if( $eof ){
        # Flush whatever is left of old_tbl.
        until( $eofa ){
            print $a, "\n" ;
            &read_old ; }
        if( ! $eofb ){
            # Leftover merge rows have no old_tbl counterpart; in the -sub
            # case they cannot be emitted as complete rows.
            die "\nBad merge_tbl order or format\n" if $SUB ;
            until( $eofb ){
                print $b, "\n" ;
                &read_merge ; } }
        exit 0 ;
    }

    # Normalise the comparison to the effective sort order: afterwards
    # $c < 0 always means "the old row comes first" and $c > 0 "the merge
    # row comes first", whether or not -rev was given.  The -sub non-match
    # check below therefore fires only when an unmatched merge row is about
    # to be emitted, also with -rev (keyed on the raw comparison it would
    # reject valid reverse-sorted input and print subset-shaped rows).
    $c = &cmp_key ;
    $c = -$c if $REV ;

    if( $c < 0 ){		# old row first: keep it
        print $a, "\n" ;
        &read_old ; }
    elsif( $c > 0 ){		# merge row first: no matching old row
        if( $SUB ){
            die "\nMerge_tbl key column NON match: $G[$KEY[0]]\n" ; }
        print $b, "\n" ;
        &read_merge ; }
    else{			# old == merge
        &do_replace ; }
}
# do_replace
#   Called when the current old_tbl row and merge_tbl row have equal keys.
#   Phase 1 walks all merge rows sharing that key: each is printed, except
#   that with -d a row carrying the delete string is not printed; in the
#   -sub case the merge values are first overlaid on the current old row.
#   Phase 2 walks all old rows sharing that key: they are printed only
#   with -add, otherwise they are dropped.
sub do_replace {				# replace or delete row(s)
    while( 1 ){
        if( ! $DEL || ! del_stg() ){
            if( $SUB ){		# merge line b into line a
                @N = @F ;
                @N[ values %STX ] = @G[ keys %STX ] ;
                @G = @N ;
                $b = join( "\t", @G ) ;
            }
            print "$b\n" ;
        }
        @PG = @G ;		# remember this row's fields for same_keyb
        read_merge() ;
        last if $eofb || ! same_keyb() ;
    }
    while( 1 ){
        print "$a\n" if $ADD ;
        @PF = @F ;		# remember this row's fields for same_keya
        read_old() ;
        last if $eofa || ! same_keya() ;
    }
}
# same_keya
#   Return 1 if every key column of the current old_tbl row (@F) equals the
#   same column of the previous old_tbl row (@PF), else 0.  Columns flagged
#   in @numcmp are compared numerically, the others as strings.
sub same_keya {
    my $slot = 0 ;
    for my $col ( @KEY ){
        my $differs = $numcmp[ $slot++ ]
            ? ( $F[$col] != $PF[$col] )
            : ( $F[$col] ne $PF[$col] ) ;
        return 0 if $differs ;
    }
    return 1 ;
}
# same_keyb
#   Return 1 if every key column of the current merge_tbl row (@G) equals
#   the same column of the previous merge_tbl row (@PG), else 0.  Columns
#   flagged in @numcmp are compared numerically, the others as strings.
sub same_keyb {
    my $slot = 0 ;
    for my $col ( @KEY ){
        my $differs = $numcmp[ $slot++ ]
            ? ( $G[$col] != $PG[$col] )
            : ( $G[$col] ne $PG[$col] ) ;
        return 0 if $differs ;
    }
    return 1 ;
}
# del_stg
#   Return 1 if the delete column ($delx) of the current merge_tbl row (@G)
#   holds exactly the delete string $DSTG, else 0.
sub del_stg {
    return ( $G[$delx] eq $DSTG ) ? 1 : 0 ;
}
# cmp_key
#   Compare the key columns of the current old_tbl row (@F) with those of
#   the current merge_tbl row (@G), in the order the keys are listed in @KEY.
#   Returns -1, 0 or 1 if row a sorts before, equal to, or after row b.
#   Columns flagged in @numcmp are compared numerically, the others as
#   strings; the first column that differs decides the result.
sub cmp_key {
    my $slot = 0 ;
    for my $col ( @KEY ){
        my $order = $numcmp[ $slot++ ]
            ? ( $F[$col] <=> $G[$col] )		# numeric comparison
            : ( $F[$col] cmp $G[$col] ) ;	# string comparison
        return $order if $order ;
    }
    return 0 ;
}
# read_old
#   Read the next line of old_tbl from STDIN into $a (line terminator
#   removed) and split it on tabs into @F, using at most $NC fields.
#   At end of input, increment both $eof and $eofa instead.
sub read_old {			# read next line from old_tbl into $a & @F
    $a = <STDIN> ;
    # Test definedness, not truth: a final unterminated line consisting of
    # "0" is a valid row, not end of input.
    if( defined $a ){
        # chomp removes only a trailing newline, so a last line that lacks
        # one does not lose its final data character.
        chomp $a ;
        @F = split( /\t/, $a, $NC ) ; }
    else{
        $eof++ ; $eofa++ ; }
}
# read_merge
#   Read the next line of merge_tbl from MT into $b (line terminator
#   removed) and split it on tabs into @G, using at most $NC fields.
#   At end of input, increment both $eof and $eofb instead.
sub read_merge {		# read next line from merge_tbl into $b & @G
    $b = <MT> ;
    # Test definedness, not truth: a final unterminated line consisting of
    # "0" is a valid row, not end of input.
    if( defined $b ){
        # chomp removes only a trailing newline, so a last line that lacks
        # one does not lose its final data character.
        chomp $b ;
        @G = split( /\t/, $b, $NC ) ; }
    else{
        $eof++ ; $eofb++ ; }
}
