#!/usr/bin/perl
# Original code: indextbl,v 1.13 1994/06/20 10:46:33 hobbs
#
# RCS keyword string identifying this revision; it is interpolated into the
# help text below.
$RCS_ID = '$Id: nsq-index,v 1.0 1998/05/14 11:07:28 carlos Exp $' ;
# Strip everything up to and including the last '/' from the program name
# (the substitution uses '-' as its delimiter), so that usage and error
# messages show the bare command name.
$0 =~ s-.*/-- ;
# Usage text printed by the -help option.  The here-document terminator is
# unquoted, so $0 and $RCS_ID are interpolated when this assignment runs.
$HelpInfo = <<EOH ;

	    NoSQL Utility: $0

Usage:  $0  [options]  rdbtbl  column  ...
        $0  [options]  -update  [ index_file  ... ]
        $0  [options]  -allup  rdbtable       <<< NOT IMPLEMENTED YET

Options:
    -help    Print this help info.
    -x       Debug option.

The first form of usage of this utility generates an index file for the
column(s) given, that refers to the specified rdbtbl.  An index file is
actually another (smaller) rdbtable containing only the column(s) given plus
a column for index information. An index file can be used by the operator
'nsq-search' to quickly locate rows of data in the referenced rdbtbl.

Index files are named by appending an 'x' and the column name(s) (seperated
by a dots) to the base name of the rdbtable it refers to. For example an index
file for the rdbtable "area.rdb" on column "strip" would be "area.x.strip".
An index file that was also on column "depth" would be "area.x.strip.depth".

The second form of usage of this utility updates the index file(s) given. If
no files are given all index files in the current directory are updated.  An
update of an index file is necessary when the rdbtable it refers to has been 
modified.

The third form of usage of this utility updates all index files in the current
directory that refer to the given rdbtable.

This operator writes or rewrites rdbtables with defined names in the current
directory.  Options may be abbreviated.

$RCS_ID

			----------------------
NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.
This program comes with ABSOLUTELY NO WARRANTY; for details
refer to the GNU General Public License.

You should have received a copy of the GNU General Public License
along with this program;  if not, write to the Free Software
Foundation, Inc., 59 Temple Place Suite 330, Boston, MA 02111-1307
USA.
			----------------------

EOH
# Parse the leading option switches.  Options may be abbreviated to their
# first letter and may be written with one or two leading dashes.  The
# patterns are anchored so that a malformed switch (e.g. "-foo-x") is
# rejected as a bad argument instead of being taken for a valid option.
while( @ARGV && $ARGV[0] =~ /^-/ ){			# Get args
    $_ = shift ;
    if( /^--?a/ ){
	# -allup takes the rdbtable name as its operand.  Refuse a missing
	# operand: an undefined $MTBL would silently disable the table
	# filter in do_upd and rebuild every index file.
	die "\n$0: Option $_ requires an rdbtable name.\n",
	    "For help type \"$0 -help\".\n" unless @ARGV ;
	$MTBL = shift ;
	next ;
    }
    if( /^--?h/ ){ print $HelpInfo ; exit ; }
    if( /^--?u/ ){ $UPD++ ; next ; }
    if( /^--?x/ ){ $xbug++ ; next ; }
    die "\nBad arg: $_\n", "For help type \"$0 -help\".\n" ;
}

# Dispatch: either refresh existing index files, or build a new index file
# for the table and column(s) named on the command line.
if( $UPD ){
    do_upd() ;
}
else{
    die "\n$0: Not enough info given.\n", "For help type \"$0 -help\".\n"
	unless @ARGV >= 2 ;
    $tbl = shift ;				# rdbtable to index
    ($base = $tbl) =~ s/\.rdb$// ;		# table name without ".rdb"
    @COLS = @ARGV ;				# columns to index, in key order
    # Index file name: <base>.x.<col1>[.<col2>...]
    $dbx = "$base.x." . join( ".", @COLS ) ;
    do_ndx() ;
}
sub do_ndx {		# generate index file $dbx from $tbl on columns @COLS
    # Inputs (package globals set by the caller):
    #   $tbl   - path of the rdbtable to index
    #   $dbx   - path of the index file to write or rewrite
    #   @COLS  - names of the columns to index, in key order
    #   $xbug  - when true, stop after the columns have been validated;
    #            no index file is written
    #
    # The index file is itself an rdbtable: the selected columns plus a
    # final "lpos" column holding the byte offset at which each row starts
    # in $tbl.  Rows are piped through "nsq-sort" on their way to $dbx.
    #
    # Dies if $tbl cannot be opened, if a name in @COLS is not a column of
    # $tbl, or if the pipe cannot be started.  Warns if the sort pipeline
    # reports failure when it is closed.
    warn "gen ndx file $dbx ($tbl) on: @COLS\n" ;

    # The explicit "<" makes a name that starts with ">" or "|" open as a
    # plain file instead of being read as an open mode or a command.
    open( RR, "< $tbl" ) || die "Can't open rdbtbl: $tbl.\n" ;

    my( @ndx, @XFN, @XFD ) ;	# column positions, index col names, index col defs
    my( @CN, @CD ) ;		# column names and definitions of $tbl
    my $lln = 0 ;		# number of non-comment header lines seen
    my $line ;
    while( defined( $line = <RR> ) ){			# read rdbtbl header
	next if $line =~ /^\s*#/ ;	# comment
	chomp( $line ) ;		# chomp, not chop: keep the last character
					# of a final line that has no newline
	if( ++$lln == 1 ){
	    @CN = split( /\t/, $line ) ;	# col names
	    next ; }
	@CD = split( /\t/, $line ) ;	# col definitions
	last ; }
    my $lnpos = tell( RR ) ;		# read point of first row

    for my $col (@COLS){		# get, chk column name ndx
	my $k = -1 ;
	for my $i (0 .. $#CN){
	    if( $col eq $CN[$i] ){
		$k = $i ;
		last ; } }
	die "Column name no match: $col\n" if $k == -1 ;
	push( @ndx, $k ) ;		# index in $tbl
	push( @XFN, $col ) ;		# new index file, col name
	push( @XFD, $CD[$k] ) ;	# new index file, col definition
    }
    push( @XFN, "lpos" ) ;
    push( @XFD, "6n Line pos in $tbl" ) ;

    if( $xbug ){			# debug: validate only, write nothing
	close( RR ) ;
	return ;
    }

    # Single-quote every word handed to the shell so that column or file
    # names containing blanks or shell metacharacters are passed literally.
    my @words = ( @XFN, $dbx ) ;
    for (@words){ s/'/'\\''/g ; $_ = "'$_'" ; }
    my $out = pop( @words ) ;
    open( SS, "| nsq-sort @words > $out" ) || die "Can't open pipe.\n" ;
    print SS "# NoSQL Index file for: $tbl\n" ;
    print SS join( "\t", @XFN ), "\n", join( "\t", @XFD ), "\n" ;

    my $rcnt = 0 ;
    warn "Reading $tbl ...\n" ;
    while( defined( $line = <RR> ) ){			# read rdbtbl body
	$rcnt++ ;
	chomp( $line ) ;
	# A negative limit keeps trailing empty fields, so column positions
	# stay aligned with the header.
	my @fields = split( /\t/, $line, -1 ) ;
	print SS join( "\t", @fields[@ndx] ), "\t", $lnpos, "\n" ; # send to nsq-sort
	$lnpos = tell( RR ) ;		# read point of next row
    }
    close( RR ) ;
    warn "Rows: $rcnt, Sorting ...\n" ;
    # Closing a pipe waits for the command and returns false if it failed;
    # report that instead of silently leaving a bad index file behind.
    close( SS ) || warn "$0: nsq-sort failed while writing $dbx (status $?)\n" ;
}
sub do_upd { # get index file names, tbl names, col names, call do_ndx for each.
    # Rebuilds every index file named in @ARGV.  When @ARGV is empty, all
    # entries of the current directory whose name contains ".x." are used.
    # If $MTBL is set, only index files whose table name equals $MTBL are
    # processed.
    #
    # For each index file this sets the globals $dbx (index file), $tbl
    # (the table it refers to, "<base>.rdb") and @COLS (the key columns,
    # taken from the index file's own header), then calls do_ndx.
    #
    # Dies if the current directory or an index file cannot be opened.
    unless( @ARGV ){
	opendir( DD, "." ) || die "Can't open curr dir\n" ;
	@ARGV = grep( /\.x\./, readdir( DD )) ;
	closedir( DD ) ;
    }
    for $dbx (@ARGV){		# $dbx stays a global: do_ndx reads it
	my $base ;
	($base = $dbx) =~ s/\.x\..*$// ;
	$tbl = "$base.rdb" ;
	next if $MTBL && $MTBL ne $tbl ;
	# The explicit "<" keeps an odd file name from being read as an
	# open mode or a command.
	open( XF, "< $dbx" ) || die "Can't open input: $dbx\n" ;
	@COLS = () ;		# never reuse the previous file's columns
	while( <XF> ){
	    next if /^\s*#/ ;	# skip comments
	    chomp ;
	    @COLS = split( /\t/, $_ ) ;	# first header line: column names
	    pop( @COLS ) ;	# remove line pos col
	    last ; }
	close XF ;		# closed even when no header line was found
	unless( @COLS ){
	    warn "$0: no index columns found in $dbx, skipped\n" ;
	    next ;
	}
	do_ndx() ;
    }
}
