#!/bin/sh
#
# Selects specific columns from an rdbtable.
#
# Author: Carlo Strozzi <carlos@linux.it>

# RCS keyword placeholder; it is printed near the end of the help text.
RCS_ID='$Id$'

# Script name without its directory part, shown in the help text.  Done with
# parameter expansion so an invocation path containing blanks is handled.
my_name=${0##*/}

# Start from a known state so same-named variables inherited from the
# environment cannot switch options on behind the caller's back.
no_hdr=
e_set=

# Consume leading options.  The first argument that is not a recognised
# option ends the loop and is left in place as $1 (the column list).
while [ "$#" -ge 1 ] ; do
  case "$1" in
    -h*) cat <<_EOH_

        NoSQL operator: ${my_name}

Usage:  ${my_name}  [options] [expression]

Options:
    -help     Print this help info.
    -n        Strip header from output.
    -e  "abc" Escape characters in set [a,b,c] by prepending a
              backslash to each of them on STDOUT.

Takes a list of column names of the input rdbtable and prints them to
STDOUT in the specified order.  Chars that are special to the UNIX shell
must be quoted. 

Column names are in the form 'column_1 column_2 ...'.
For example, to select columns 'NAME' and 'JOB' from the input rdbtable
the statement is:

                        'NAME  JOB' 

Note how the list of columns must be quoted, i.e. it must be one
single token.

This operator reads an rdbtable via STDIN and writes an rdbtable
via STDOUT.  If no column is specified on the command line, then only
the table comments, if any, are written to STDOUT.


$RCS_ID

            ----------------------
NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.
This program comes with ABSOLUTELY NO WARRANTY; for details
refer to the GNU General Public License.

You should have received a copy of the GNU General Public License
along with this program;  if not, write to the Free Software
Foundation, Inc., 59 Temple Place Suite 330, Boston, MA 02111-1307
USA.
            ----------------------

_EOH_
        exit 0
        ;;
    -n)
        # Suppress table comments, header and column-definition lines.
        no_hdr=1
        shift
        ;;
    # -bs, -ms and -es each take one value.  The value is stored but is
    # not referenced anywhere else in this script.  When the option is the
    # last argument the value is empty and the second shift is skipped, so
    # the loop never shifts past the end of the argument list.
    -bs)
        bs=${2-}
        shift
        [ "$#" -eq 0 ] || shift
        ;;
    -ms)
        ms=${2-}
        shift
        [ "$#" -eq 0 ] || shift
        ;;
    -es)
        es=${2-}
        shift
        [ "$#" -eq 0 ] || shift
        ;;
    -e)
        # Set of characters to backslash-escape in the table body.
        e_set=${2-}
        shift
        [ "$#" -eq 0 ] || shift
        ;;
    *)  break ;;
  esac
done

# Project the requested columns out of the rdbtable read from STDIN.
#
# Shell-side inputs:
#   $1      whitespace-separated list of column names, passed as one token
#   no_hdr  the exact value "1" suppresses comment, header and
#           column-definition lines
#   e_set   characters to be backslash-escaped in table body rows
#
# The values are handed to awk with -v rather than spliced into the program
# text, so a double quote or newline inside them cannot change the awk
# source.  awk interprets backslash escapes in -v values the same way it
# does inside string literals.
awk -v no_hdr="${no_hdr-}" -v e_chars="${e_set-}" -v col_list="${1-}" '
# Build the output record for the current input line from the selected
# field positions, joined with FS.  Requires n_sel >= 1.
function project(    rec, c) {
  rec = $(sel[1])
  for (c = 2; c <= n_sel; c++)
    rec = rec FS $(sel[c])
  return rec
}
# Return s with a backslash prepended to every character that occurs in
# set.  Characters are compared literally (never as regular expressions)
# and s is scanned once, so each matching character gets one backslash.
function escape_chars(s, set,    out, i, n, ch, hit) {
  if (set == "") return s
  # Cheap pre-check: leave records without any listed character untouched.
  hit = 0
  n = length(set)
  for (i = 1; i <= n; i++)
    if (index(s, substr(set, i, 1)) > 0) { hit = 1; break }
  if (!hit) return s
  out = ""
  n = length(s)
  for (i = 1; i <= n; i++) {
    ch = substr(s, i, 1)
    out = out (index(set, ch) > 0 ? "\\" ch : ch)
  }
  return out
}
BEGIN {
  FS = "\t"
  # Compared as strings: only the exact value "1" turns the header off.
  show_hdr = ((no_hdr "") != "1")
  # stage: 0 = header line not seen yet, 1 = column-definition line
  # expected next, 2 = table body.
  stage = 0
}
# Table comments (only recognised before the header line).
stage == 0 && /^ *#/ {
  if (show_hdr) print
  next
}
# Column names and positions.
stage == 0 {
  # Record the field position of every column name.  The first occurrence
  # of a duplicated name wins (duplicates may happen after a join).
  for (p = 1; p <= NF; p++)
    if (!($p in pos_of)) pos_of[$p] = p
  # Keep the requested names that exist in the header, in the requested
  # order.  Unknown names are skipped without affecting later names.
  n_req = split(col_list, req, " ")
  n_sel = 0
  for (i = 1; i <= n_req; i++)
    if (req[i] in pos_of) sel[++n_sel] = pos_of[req[i]]
  # Nothing valid was requested: stop after the table comments.
  if (n_sel == 0) exit
  if (show_hdr) print project()
  stage = 1
  next
}
# Column definitions.
stage == 1 {
  if (show_hdr) print project()
  stage = 2
  next
}
# Table body: escaping is applied to the joined output record.
{
  print escape_chars(project(), e_chars)
}'

# Report success unconditionally; the exit status of the awk command above
# is not propagated to the caller.
exit 0
