#!/bin/sh
#
# Convert text to unique word list.
#

# RCS revision tag of this script; it is shown in the -h help text.
RCS_ID='$Id: nsq-words,v 1.1 1998/05/29 20:35:14 carlos Exp $'

# Name this script was invoked as, with any leading directory stripped.
# Parameter expansion is used instead of an unquoted `basename $0`, which
# was word-split (and so failed) when the path contained whitespace.
my_name=${0##*/}

# Write the built-in help screen to standard output.
show_help() {
  cat <<_HELP_TEXT_

        NoSQL operator: ${my_name}

Usage:  ${my_name}  [options]  [file [file ... ]]

Options:
           
    -f        Ignore the case of alphabetic characters.
    -h        Print this help info.

Generates a list of unique words from a set of text files. If no files
are specified on the command line, then the data is read from STDIN.
Non-alphanumeric chars are stripped automatically from output.

$RCS_ID

            ----------------------
NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.
This program comes with ABSOLUTELY NO WARRANTY; for details
refer to the GNU General Public License.

You should have received a copy of the GNU General Public License
along with this program;  if not, write to the Free Software
Foundation, Inc., 59 Temple Place Suite 330, Boston, MA 02111-1307
USA.
            ----------------------

_HELP_TEXT_
}

# Consume leading switches. The first argument that is not a recognised
# switch stops the scan; it and everything after it stay in the
# positional parameters.
while [ "$#" -gt 0 ]
do
  case "$1" in
    -f)
      # Case-insensitive mode: remember the flag in sort_opt.
      sort_opt="-f"
      shift
      ;;
    -h*)
      # Any argument starting with -h prints the help and ends the run.
      show_help
      exit 0
      ;;
    *)
      break
      ;;
  esac
done

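# There are two ways of producing the word list: one with 'sed' and one
# with 'tr' alone. The 'sed' variant is kept commented out for reference;
# the 'tr' pipeline further down is the one that actually runs.
#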
#sed 's/[^A-Za-z0-9#&.,:;]/ /g' $* | tr -s ' ' '\n' |
#	grep -v '^ *$' | sort -u ${sort_opt}

# Produce the unique word list on standard output.
#   cat   concatenates the files named on the command line, or copies
#         STDIN when none are given. "$@" keeps each file name intact
#         even if it contains blanks or glob characters.
#   tr    turns every run of characters outside the word set (letters,
#         digits and # & . , : ;) into a single newline. The set is
#         written without enclosing brackets: to a POSIX tr those would
#         be literal '[' and ']' and would be kept as word characters.
#   sed   drops the empty line tr emits when the input starts with a
#         separator, so no empty "word" reaches the list.
#   sort  orders the words and removes duplicates; sort_opt is passed
#         through as a single argument only when it is set and non-empty.
cat "$@" |
  tr -cs 'a-zA-Z0-9#&.,:;' '[\n*]' |
  sed '/^$/d' |
  sort -u ${sort_opt:+"${sort_opt}"}

# Leave with the status of the command that ran last
# (a bare `exit` reuses the current value of $?).
exit

