#!/usr/bin/perl

# Standalone compiler for CLC-INTERCAL

# This file is part of CLC-INTERCAL.

# WARNING - do not operate heavy machinery while using CLC-INTERCAL

# Copyright (C) 1999 Claudio Calvelli <lunatic@assurdo.com>, all rights reserved

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

require 5.002;

$0 =~ / / or die "666 The compiler only work if the command name contains spaces\n";

$| = 1;

use vars qw($VERSION);
$VERSION = '0.05';

sub elapsed ();

use Getopt::Long;
use Language::INTERCAL;
use Language::INTERCAL::Runtime::Library;

use vars qw(*guess_charset);
*guess_charset = \&Language::INTERCAL::guess_charset;

my $just_compile = 0;
my $dont_compile = 0;
my $input_charset = '';
my $output_charset = '';
my $optimise = 0;
my $output_name = '';
my $backend = 'Perl';
my $parser = 'CLCintercal';
my @backparm = ();
my $dont_run = 0;
my $bug = '';
my $ubug = '';
my $obsolete = 0;
my $verbose = 0;

my @to_do = ();
my @to_link = ();
my $link_name = '';

END {
    print STDERR elapsed, "Exiting\n" if defined $verbose && $verbose;
}

Getopt::Long::config qw(no_ignore_case auto_abbrev permute bundling);

GetOptions('compile'   => \$just_compile,
	   'c'         => \$just_compile,
           'C'         => \$dont_compile,
           'nocompile' => \$dont_compile,
	   'bug:i'     => \$bug,
	   'ubug:i'    => \$ubug,
	   'obsolete'  => \$obsolete,
	   'a'         => sub { $input_charset = 'ASCII' },
	   'ascii'     => sub { $input_charset = 'ASCII' },
	   'b'         => sub { $input_charset = 'Baudot' },
	   'baudot'    => sub { $input_charset = 'Baudot' },
	   'e'         => sub { $input_charset = 'EBCDIC' },
	   'ebcdic'    => sub { $input_charset = 'EBCDIC' },
	   'charset=s' => sub { $input_charset = $_[1] },
	   'A'         => sub { $output_charset = 'ASCII' },
	   'ASCII'     => sub { $output_charset = 'ASCII' },
	   'B'         => sub { $output_charset = 'Baudot' },
	   'BAUDOT'    => sub { $output_charset = 'Baudot' },
	   'E'         => sub { $output_charset = 'EBCDIC' },
	   'EBCDIC'    => sub { $output_charset = 'EBCDIC' },
	   'CHARSET:s' => sub { $output_charset = $_[1] },
	   '0'         => sub { $output_charset = '' },
	   'optimise'  => \$optimise,
	   'optimize'  => \$optimise,
	   'O'         => \$optimise,
	   'o=s'       => \$output_name,
	   'output=s'  => \$output_name,
	   'backend=s' => \$backend,
	   'l=s'       => \$backend,
	   'parser=s'  => \$parser,
	   'p=s'       => \$parser,
	   'r'         => \$dont_run,
	   'norun'     => \$dont_run,
	   'v'         => \$verbose,
	   'verbose'   => \$verbose,
	   '<>'        => \&to_do) or usage();

fiddle Language::INTERCAL "bug=$bug" if $bug ne '';
fiddle Language::INTERCAL "ubug=$ubug" if $ubug ne '';
fiddle Language::INTERCAL 'next' if $obsolete;
fiddle Language::INTERCAL "parser=$parser";

my $to_do;
for $to_do (@to_do) {
    my ($op, @op) = @$to_do;
    &$op(@op);
}

if (@to_link) {
    my $name = shift @to_link;
    if (@to_link) {
	print STDERR elapsed, "Linking... " if $verbose;
	$name->link(@to_link);
	print STDERR "\n" if $verbose;
    }
    if ($dont_run || $backend ne 'Perl') {
	if ($output_name eq '') {
	    $output_name = $name->complete_name($backend, $link_name);
	}
	print STDERR elapsed, "Backend '$backend' to '$output_name'... "
	    if $verbose;
	print STDERR " (optimising)"
	    if $verbose && exists $name->{'flags'}{'optimise'};
	$name->backend($backend, $output_name, @backparm);
	print STDERR "\n" if $verbose;
    } else {
	if ($output_name eq '') {
	    $output_name = 'program';
	}
	print STDERR elapsed, "Backend 'Perl' to '$output_name'... "
	    if $verbose;
	print STDERR " (optimising)"
	    if $verbose && exists $name->{'flags'}{'optimise'};
	$name->backend('Perl', $output_name, @backparm);
	print STDERR "\n", elapsed, "Running.\n" if $verbose;
	_run($input_charset, $output_charset, $output_name);
    }
}

sub to_do {
    my ($source, $dest, $suffix) = @_;
    @backparm = ();
    ($backend, @backparm) = ($`,split(/,/, $')) if $backend =~ /=/;
    ($dest = $source) =~ s/\.[^\.]+$// or
    	die "111 File name $source requires suffix\n";
    $suffix = substr($&, 1);
    my @alph = ($input_charset, $output_charset, $optimise);
    if ($suffix eq 'i') {
	if ($dont_compile) {
	    push @to_do, [\&list, @alph, $source, $output_name];
	} elsif ($just_compile) {
	    push @to_do, [\&compile, @alph, $source,
			  $output_name || ($dest . '.ipt')];
	} else {
	    push @to_do, [\&compile_link, @alph, $source, $dest];
	}
    } elsif ($suffix eq 'ipt') {
	if ($dont_compile || $just_compile) {
	} else {
	    push @to_do, [\&link, @alph, $source, $dest];
	}
    } else {
	die "111 Unknown file suffix: $suffix\n";
    }
}

sub compile_link {
    my ($in, $out, $opt, $src, $dest) = @_;
    my $ptree = _compile($in, $out, $opt, $src);
    $link_name = $dest;
    push @to_link, $ptree;
}

sub link {
    my ($in, $out, $opt, $src, $dest) = @_;
    print STDERR elapsed, "Loading '$src'... " if $verbose;
    my $ptree = load Language::INTERCAL $src;
    print STDERR "\n" if $verbose;
    if ($opt) {
	print STDERR elapsed, "Optimising '$src'... " if $verbose;
	$ptree->optimise();
	print STDERR "\n" if $verbose;
    }
    $link_name = $dest;
    push @to_link, $ptree;
}

sub compile {
    my ($in, $out, $opt, $src, $dest) = @_;
    my $ptree = _compile($in, $out, $opt, $src);
    print STDERR elapsed, "Saving '$dest'... " if $verbose;
    $ptree->save($dest);
    print STDERR "\n" if $verbose;
}

sub list {
    my ($in, $out, $opt, $src, $dest) = @_;
    my $ptree = _compile($in, $out, $opt, $src);
}

sub _compile {
    my ($in, $out, $opt, $src) = @_;
    print STDERR elapsed, "Loading '$src'... "if $verbose;
    open(SRC, '< ' . $src) or die "110 $src\: $!\n";
    my $text = '';
    while (<SRC>) { $text .= $_ }
    close SRC;
    print STDERR "\n" if $verbose;
    if ($in eq '') {
	print STDERR elapsed, "Guessing input character set for '$src'... "
	    if $verbose;
	$in = guess_charset($text);
	die "111 Cannot guess character set\n" if $in eq '';
	print STDERR "$in\n" if $verbose;
    }
    fiddle Language::INTERCAL "charset=$in";
    my $cvt = convert_charset('ASCII', $out);
    print STDERR elapsed, "Parsing '$src'... " if $verbose;
    my $ptree = parse Language::INTERCAL $text, $src, sub { print &$cvt(@_) };
    print STDERR "\n" if $verbose;
    name $ptree $src;
    if ($opt) {
	print STDERR elapsed, "Optimising '$src'... " if $verbose;
	optimise $ptree;
	print STDERR "\n" if $verbose;
    }
    $ptree;
}

sub _run {
    my ($in, $out, $prog) = @_;
    my $cin = convert_charset($in || 'EBCDIC', 'EBCDIC');
    my $cout = convert_charset('ASCII', $out || 'ASCII');
    my $sin = sub {
	my $t;
	if (@_) {
	    read STDIN, $t, $_[0];
	} else {
	    $t = <STDIN>;
	    $t = &$cin($t);
	}
	$t;
    };
    my $sout = sub {
	print &$cout(@_);
    };
    $@ = '';
    eval "&$prog(\$sin, \$sout)";
    print $@ if $@;
}

sub usage {
    (my $p = $0) =~ s!^.*/!!;
    die "013 Usage: '$p' [-aAbBcCeEOr] [-o name] [-l lang] files...\n";
}

sub ptime ($) {
    my ($v) = @_;
    $v < 60 and return sprintf("%8d", $v);
    $v < 3600 and return sprintf("%5d:%02d", int($v / 60), $v % 60);
    return sprintf("%2d:%02d:%02d", int($v / 3600), int($v / 60) % 60, $v % 60);
}

sub elapsed () {
    my ($user, $system, $cuser, $csystem) = times;
    my $tot = $user + $system + $cuser + $csystem;
    # my $sec = time - $^T;
    # ptime($sec) . ' ' .
    ptime($tot) . sprintf(".%02d", int(100 * $tot) % 100) . ' ';
}

__END__

=pod

=head1 NAME

'oo, ick' - Compiler for CLC-INTERCAL

=head1 SYNOPSIS

B<'oo, ick'> [options] B<files>...\n";

=head1 DESCRIPTION

'oo, ick' reads one or more source files and compiles them using the
CLC-INTERCAL compiler. Currently, two types of input source files are
recognised:

=over 4

=item INTERCAL program source

These files must have suffix B<.i>. The compiler can produce a CLC-INTERCAL
compiler object, a program in another language using one of the optional
compiler back ends, or just compile and run the program.

=item CLC-INTERCAL compiler object.

These files must have suffix B<.ipt>. The compiler can produce a program in
another language using one of the optional back ends, or just run the program.

=back

The compiler accepts several options, some of which are documented here:

=over 4

=item B<-c>

Just produce a CLC-INTERCAL compiler object. If the input file is already a
compiler object, does nothing.

=item B<-C>

Does not compile the program. I can't remember why one would want to do this.

=item B<-A>

Lists the program source in ASCII.

=item B<-B>

Lists the program source in Baudot.

=item B<-E>

Lists the program source in EBCDIC.

=item B<-O>

Does not list the program source. This is the default. Note that this is
a zero, not an oh.

=item B<-a>

Assumes that program source is in ASCII.

=item B<-b>

Assumes that program source is in Baudot.

=item B<-e>

Assumes that program source is in EBCDIC.

=item B<--charset> I<name>

Assumes that program source is in the given character sets. Valid values are
currently I<ASCII>, I<Baudot>, I<EBCDIC> or I<''> (empty string). The latter,
which is the default, will try to guess the character set.

=item B<-O>

Invokes the optimiser. This is an oh, not a zero.

=item B<-o> I<name>

Selects a name for the output file. Default is to use the same name as
the source changing the suffix as appropriate. What is an appropriate
suffix depends on the backend being used. For CLC-INTERCAL compiler trees,
the suffix is B<.ipt>.

=item B<-l> I<name>

Selects a different compiler back end. Default is to use the built in
Perl back end and then call the subroutine created by the compiler. If
a different back end is selected, the program won't automatically run,
but presumably the back end produces some output file.

=item B<-p> I<name>

Select an alternative parser. Default is to use a parser which understands
the language described by the reference manual ("CLCintercal"). Alternative
parsers included with the distribution are "Cintercal" and "ThreadedINTERCAL"
which attempt to be compatible with I<ick> and I<thick>, respectively. Note
that you must include the system library, I<syslib.i>, if you need to use it
as this compiler does not automagically include it.

=item B<-r>

Do not run the program. This is the default if the back end selected is
not the built in Perl. Note that if the program does run, all output is
subject to the same conversions as specified by the B<-A>, B<-B>, or
B<-E> switch, with the exception that B<-0> is ignored; this includes
any binary output, so the safest thing is to use B<-0>. For input, the
same conversions as the source code apply, however, the conversion is
not applied to binary input. The recommended choice is B<-e>, which is
identical to what the program would do when compiled to a standalone
executable.

=back

=head1 NOTES

The program name is B<'oo, ick'>, with embedded space and comma. Because of
unexcusable limitation in several utilities (including, but not limited to,
I<man>, I<perldoc>, I<make> and I<ExtUtils::MakeMaker>), the installation
scripts will create a separate executable B<oo,space,ick> and related man
page. These can be useful to consult this documentation, but the executable
won't run. Use B<'oo, ick'> instead.

=head1 SEE ALSO

L<Language::INTERCAL>.

