#!/usr/bin/perl -w

#    Copyright (C) 1999 Tor Lillqvist

#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2, or (at your option)
#    any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#    02111-1307, USA.

# gen-formata.pl - Convert the "Format A" table files from the Unicode consortium
# into C source code tables we can use.  
#
# Usage example:
# perl gen-formata.pl sjis shiftjis.txt jis/shiftjis.h

# For instance, the table for Shift-JIS is available as
# ftp://www.unicode.org/Public/MAPPINGS/EASTASIA/JIS/SHIFTJIS.TXT
# and the table for Microsoft CP936 as
# ftp://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT

# I consider the output of this program to be unrestricted.  Use it as
# you will.

use strict;

# ----------------------------------------------------------------------
# Command line:  gen-formata.pl CODENAME INFILE OUTFILE
#
#   CODENAME  prefix used for every generated C identifier
#             (CODENAME_xx row tables and CODENAME_table index)
#   INFILE    "Format A" mapping table to read
#   OUTFILE   C header to write
#
# File names are passed to three-argument open and are therefore taken
# literally (no mode characters or "-" shortcuts are interpreted).
# ----------------------------------------------------------------------
die "Usage: $0 codename infile outfile\n" unless @ARGV >= 3;
my ($codename, $infile, $outfile) = @ARGV;

# The code name ends up inside C identifiers, so anything that is not a
# valid identifier could only ever produce a header that fails to compile.
die "Code name '$codename' is not a valid C identifier\n"
    unless $codename =~ /\A[A-Za-z_][A-Za-z0-9_]*\z/;

open(my $in, '<', $infile) or die "Cannot open $infile for reading: $!";

# Row tables, keyed by the most significant byte of the character code.
# Each value is a reference to a 256-element array (indexed by the least
# significant byte) holding the Unicode value, or 0 where nothing maps.
my %table;

my $unicode_version;
my $table_version;
my $format_ok = 0;

while (my $line = <$in>) {
    # Pick up the metadata that lives in the header comments.
    if ($line =~ /^#\s+Unicode version:\s+(\S+)/) {
        $unicode_version = $1;
    } elsif ($line =~ /^#\s+Table version:\s+([0-9.]+)/) {
        $table_version = $1;
    } elsif ($line =~ /^#\s+Table format:\s+Format A/) {
        $format_ok = 1;
    }

    next if $line =~ /^#/;

    # Strip trailing comments, then ignore lines with nothing left on
    # them (the line terminator is still present, so test for any
    # non-whitespace rather than comparing against the empty string).
    $line =~ s/\#.*$//;
    next unless $line =~ /\S/;

    die "Is this really a Format A table?" unless $format_ok;

    my ($code, $u) = split(/\t/, $line);

    # No Unicode column at all, or a blank one: the code is unmapped.
    next if !defined $u || $u =~ /^\s/;

    my $value = hex($code);
    my $msb   = int($value / 0x100);
    my $lsb   = $value % 0x100;

    # Create the row, zero-filled, the first time its lead byte is seen.
    $table{$msb} ||= [ (0) x 0x100 ];
    $table{$msb}[$lsb] = hex($u);
}

close($in);

# Keep the generated comment free of "uninitialized" warnings when the
# input file did not carry the version headers.
$unicode_version = '' unless defined $unicode_version;
$table_version   = '' unless defined $table_version;

open(my $out, '>', $outfile) or die "Cannot open $outfile for writing: $!";

# Derive the include guard from the output file name: upper-case it and
# replace every character that cannot appear in a macro name with '_'.
my $guardid = uc $outfile;
$guardid =~ s/[^A-Z0-9_]/_/g;

print {$out} "/* This file is automatically generated.  DO NOT EDIT.  */\n\n";
print {$out} "#ifndef $guardid\n";
print {$out} "#define $guardid\n";
print {$out} "\n";
print {$out} "/* Based on data in file $infile\n";
print {$out} " * Unicode version $unicode_version\n";
print {$out} " * Table version $table_version\n";
print {$out} " */\n";
print {$out} "\n";

# One 256-entry array per lead byte that actually occurred in the input,
# eight values per output line.
for my $msb (0 .. 0xFF) {
    next unless exists $table{$msb};
    my $row = $table{$msb};

    printf {$out} "static unsigned short %s_%.02x[0x100] = {\n  ",
                  $codename, $msb;
    for my $lsb (0 .. 0xFF) {
        printf {$out} "%#06x", $row->[$lsb];
        print {$out} ',' if $lsb < 0xFF;
        print {$out} "\n  " unless ($lsb + 1) % 0x08;
    }
    print {$out} "};\n";
}

# Index table: pointer to the row for each lead byte, NULL where the
# input defined no codes with that lead byte.
print {$out} 'static unsigned short *' . $codename . "_table[0x100] = {\n";
for my $msb (0 .. 0xFF) {
    print {$out} '  ';
    if (exists $table{$msb}) {
        printf {$out} "%s_%.02x", $codename, $msb;
    } else {
        print {$out} "NULL";
    }
    print {$out} ',' if $msb < 0xFF;
    print {$out} "\n";
}
print {$out} "};\n";
print {$out} "\n";
print {$out} "#endif /* $guardid */\n";

# Buffered write errors (e.g. disk full) only surface at close time.
close($out) or die "Cannot close $outfile: $!";
