/*
 * Copyright 1995,96 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: induce.c,v 2.3 1996/06/30 13:41:44 bousch Exp $
 *
 * Main module of induce. Here we build the dependency graph, and compute
 * the vertices in the "right order", i.e. we're attempting to use as few
 * mref's as possible.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include "saml.h"
#include "induce.h"

/* Number of slots in htable; also the modulus applied by hashme(). */
static unsigned int hashsize;
/* The vertex hash table: hashsize chains, linked through vertex.hnext. */
static vertex **htable;
/* Number of vertices fully created by make_vertex() since the last init. */
static int nb_vertices;
/* Largest depth assigned to any vertex by find_depth() since the last init. */
static int max_depth;

/*
 * (Re)create the vertex hash table with `size' empty slots and reset the
 * vertex count and the maximum depth to zero.
 *
 * The previous slot array (if any) is released, but the vertices that
 * were chained in it are not. `size' must be strictly positive.
 */
void init_vertex_htable (int size)
{
	assert(size > 0);
#ifdef DEBUG_IGRAPH
	fprintf(stderr, "Initial hashsize %d\n", size);
#endif
	/* Throw away the old slot array before allocating the new one */
	free(htable);
	htable = calloc(size, sizeof *htable);
	assert(htable != NULL);

	hashsize = size;
	max_depth = 0;
	nb_vertices = 0;
}

/*
 * Hash a NUL-terminated name into a slot index in [0, hashsize).
 *
 * Each byte (taken as an unsigned value) is folded in with
 * acc = 17*acc + byte, using wrapping unsigned arithmetic; the result
 * is then reduced modulo the current table size.
 */
static int hashme (const char *name)
{
	const unsigned char *s = (const unsigned char *) name;
	unsigned int acc = 0;

	for (; *s != '\0'; s++)
		acc = (acc << 4) + acc + *s;
	return acc % hashsize;
}

static void resize_htable (void)
{
	int h, h2, old_size;
	vertex *v, *vnext, **ntable;

	old_size = hashsize;
	hashsize = 6 * old_size - 1;
#ifdef DEBUG_IGRAPH
	fprintf(stderr, "New hashsize %d (for %d vertices)...", hashsize,
		nb_vertices);
#endif
	ntable = calloc(hashsize, sizeof(vertex*));
	assert(ntable != NULL);
	for (h = 0; h < old_size; h++)
	    for (v = htable[h]; v; v = vnext) {
	    	/* Save the hnext field, cause we'll clobber it */
	    	vnext = v->hnext;
	    	/* Now insert the vertex in the new hash table */
	    	h2 = hashme(v->name);
	    	v->hnext = ntable[h2];
	    	ntable[h2] = v;
	    }
	free(htable);
	htable = ntable;
#ifdef DEBUG_IGRAPH
	fprintf(stderr, " done.\n");
#endif
}

/*
 * Find the vertex called `name' in the hash table.
 *
 * Returns the vertex, or NULL if no vertex with exactly that name is
 * chained in the corresponding slot. The table is not modified.
 */
vertex *lookup_vertex (const char *name)
{
	vertex *cand = htable[hashme(name)];

	/* Walk the chain until it ends or the names match */
	while (cand != NULL && strcmp(name, cand->name) != 0)
		cand = cand->hnext;
	return cand;
}

/*
 * Release the mref held by a vertex and the memory of the vertex itself.
 *
 * make_vertex() allocates the ancestor array and the vertex as a single
 * malloc() block whose start address is stored in v->dep, the vertex
 * structure living right after the array. Freeing v->dep therefore frees
 * (v) as well: there must be no separate free(v), and (v) must not be
 * dereferenced once this function has returned.
 */
void free_vertex (vertex *v)
{
	/* Start of the combined allocation (ancestor array + vertex) */
	vertex **storage = v->dep;

	mref_free(v->mr);
	free(storage);
}

/*
 * Return the vertex called `name', creating it (and, recursively, all of
 * its ancestors) if it does not exist yet.
 *
 * Every call that finds an already existing vertex increments its `used'
 * counter; a freshly created vertex starts with used == 1. While the
 * ancestors of a new vertex are being built, the vertex is "locked" by a
 * negative `used' value, so meeting it again means that the dependency
 * graph is cyclic; in that case a message is printed and the program
 * exits with status 1.
 */
vertex *make_vertex (const char *name)
{
	int slot, nb_anc, k;
	char *anc_names, *cursor;
	const char *code;
	vertex *node, **block;

	/* Is the vertex already known? */
	slot = hashme(name);
	node = htable[slot];
	while (node != NULL && strcmp(name, node->name) != 0)
		node = node->hnext;
	if (node != NULL) {
		if (node->used < 0) {
			/* Locked vertex reached again: cyclic dependency */
			fprintf(stderr,
			    "Cyclic dependency on node `%s'. Abort.\n",
			    node->name);
			exit(1);
		}
		node->used++;
		return node;
	}

	/* Unknown vertex: ask for the list of its ancestors */
	nb_anc = number_ancestors(name, &anc_names, &code);
	/*
	 * The ancestor array and the vertex share ONE allocation, laid out
	 * as [nb_anc vertex pointers][vertex structure, name included].
	 * The vertex ends with a variable-sized field, so the array has to
	 * come first and the vertex pointer is obtained by stepping over
	 * it. This is why free_vertex() frees v->dep and nothing else.
	 */
	block = malloc(nb_anc * sizeof(vertex *) + sizeof(vertex)
		+ strlen(name) + 1);
	assert(block != NULL);
	node = (vertex *)(block + nb_anc);

	/* Fill in the fields and chain the vertex at the head of its slot */
	strcpy(node->name, name);
	node->mr = node->depth = node->score = -1;
	node->dep = block;
	node->deps = nb_anc;
	node->bytecode = code;
	/* Negative means "locked", see the cyclic dependency test above */
	node->used = -1;
	node->hnext = htable[slot];
	htable[slot] = node;

	/* The ancestor names are stored back to back, NUL-separated */
	cursor = anc_names;
	for (k = 0; k < nb_anc; k++) {
		block[k] = make_vertex(cursor);
		cursor += strlen(cursor) + 1;
	}

	/* All ancestors exist now; unlock the vertex */
	node->used = 1;
	free(anc_names);

	/* Keep the average chain length bounded */
	nb_vertices++;
	if (nb_vertices > 2*hashsize)
		resize_htable();
	return node;
}

/*
 * Append printf-style formatted text to the NUL-terminated string stored
 * in buff[0..size-1], writing at offset *used (which must be < size).
 * On success *used is advanced past the appended text. If the text does
 * not fit, an error naming the vertex `vertex_name' is printed and the
 * program exits with status 1, like the other fatal errors of this file.
 */
static void append_dep_text (char *buff, size_t size, size_t *used,
	const char *vertex_name, const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vsnprintf(buff + *used, size - *used, fmt, ap);
	va_end(ap);
	if (n < 0 || (size_t) n >= size - *used) {
		fprintf(stderr,
		  "Dependency list of `%s' is too long. Abort.\n",
		  vertex_name);
		exit(1);
	}
	*used += (size_t) n;
}

/*
 * Compute the list of ancestors (direct dependencies) of the vertex
 * called `name'.
 *
 *   name   vertex name, optionally subscripted: root[i1,i2,...]
 *   anc    out: NULL if `name' does not start with a letter or '_';
 *          otherwise a malloc()ed buffer holding the ancestor names
 *          back to back, each one NUL-terminated. The caller frees it.
 *   bcode  out: bytecode to run on the ancestors, or NULL if there is
 *          none.
 *
 * Returns the number of ancestors. An invalid subscript, or a dependency
 * list that does not fit in the internal buffer, is a fatal error: a
 * message is printed and the program exits with status 1.
 */
int number_ancestors (const char *name, char **anc, const char **bcode)
{
	char buff[4096], *rootname, *p, c;
	int i, ineg, nbdep, len, nbind, *ind;
	size_t used;
	eval_rule *rule;

	*anc = NULL;
	*bcode = NULL;
	/* The <ctype.h> functions are only defined for unsigned char values */
	if (!isalpha((unsigned char) name[0]) && name[0] != '_') {
		/* Not a subscripted variable/literal */
		return 0;
	}
	/* Read the root name */
	rootname = alloca(strlen(name)+1);
	strcpy(rootname, name);
	nbind = 0;
	ind = NULL;
	if ((p = strchr(name, '[')) != NULL) {
		rootname[p-name] = '\0'; p++;
		i = ineg = 0;
		/*
		 * We know that nbind <= strlen(p), because the loop below
		 * will be executed at most strlen(p) times. The worst case
		 * happens with variable names like foo[,,,], which is
		 * accepted, and equivalent to foo[0,0,0,0].
		 *
		 * NOTE(review): an index with too many digits overflows (i);
		 * this is not detected here.
		 */
		ind = alloca(strlen(p) * sizeof(int));
		while ((c = *p++) != '\0')
		  switch(c) {
		    case '0': case '1': case '2': case '3': case '4':
		    case '5': case '6': case '7': case '8': case '9':
			i = 10*i + (c - '0');
			break;
		    case '-':
			ineg = 1;
			break;
		    case ',':
		    case ']':
			/* Both characters terminate the current index */
			ind[nbind++] = (ineg ? -i : i);
			i = ineg = 0;
			break;
		    default:
			fprintf(stderr,
			  "Invalid variable name `%s'. Abort.\n", name);
			exit(1);
		  }
	}
#if 0
	fprintf(stderr, "Name `%s' has root `%s' and %d indices\n",
		name, rootname, nbind);
#endif
	buff[0] = '\0';
	used = 0;
	if (!strcmp(rootname, "__value__") && nbind == 1) {
		/*
		 * This is the only builtin rule. It is tested first, so
		 * we bypass memoizing and normal rule searching.
		 */
		if (ind[0] >= 0)
			sprintf(buff, "%d", ind[0]);
		else {
			*bcode = "0pn";
			/*
			 * The ancestor is the absolute value. Negate in
			 * unsigned arithmetic, so that ind[0] == INT_MIN
			 * is handled without signed overflow.
			 */
			sprintf(buff, "%u", 0u - (unsigned int) ind[0]);
		}
	}
	else if (is_saved(name)) {
		/*
		 * The object is saved somewhere on the filesystem.
		 * Thus, there are no dependencies. We do nothing since
		 * buff and bcode already contain what we want.
		 */
	}
	else if ((rule = first_matching_rule(rootname,nbind,ind)) != NULL) {
		/*
		 * Not a `__value__[xxx]' nor a memoized object, but we
		 * have found a user-defined rule; process it. Each
		 * dependency is written as root[v1,v2,...], and the
		 * dependencies are separated by single spaces. Every
		 * append is bounds-checked against sizeof(buff).
		 */
		for (i = 0; i < rule->nbdep; i++) {
		    int k, val;
		    idx_var *iv = rule->dep[i];

		    append_dep_text(buff, sizeof(buff), &used, name,
			"%s%s", (i > 0) ? " " : "", iv->rootname);
		    for (k = 0; k < iv->nbind; k++) {
			val = exec_int_bytecode(nbind, ind, iv->ibcode[k]);
			append_dep_text(buff, sizeof(buff), &used, name,
				"%c%d", k ? ',' : '[', val);
		    }
		    if (k > 0)
			append_dep_text(buff, sizeof(buff), &used, name,
				"]");
		}
		*bcode = rule->bytecode;
	}
#if 0
	fprintf(stderr, "%s: %s\n", name, buff);
#endif
	/* Replace spaces by nulls, and count the arguments */
	nbdep = !!(*buff);
	len = 0;
	for (p = buff; (c = *p) != '\0'; p++, len++)
		if (c == ' ')
			*p = '\0', ++nbdep;
	p = malloc(len+1); assert(p != NULL);
	memcpy(p, buff, len+1);
	*anc = p;
	return nbdep;
}

/*
 * Make sure that vertex (v) has a depth of at least `min_depth', and
 * that each of its ancestors is at least one level deeper than that.
 *
 * Nothing happens if the vertex is already deep enough. Otherwise its
 * depth is raised, the global maximum depth is updated if needed, and
 * the constraint is propagated recursively to all ancestors.
 */
void find_depth (vertex *v, int min_depth)
{
	int k, nb_anc;

	if (min_depth <= v->depth)
		return;

	v->depth = min_depth;
	if (min_depth > max_depth)
		max_depth = min_depth;

	/* Ancestors must sit strictly deeper than this vertex */
	nb_anc = v->deps;
	for (k = 0; k < nb_anc; k++)
		find_depth(v->dep[k], min_depth + 1);
}

/*
 * Debugging aid: when DEBUG_IGRAPH is defined, print on stderr one line
 * per depth level (from 0 to the maximum depth), listing the names of
 * the vertices found at that depth. Does nothing otherwise.
 */
void depth_statistics (void)
{
#ifdef DEBUG_IGRAPH
	int level;

	for (level = 0; level <= max_depth; level++) {
		int slot;

		fprintf(stderr, "Depth %2d:", level);
		/* Scan the whole hash table for vertices of this depth */
		for (slot = 0; slot < hashsize; slot++) {
			vertex *cur = htable[slot];

			while (cur != NULL) {
				if (cur->depth == level)
					fprintf(stderr, " %s", cur->name);
				cur = cur->hnext;
			}
		}
		fprintf(stderr, "\n");
	}
#endif
}

/*
 * Print on stderr the number of vertices and the maximum depth, then a
 * summary of the hash table occupancy: number of non-empty slots, total
 * number of slots, and the shortest / "average" / longest chain length.
 *
 * The "average" is the sum of the squared chain lengths divided by the
 * number of vertices (rounded to nearest), or 0 when there is no vertex.
 */
void hash_statistics (void)
{
	int used_slots = 0, longest = 0, sum_squares = 0;
	int shortest, average, slot;

	fprintf(stderr, "Nb of vertices: %d, maximum depth %d\n",
		nb_vertices, max_depth);

	/* No chain can be longer than the total number of vertices */
	shortest = nb_vertices;
	for (slot = 0; slot < hashsize; slot++) {
		const vertex *cur;
		int chain = 0;

		for (cur = htable[slot]; cur != NULL; cur = cur->hnext)
			chain++;
		if (chain != 0)
			used_slots++;
		if (longest < chain)
			longest = chain;
		if (shortest > chain)
			shortest = chain;
		sum_squares += chain*chain;
	}

	if (nb_vertices)
		average = (sum_squares + nb_vertices/2) / nb_vertices;
	else
		average = 0;
	fprintf(stderr, "Hash slots: %d/%d, min/avg/max use: %d/%d/%d\n",
		used_slots, hashsize, shortest, average, longest);
}

/*
 * Assign a score to every vertex of the hash table: the sum, over its
 * ancestors, of 100000 divided (integer division) by the ancestor's
 * `used' counter. A vertex without ancestors gets a score of 0.
 *
 * Progress messages go to stderr unless `quiet' is set.
 */
static void compute_scores (void)
{
	int slot;

	if (!quiet) fprintf(stderr, "Computing scores...");
	for (slot = 0; slot < hashsize; slot++) {
		vertex *cur;

		for (cur = htable[slot]; cur != NULL; cur = cur->hnext) {
			const int nb_anc = cur->deps;
			int k, total = 0;

			/* Rarely used ancestors weigh more */
			for (k = 0; k < nb_anc; k++)
				total += 100000 / cur->dep[k]->used;
			cur->score = total;
		}
	}
	if (!quiet) fprintf(stderr, " done.\n");
}

/*
 * qsort() comparison function for an array of vertex pointers.
 *
 * Orders the vertices by decreasing depth; vertices of equal depth are
 * ordered by decreasing score.
 */
static int sort_vertices (const void *p1, const void *p2)
{
	const vertex *a = *(const vertex * const *) p1;
	const vertex *b = *(const vertex * const *) p2;
	int by_depth = b->depth - a->depth;

	if (by_depth != 0)
		return by_depth;
	/* Same depth: fall back on the scores */
	return b->score - a->score;
}

/*
 * Evaluate vertex (v): allocate a new mref for it, store it in v->mr,
 * fill it with the value of the vertex and return it.
 *
 * The mrefs of all ancestors of (v) must already have been computed.
 * Unless the value was retrieved by retrieve_precious(), it is handed
 * to save_precious() before returning.
 */
mref_t compute_vertex (vertex *v)
{
	int i, n, memoized;
	mref_t mr;

	/*
	 * Evaluation of v. If it has dependencies, we execute the bytecode
	 * if it is present, otherwise we simply compute the sum of the
	 * ancestors. (In the particular case of _one_ ancestor, it simply
	 * means that we copy the result.) If it has no dependencies, it
	 * will be considered as an integer if it begins with a digit,
	 * otherwise, it'll be either a memoized object or a literal.
	 */
	n = v->deps;
	v->mr = mr = mref_new();
	memoized = 0;

	if (v->bytecode) {
		/*
		 * There is bytecode. Run it. The argument array gets at
		 * least one element, because a variable-length array of
		 * size zero (bytecode without ancestors) is undefined.
		 */
		mref_t argv[n > 0 ? n : 1];
		if (trace_mode)
			fprintf(stderr, "TRACE: %s\n", v->name);
		for (i = 0; i < n; i++)
			argv[i] = v->dep[i]->mr;
		mref_copy(mr, exec_bytecode(n, argv, v->bytecode));
	}
	else if (n != 0) {
		/* No bytecode, but dependencies. Sum them. */
		mref_copy(mr, v->dep[0]->mr);
		for (i = 1; i < n; i++)
			mref_add(mr, mr, v->dep[i]->mr);
	}
	else if (isdigit((unsigned char) v->name[0])) {
		/*
		 * It's a number. (The cast is needed because the <ctype.h>
		 * functions are only defined for unsigned char values.)
		 */
		if (floating_precision > 0) {
			/*
			 * Build "<name>p<precision>". 3*sizeof(int) bytes
			 * are enough for the decimal digits of any int;
			 * the extra bytes hold the 'p' and the final NUL.
			 */
			char *buff = alloca(strlen(v->name)
				+ 3*sizeof(int) + 3);
			sprintf(buff, "%sp%d", v->name, floating_precision);
			mref_build(mr, ST_FLOAT, buff);
		} else {
			mref_build(mr, ST_RATIONAL, v->name);
			mref_cast(mr, parsed_poly_type);
		}
	}
	else if (retrieve_precious(v) == 0) {
		/*
		 * The object was saved, and we've just retrieved it.
		 */
		memoized = 1;
	}
	else {
		/*
		 * Everything failed. Declare it as a literal, promoted
		 * against a zero of the parsed polynomial type.
		 */
		mref_t mr2 = mref_new();
		mref_build(mr, ST_LITERAL, v->name);
		mref_build(mr2, ST_RATIONAL, "0");
		mref_cast(mr2, parsed_poly_type);
		mref_promote(mr, mr2);
		mref_free(mr2);
	}
	/* Don't save again what has just been read back */
	if (!memoized)
		save_precious(v);
	/* Return the value of this vertex */
	return mr;
}

/*
 * Evaluate every vertex of the graph.
 *
 * The vertices are collected from the hash table and sorted with
 * sort_vertices() (decreasing depth, then decreasing score), then
 * computed in that order. After each computation the `used' counter of
 * every ancestor is decremented, and ancestors whose counter reaches
 * zero are released with free_vertex(). The number of live mrefs and
 * its maximum are tracked and reported on stderr unless `quiet' is set.
 *
 * The work list lives on the heap rather than in a variable-length
 * array, so that a large graph cannot overflow the stack and an empty
 * graph does not create a zero-sized array.
 */
void eval_vertices (void)
{
	int i, j, n, h, freed, nb_mrefs, max_mrefs;
	vertex *v, **lv;

	/* Always ask for at least one element; malloc(0) may return NULL */
	lv = malloc((nb_vertices > 0 ? nb_vertices : 1) * sizeof(*lv));
	if (lv == NULL) {
		fprintf(stderr,
		    "Out of memory while collecting %d vertices. Abort.\n",
		    nb_vertices);
		exit(1);
	}

	compute_scores();
	if (!quiet) fprintf(stderr, "Collecting vertices...");
	i = 0;
	for (h = 0; h < hashsize; h++)
	    for (v = htable[h]; v; v = v->hnext) {
		/* Never write past the end of the work list */
		assert(i < nb_vertices);
		lv[i++] = v;
	    }
	assert(i == nb_vertices);
	if (!quiet) fprintf(stderr, " sorting...");
	qsort(lv, nb_vertices, sizeof(vertex*), sort_vertices);
	if (!quiet) fprintf(stderr, " done.\n");
	nb_mrefs = max_mrefs = 0;
	for (i = 0; i < nb_vertices; i++) {
		v = lv[i];
		n = v->deps;
#ifdef DEBUG_ICOMP
		fprintf(stderr, "%s <-", v->name);
		/* Print the dependencies */
		for (j = 0; j < n; j++) {
			fprintf(stderr, " %s:%d", v->dep[j]->name,
				v->dep[j]->used);
			assert(v->dep[j]->mr >= 0);
		}
		fprintf(stderr, " . (depth %d, score %d)\n",
			v->depth, v->score);
#endif
		/* Now REALLY evaluate the vertex */
		compute_vertex(v);
		if (++nb_mrefs > max_mrefs)
			max_mrefs = nb_mrefs;
		/* Remove old subexpressions (if appropriate) */
		freed = 0;
		for (j = 0; j < n; j++)
		    if (--(v->dep[j]->used) == 0) {
			/* Nobody else needs this ancestor any more */
			++freed;
#ifdef DEBUG_ICOMP
			if (freed == 1)
				fprintf(stderr, "Freeing");
			fprintf(stderr, " %s", v->dep[j]->name);
#endif
			free_vertex(v->dep[j]);
			--nb_mrefs;
		    }
#ifdef DEBUG_ICOMP
		if (freed)
			fprintf(stderr, "\n");
#endif
	}
	/* The mrefs still alive belong to the vertices nobody depends on */
	if (!quiet)
		fprintf(stderr, "All %d vertices (%d roots) evaluated, "
		"using %d variables\n", nb_vertices, nb_mrefs, max_mrefs);
	free(lv);
}
