/*
 * DFA-based syntax highlighting for Jed.
 */

/* This file is included by syntax.c */

/*
 * The minimum number of "unsigned char"s we need to store at least
 * UCHAR_MAX+1 bits.
 */
#define SET_SIZE ((UCHAR_MAX+CHAR_BIT) / CHAR_BIT)

typedef struct NFA NFA;
typedef struct Accept Accept;
typedef struct DFA DFA;

/*
 * Set of characters. Note that this will not be the only kind of
 * set manipulated by the macros below - while constructing the DFA
 * table we will need to deal with sets of NFA states as well.
 */
typedef unsigned char Set[SET_SIZE];

/*
 * Macros for set manipulation. Beware - these macros are unsafe
 * with respect to side effects!
 */
#define empty_set(s,size) memset ((s), (unsigned char) 0, size)
#define fill_set(s,size) memset ((s), (unsigned char) ~0, size)
#define add_to_set(s,c) ((s)[(c)/CHAR_BIT] |= 1 << (c)%CHAR_BIT)
#define take_from_set(s,c) ((s)[(c)/CHAR_BIT] &= ~(1 << (c)%CHAR_BIT))
#define is_in_set(s,c) ((s)[(c)/CHAR_BIT] & (1 << (c)%CHAR_BIT))

struct NFA 
{
   NFA *next;
   int from, to;
   int is_empty;
   Set set;
};

struct Accept 
{
   Accept *next;
   int state;
   int is_quick, is_end;
   int colour, is_preproc, is_keyword;
};

struct DFA 
{
   DFA *next;
   int number;
   unsigned char *nfa_set;	       /* the corresp. set of NFA states */
   Accept *accept, *accept_end;
   DFA *where_to[UCHAR_MAX+1];
};

/*
 * This structure contains all the information for syntax
 * highlighting a given language. We have an NFA, generated by
 * parsing regular expressions, stored as a list of transitions. We
 * also partition the set of "unsigned char" values into
 * equivalence classes, accomplished by storing, in equiv[c], the
 * lowest value in the same equivalence class as c. "accept" is a
 * list of accepting states in the NFA, together with their colours
 * and properties, and "dfa" is the actual DFA table.
 * 
 * DFA tables can take some time to generate - over a second on my
 * 486 for C mode - and so instead of generating them every time
 * Jed needs them, I support a caching option.
 */
struct Highlight 
{
   int nfa_states, dfa_states;
   NFA *nfa;
   unsigned char equiv[UCHAR_MAX+1];
   Accept *accept;
   DFA *dfa;
   char *filename;
};

/*
 * Local prototypes.
 */
static int parse_regexp (Highlight *, char **, int *, int *, int *,
			 int *, int *);
static int parse_reg1 (Highlight *, char **, int *, int *, int *);
static int parse_reg2 (Highlight *, char **, int *, int *, int *);
static int parse_reg3 (Highlight *, char **, int *, int *, int *);
static int parse_reg4 (Highlight *, char **, int *, int *, int *);
static void add_nfa_trans (Highlight *, int , int , Set);
static int get_lexeme (char **, int *);
static void eat_lexeme (char **, int *);
static void compute_closure (Highlight *, unsigned char *);

/*
 * Parser error codes.
 */
#define ERR_EOT_EXPECTED 1
#define ERR_RPAREN_EXPECTED 2
#define ERR_EMPTY_SET 3
#define ERR_METACHAR_INVALID 4

static Highlight *init_highlight (void) 
{
   Highlight *result;
   
   result = (Highlight *) SLmalloc (sizeof (Highlight));
   if (result != NULL)
     {
	memset ((char *) result, 0, sizeof (Highlight));
	result->nfa_states = 2;	       /* 0 and 1 are "special" */
     }
   return result;
}

#define FLAG_QUICK    1
#define FLAG_KEYWORD  2
#define FLAG_PREPROC  4
static void add_rule (Highlight *h, char *rule, int length,
		      int flags, int colour) 
{
   int start = 0, end = 0;
   int is_begin, is_end;
   Accept *acc;
   int old_states;
   NFA *old_nfa;
   int err;
   
   old_states = h->nfa_states;
   old_nfa = h->nfa;
   err = parse_regexp (h, &rule, &length, &start, &end, &is_begin, &is_end);
   if (!err) 
     {
	/*
	 * We have a partial NFA gained from parsing the rule. Add
	 * an empty-string transition from the correct initial
	 * state (0 normally, 1 for a rule that must start at the
	 * beginning of a line), and flag the final state as
	 * accepting.
	 */
	add_nfa_trans (h, is_begin ? 1 : 0, start, NULL);
	acc = (Accept *) SLmalloc (sizeof(Accept));
	if (acc == NULL)
	  goto error;		       /* grotty, I know, but... */
	acc->next = h->accept;
	h->accept = acc;
	acc->state = end;
	acc->is_quick = ((flags & FLAG_QUICK) ? 1 : 0);
	acc->is_preproc = ((flags & FLAG_PREPROC) ? 1 : 0);
	acc->is_keyword = ((flags & FLAG_KEYWORD) ? 1 : 0);
	acc->colour = colour;
	acc->is_end = is_end;
     }
   else 
     {
	/*
	 * An error occurred while doing the parse. We must remove
	 * all the NFA states and transitions that the partial
	 * parse may have added.
	 */
	NFA *n;
	
	error:			       /* we may get here from above! */
	
	h->nfa_states = old_states;
	for (n=h->nfa; n && n!=old_nfa ;) 
	  {
	     NFA *temp = n;
	     n = n->next;
	     SLfree ((char *)temp);
	  }
	h->nfa = n;
     }
}

/*
 * Call this to parse a regular expression.
 * The grammar is:
 *   regexp : [^] reg1 [$]
 *   reg1   : reg2 [| reg2 [| reg2...]]
 *   reg2   : reg3 [reg3 [reg3...]]
 *   reg3   : reg4 [*, +, ?]
 *   reg4   : char
 *          | char-set
 *          | .
 *          | ( reg1 )
 * 
 * Lexemes are: all chars (0 to 255), caret, dollar, vbar, star,
 * plus, query, lbracket, rbracket, dash, dot, lparen and rparen.
 */

#define LEX_CARET (UCHAR_MAX+1+(unsigned char)'^')
#define LEX_DOLLAR (UCHAR_MAX+1+(unsigned char)'$')
#define LEX_VBAR (UCHAR_MAX+1+(unsigned char)'|')
#define LEX_STAR (UCHAR_MAX+1+(unsigned char)'*')
#define LEX_PLUS (UCHAR_MAX+1+(unsigned char)'+')
#define LEX_QUERY (UCHAR_MAX+1+(unsigned char)'?')
#define LEX_LBRACKET (UCHAR_MAX+1+(unsigned char)'[')
#define LEX_RBRACKET (UCHAR_MAX+1+(unsigned char)']')
#define LEX_DASH (UCHAR_MAX+1+(unsigned char)'-')
#define LEX_DOT (UCHAR_MAX+1+(unsigned char)'.')
#define LEX_LPAREN (UCHAR_MAX+1+(unsigned char)'(')
#define LEX_RPAREN (UCHAR_MAX+1+(unsigned char)')')
#define LEX_ENDOFTEXT (2*UCHAR_MAX+2)
#define is_normal_char(lex) ((lex)<=UCHAR_MAX)

static int parse_regexp (Highlight *h, char **text, int *length,
			 int *start, int *end, int *is_begin, int *is_end) 
{
   int err;
   
   if (get_lexeme(text, length) == LEX_CARET) 
     {
	*is_begin = 1;
	eat_lexeme (text, length);
     }
   else
     *is_begin = 0;
   
   if ( (err = parse_reg1 (h, text, length, start, end)) != 0)
     return err;
   
   if (get_lexeme(text, length) == LEX_DOLLAR) 
     {
	*is_end = 1;
	eat_lexeme (text, length);
     }
   else
     *is_end = 0;
   
   if (get_lexeme(text, length) != LEX_ENDOFTEXT)
     return ERR_EOT_EXPECTED;
   
   return 0;
}

static int parse_reg1 (Highlight *h, char **text, int *length,
		       int *start, int *end) 
{
   int err;
   
   if ( (err = parse_reg2 (h, text, length, start, end)) != 0)
     return err;
   
   while (get_lexeme(text, length) == LEX_VBAR) 
     {
	eat_lexeme (text, length);
	if ( (err = parse_reg2 (h, text, length, start, end)) != 0)
	  return err;
     }
   
   return 0;
}

static int parse_reg2 (Highlight *h, char **text, int *length,
		       int *start, int *end) 
{
   int en = 0;
   int lex, err;
   
   if ( (err = parse_reg3 (h, text, length, start, &en)) != 0)
     return err;
   
   while ((lex=get_lexeme(text, length)) == LEX_LPAREN ||
	  lex == LEX_LBRACKET || lex == LEX_DASH ||
	  lex == LEX_DOT || is_normal_char(lex)) 
     {
	int st = en;
	en = 0;
	if ( (err = parse_reg3 (h, text, length, &st, &en)) != 0)
	  return err;
     }
   
   if (*end)
     add_nfa_trans (h, en, *end, NULL);
   else
     *end = en;
   return 0;
}

static int parse_reg3 (Highlight *h, char **text, int *length,
		       int *start, int *end) 
{
   int lex, err;
   int st = 0, en = 0;
   
   if ( (err = parse_reg4 (h, text, length, &st, &en)) != 0)
     return err;
   
   lex = get_lexeme (text, length);
   switch (lex) 
     {
      case LEX_STAR:
	eat_lexeme (text, length);
	add_nfa_trans (h, st, en, NULL);
	add_nfa_trans (h, en, st, NULL);
	break;
      case LEX_PLUS:
	eat_lexeme (text, length);
	add_nfa_trans (h, en, st, NULL);
	break;
      case LEX_QUERY:
	eat_lexeme (text, length);
	add_nfa_trans (h, st, en, NULL);
	break;
     }
   
   if (!*end)
     *end = h->nfa_states++;
   add_nfa_trans (h, en, *end, NULL);
   
   if (!*start)
     *start = h->nfa_states++;
   add_nfa_trans (h, *start, st, NULL);
   
   return 0;
}

static int parse_reg4 (Highlight *h, char **text, int *length,
		       int *start, int *end) 
{
   int lex, err;
   Set set;
   
   lex = get_lexeme (text, length);
   
   if (lex == LEX_LPAREN) 
     {
	eat_lexeme (text, length);
	if ( (err = parse_reg1(h, text, length, start, end)) != 0)
	  return err;
	if (get_lexeme (text, length) != LEX_RPAREN)
	  return ERR_RPAREN_EXPECTED;
	eat_lexeme (text, length);
	return 0;
     }
   
    /*
     * OK, all other possibilities now involve a character set. So
     * create one.
     */
   empty_set ((char *)set, SET_SIZE);
   
   if (lex == LEX_LBRACKET) 
     {
	int complement = 0;
	eat_lexeme (text, length);
	if (get_lexeme (text, length) == LEX_CARET) 
	  {
	     eat_lexeme (text, length);
	     complement = 1;
	     fill_set ((char *)set, SET_SIZE);
	  }
	if (get_lexeme (text, length) == LEX_RBRACKET)
	  return ERR_EMPTY_SET;      /* empty set?! */
	while ( (lex = get_lexeme (text, length)) != LEX_RBRACKET) 
	  {
	     int one_end, other_end;
	     int bottom, top;
	     
	     if (!is_normal_char(lex))
	       return ERR_METACHAR_INVALID;
	     
	     one_end = lex;
	     eat_lexeme (text, length);
	     
	     if ( (lex = get_lexeme (text, length)) == LEX_DASH) 
	       {
		  eat_lexeme (text, length);
		  lex = get_lexeme (text, length);
		  if (!is_normal_char(lex))
		    return ERR_METACHAR_INVALID;
		  other_end = lex;
	       }
	     else
	       other_end = one_end;
	     
	     if (other_end < one_end)
	       bottom = other_end, top = one_end;
	     else
	       bottom = one_end, top = other_end;
	     
	     while (bottom <= top) 
	       {
		  if (complement)
		    take_from_set (set, bottom);
		  else
		    add_to_set (set, bottom);
		  bottom++;
	       }
	  }
	eat_lexeme (text, length);
     }
   else if (is_normal_char (lex) || lex == LEX_DASH) 
     {
	eat_lexeme (text, length);
	if (lex == LEX_DASH)
	  lex = (unsigned char) '-';
	add_to_set (set, lex);
     }
   else if (lex == LEX_DOT) 
     {
	eat_lexeme (text, length);
	fill_set ((char *)set, SET_SIZE);
     }
   else
     return ERR_METACHAR_INVALID;
   
    /*
     * We've got a character set: devise an NFA transition on it.
     */
   if (!*start)
     *start = h->nfa_states++;
   if (!*end)
     *end = h->nfa_states++;
   add_nfa_trans (h, *start, *end, set);
   
   return 0;
}

/*
 * Add an NFA transition.
 */
static void add_nfa_trans (Highlight *h, int from, int to, Set set) 
{
   NFA *trans;
   int i;
   unsigned char other[UCHAR_MAX+1], in[UCHAR_MAX+1];
   
   trans = (NFA *) SLmalloc(sizeof(*trans));
   if (trans) 
     {
	trans->next = h->nfa;
	h->nfa = trans;
	if (set) 
	  {
	     memcpy ((char *) trans->set, (char *)set, sizeof(Set));
	     trans->is_empty = 0;
	  }
	else
	  trans->is_empty = 1;
	trans->from = from;
	trans->to = to;
     }
   
    /*
     * Adjust the equivalence classes: no equivalence class should
     * contain members both inside and outside the given set.
     */
   if (set != NULL) 
     {
	for (i=0; i<=UCHAR_MAX; i++) 
	  {
	     int j = h->equiv[i];
	     if (j == i) 
	       {
		  other[i] = i;	       /* no "other" class defined yet */
		  in[i] = is_in_set (set, i);
	       }
	     else if ( (!is_in_set (set, i)) ^ (!in[j]) ) 
	       {
		  if (other[j] == j)
		    other[j] = i;
		  h->equiv[i] = other[j];
	       }
	  }
     }
}

/*
 * Parse the next lexeme out of the expression.
 */
static int get_lexeme (char **text, int *length)
{
   char *t;
   if (!*length)
     return LEX_ENDOFTEXT;
   
   t = *text;
   switch (*t)
     {
      case '^': return LEX_CARET;
      case '$': return LEX_DOLLAR;
      case '|': return LEX_VBAR;
      case '*': return LEX_STAR;
      case '+': return LEX_PLUS;
      case '?': return LEX_QUERY;
      case '[': return LEX_LBRACKET;
      case ']': return LEX_RBRACKET;
      case '-': return LEX_DASH;
      case '.': return LEX_DOT;
      case '(': return LEX_LPAREN;
      case ')': return LEX_RPAREN;
      case '\\':
	if (*length == 1)
	  return '\\';
	t++;
	/* drop */
      default:
	return (int) ((unsigned char) *t);
     }
}

/*
 * Advance the text pointer past the lexeme given.
 */
static void eat_lexeme (char **text, int *length) 
{
   int n = 0;
   
   if (!*length)
     return;
   
   if (**text == '\\' && *length > 1)
     {
	n++;
     }
   if(iskanji(*(*text + n))) n++;
   n++;
     
   (*text) += n, (*length) -= n;
}

static void make_dfa (Highlight *h) 
{
   int setsize;
   DFA *tail, *next, *search;
   unsigned char *destination;	       /* a working set */
   int c, dest_empty;
   NFA *n;
   Accept *a;
   
    /*
     * First calculate the size of array we will need to store a
     * set of NFA states. Allocate our temporary set variables.
     */
   setsize = (h->nfa_states + CHAR_BIT - 1) / CHAR_BIT;
   destination = (unsigned char *)SLmalloc(setsize);
   if (!destination)
     return;
   
    /*
     * Now define our first DFA state. This is the epsilon-closure
     * of NFA state zero, and is the normal start state for the DFA.
     */
   h->dfa_states = 0;
   
   h->dfa = tail = (DFA *) SLmalloc(sizeof(DFA));
   if (tail == NULL)
     goto error;
   
   memset ((char *) tail, 0, sizeof (DFA));
   
   if (!(tail->nfa_set = (unsigned char *)SLmalloc(setsize)))
     goto error;
   
   /* tail->next = NULL;  memset has done this */
   
   tail->number = h->dfa_states++;
   empty_set ((char *) tail->nfa_set, setsize);
   add_to_set (tail->nfa_set, 0);
   compute_closure (h, tail->nfa_set);
   
    /*
     * Our second DFA state is the epsilon-closure of NFA states
     * zero and one, and is the start state for the DFA when we are
     * at the beginning of a line.
     */
   if (!(tail->next = (DFA *) SLmalloc(sizeof(DFA))))
     goto error;
   tail = tail->next;
   memset ((char *) tail, 0, sizeof (DFA));
   
   if (!(tail->nfa_set = (unsigned char *) SLmalloc(setsize)))
     goto error;
   
   /* tail->next = NULL; */
   
   tail->number = h->dfa_states++;
   empty_set ((char *) tail->nfa_set, setsize);
   add_to_set (tail->nfa_set, 0);
   add_to_set (tail->nfa_set, 1);
   compute_closure (h, tail->nfa_set);
   
    /*
     * Next, work along the DFA processing each state in turn.
     */
   for (next = h->dfa; next; next=next->next) 
     {
	/*
	 * We have a state from which we wish to compute all the
	 * transitions. Of course we need only consider transitions
	 * on a representative member of each equivalence class.
	 */
	for (c = 0; c <= UCHAR_MAX; c++) 
	  {
	     if (h->equiv[c] == c) 
	       {
		  empty_set ((char *)destination, setsize);
		  dest_empty = 1;
		  
		  for (n = h->nfa; n; n = n->next) 
		    {
		       if (is_in_set (next->nfa_set, n->from) &&
			   !n->is_empty && is_in_set (n->set, c)) 
			 {
			    add_to_set (destination, n->to);
			    dest_empty = 0;
			 }
		    }
		  compute_closure (h, destination);
		  
		  if (dest_empty)
		    search = NULL;
		  else 
		    {
		       for (search = h->dfa; search; search = search->next)
			 if (!memcmp((char *) search->nfa_set, (char *) destination, setsize))
			   break;
		       
		       if (!search) 
			 {
			    if (!(search = tail->next = (DFA *) SLmalloc(sizeof(DFA))))
			      goto error;
			    tail = tail->next;
			    if (!(tail->nfa_set = (unsigned char *)SLmalloc(setsize)))
			      goto error;
			    tail->next = NULL;
			    tail->number = h->dfa_states++;
			    memcpy ((char *) tail->nfa_set, (char *)destination, setsize);
			 }
		    }
		  
		  next->where_to[c] = search;
	       }
	     else
	       next->where_to[c] = next->where_to[h->equiv[c]];
	  }
	
	/*
	 * Having done the transitions for the state, let us also
	 * check its acceptance properties: we need to know if it
	 * contains any accepting NFA-states, and whether they are
	 * constrained to be accepting only at the beginning or end
	 * of the line.
	 */
	next->accept = next->accept_end = NULL;
	for (a = h->accept; a; a = a->next) 
	  {
	     if (is_in_set (next->nfa_set, a->state)) 
	       {
		  next->accept_end = a;
		  if (!a->is_end)
		    next->accept = a;
	       }
	  }
     }
   
    /*
     * Free the temporary variables.
     */
   SLfree ((char *)destination);
   return;
   
    /*
     * Get here if a malloc returned null; clean up the mess.
     */
   error:
   for (tail = h->dfa; tail; tail=tail->next) 
     {
	if (tail->nfa_set)
	  SLfree ((char *)tail->nfa_set);
	next = tail->next;
	SLfree ((char *)tail);
	tail = next;
     }
   h->dfa = NULL;
}

/*
 * Compute the epsilon-closure of a set of NFA states: that is,
 * expand the set to its smallest superset with the property that
 * an epsilon NFA transition cannot move from a member of the set
 * to a non-member.
 */
static void compute_closure (Highlight *h, unsigned char *set) 
{
   NFA *n;
   int changed;
   
   do 
     {
	changed = 0;
	for (n=h->nfa; n; n=n->next) 
	  {
	     if (n->is_empty &&
		 is_in_set (set, n->from) &&
		 !is_in_set (set, n->to)) 
	       {
		  changed = 1;
		  add_to_set (set, n->to);
	       }
	  }
     }
   while (changed);
}

/*
 * I'm going to do this quite a bit in the next routine, so let's
 * make life easier for myself.
 */
#define get(buf) do { \
	if (!fgets((buf),sizeof((buf)),fp)) goto error; \
	(buf)[strcspn((buf), "\n")] = '\0'; \
    } while (0)

/*
 * Cache routine: try to load a DFA out of the cache.
 */
static int load_dfa (Highlight *h, char *name) 
{
   FILE *fp = NULL;
   char buffer[2048], buf2[2048];
   unsigned char equiv[UCHAR_MAX];
   int i, j;
   Accept *accept = NULL;
   int accepts;
   DFA *dfa = NULL;
   int dfa_states;
   
   if (!h->filename)
     return 0;		       /* don't have caching enabled */
   
   fp = fopen (h->filename, "r");
   if (!fp)
     return 0;		       /* can't open the file */
   
    /*
     * Check the first line: "DFA cache: %s".
     */
   get(buffer);
   sprintf(buf2, "DFA cache: %s", name);
   if (strcmp(buffer, buf2))
     goto error;		       /* not the right syntax table */
   
    /*
     * Check the next line: "equiv".
     */
   get(buffer);
   if (strcmp(buffer, "equiv"))
     goto error;
   
    /*
     * Read in the equivalence classes.
     */
   i = 0;
   while (i <= UCHAR_MAX) 
     {
	char *p, *q, *r;
	int e;
	
	get(buffer);
	
	p = q = buffer;
	while (*p && i <= UCHAR_MAX) 
	  {
	     q = p + strcspn(p, " ");
	     r = q + strspn(q, " ");
	     *q = '\0';
	     sscanf(p, "%x", &e);
	     equiv[i++] = e;
	     p = r;
	  }
     }
   
    /*
     * Read in the accepting states.
     */
   get(buffer);
   if (!sscanf(buffer, "accept %d", &accepts))
     goto error;
   accept = (Accept *) SLmalloc(sizeof(Accept) * accepts);
   if (!accept)
     goto error;
   for (i=0; i<accepts; i++) 
     {
	if (i == accepts-1)
	  accept[i].next = NULL;
	else
	  accept[i].next = accept+i+1;
	get(buffer);
	if (sscanf (buffer, "%d %d %d %d %d %d",
		    &accept[i].state, &accept[i].is_quick,
		    &accept[i].is_end, &accept[i].colour,
		    &accept[i].is_preproc, &accept[i].is_keyword) != 6)
	  goto error;
     }
   
    /*
     * Read in the DFA.
     */
   get(buffer);
   if (!sscanf(buffer, "dfa %d", &dfa_states))
     goto error;
   dfa = (DFA *) SLmalloc(sizeof(DFA) * dfa_states);
   if (!dfa)
     goto error;
   for (i=0; i<dfa_states; i++) 
     {
	int astate, estate;
	char *p, *q, *r;
	
	if (i == dfa_states-1)
	  dfa[i].next = NULL;
	else
	  dfa[i].next = dfa+i+1;
	dfa[i].accept = dfa[i].accept_end = NULL;
	get(buffer);
	if (sscanf (buffer, "%d %d %d:",
		    &dfa[i].number, &astate, &estate) != 3)
	  goto error;
	for (j=0; j<accepts; j++) 
	  {
	     if (accept[j].state == astate)
	       dfa[i].accept = accept+j;
	     if (accept[j].state == estate)
	       dfa[i].accept_end = accept+j;
	  }
	p = strchr(buffer, ':');
	if (!p)
	  goto error;
	p++;
	p += strspn(p, " ");
	for (j=0; j<=UCHAR_MAX; j++) 
	  {
	     if (equiv[j] == j) 
	       {
		  q = p + strcspn(p, " ");
		  r = q + strspn(q, " ");
		  *q = '\0';
		  astate = atoi(p);
		  if (astate >= 0)
		    dfa[i].where_to[j] = dfa+astate;
		  else
		    dfa[i].where_to[j] = NULL;
		  p = r;
	       }
	     else
	       dfa[i].where_to[j] = dfa[i].where_to[equiv[j]];
	  }
     }
   
    /*
     * If we've got here, we're actually done. So free the previous
     * list of Accept structures, set up the Highlight structure,
     * and leave.
     */
     {
	Accept *a, *b;
	a = h->accept;
	while (a) 
	  {
	     b = a;
	     a = a->next;
	     SLfree ((char *)b);
	  }
     }
   h->dfa_states = dfa_states;
   h->dfa = dfa;
   h->accept = accept;
   memcpy ((char *) h->equiv, (char *) equiv, sizeof(equiv));
   fclose (fp);
   return 1;
   
    /*
     * We only get here on error.
     */
   error:
   SLfree ((char *)dfa);
   SLfree ((char *)accept);
   if (fp != NULL) fclose(fp);
   return 0;
}
#undef get

/*
 * Cache routine: try to save a DFA into the cache, if we have
 * access rights. (We might be a non-root user and the cache might
 * be in /usr/lib/jed/lib, in which case we can't save cache files
 * ourselves but shouldn't actually moan about it.)
 *
 * The DFA cache format is as follows: the first line contains "DFA
 * cache: %s" where %s is the name of the syntax table. The next
 * line contains "equiv". Next the equivalence classes are stored:
 * 256 hex bytes, on 16 lines of 16, separated by spaces, denoting
 * for each character code in turn the lowest character code
 * equivalent to that character. Thus if all characters are
 * equivalent, 256 zeros will be stored, and if all characters are
 * distinct in behaviour, the numbers 0 through 255 will be stored
 * in order.
 *
 * The next line contains "accept %d", where %d is the number of
 * Accept structures. The Accept structures are stored next: each
 * line contains one structure. The line contains six integers,
 * separated by spaces: the members @state@, @is_quick@, @is_end@,
 * @colour@, @is_preproc@ and @is_keyword@ of the structure.
 *
 * The next line contains "dfa %d", where %d is the number of DFA
 * states. Thereafter the actual DFA states are given. Each line
 * contains one state: it begins with the number of the state, the
 * @state@ member of the Accept structure in its @accept@ field,
 * the same for its @accept_end@ field, then a colon, then the
 * numbers of each transition on a representative character,
 * separated by spaces. (A representative character is defined as
 * one which has the lowest character code in its equivalence
 * class.)
 */
static void save_dfa (Highlight *h, char *name) 
{
   FILE *fp;
   int i;
   DFA *d;
   Accept *a;
   
   if (!h->filename)
     return;			       /* don't have caching enabled */
   
   fp = fopen (h->filename, "w");
   if (!fp)
     return;			       /* didn't have access, or something */
   
   fprintf(fp, "DFA cache: %s\nequiv\n", name);
   
    /* write the equivalence classes */
   for (i=0; i<=UCHAR_MAX; i++) 
     {
	fprintf(fp, "%02X%c", h->equiv[i], ((i+1) % 16 ? ' ' : '\n'));
     }
   
    /* write the accepting states */
   for (i=0, a=h->accept; a; a=a->next, i++);
   fprintf(fp, "accept %d\n", i);
   for (a=h->accept; a; a=a->next)
     fprintf(fp, "%d %d %d %d %d %d\n",
	     a->state, a->is_quick, a->is_end,
	     a->colour, a->is_preproc, a->is_keyword);
   
    /* write the DFA itself */
   fprintf(fp, "dfa %d\n", h->dfa_states);
   
   for (d = h->dfa; d; d = d->next) 
     {
	fprintf(fp, "%d %d %d:", d->number,
		d->accept ? d->accept->state : -1,
		d->accept_end ? d->accept_end->state : -1);
	for (i=0; i<=UCHAR_MAX; i++)
	  if (h->equiv[i] == i)
	    fprintf(fp, " %d",
		    d->where_to[i] ? d->where_to[i]->number : -1);
	fprintf(fp, "\n");
     }
   fclose (fp);
}

#ifdef TEST_MODE
# include <ctype.h>
static void dump_it (Highlight *h) 
{
   NFA *n;
   DFA *df;
   Accept *a;
   int c, d;
   
   printf("Equivalence classes:\n");
   for (c=1; c<=UCHAR_MAX; c++) 
     {
	if (h->equiv[c] == c) 
	  {
	     printf("  equivalent to ");
	     if (isprint(c))
	       putchar (c);
	     else
	       printf("\\x%02X", c);
	     printf(": ");
	     for (d=c; d<=UCHAR_MAX; d++) 
	       {
		  if (h->equiv[d] == c) 
		    {
		       if (isprint(d))
			 putchar (d);
		       else
			 printf("\\x%02X", d);
		    }
	       }
	     printf("\n");
	  }
     }
   printf("  equivalent to \\x00: everything else\n");
   printf("NFA states: %d\n", h->nfa_states);
   for (n=h->nfa; n; n=n->next) 
     {
	printf("  transition %3d -> %3d on ", n->from, n->to);
	if (n->is_empty)
	  printf("epsilon");
	else
	  for (c=0; c<=UCHAR_MAX; c++)
	    if (h->equiv[c] == c)
	      if (is_in_set (n->set, c)) 
	  {
	     if (isprint(c))
	       putchar (c);
	     else
	       printf("\\x%02X", c);
	  }
	printf("\n");
     }
   for (a=h->accept; a; a=a->next)
     printf("Accepting state: %3d%s\n",
	    a->state, (a->is_end ? " (end)" : ""));
   
   printf("DFA states:\n");
   for (df=h->dfa; df; df=df->next) 
     {
	printf("  %d is [", df->number);
	for (c=0; c<h->nfa_states; c++)
	  if (is_in_set (df->nfa_set, c))
	    printf(" %d", c);
	printf(" ]");
	for (c=0; c<=UCHAR_MAX; c++)
	  if (h->equiv[c] == c) 
	  {
	     printf(", to %d on ",
		    df->where_to[c] ? df->where_to[c]->number : -1);
	     if (isprint(c))
	       putchar (c);
	     else
	       printf("\\x%02X", c);
	  }
	printf("\n");
	printf("    accepting: %d, e=%d\n",
	       df->accept ? df->accept->state : -1,
	       df->accept_end ? df->accept_end->state : -1);
     }
}
#endif				       /* TEST_MODE */


static int dfa_try_keyword (register unsigned short *q, int n,
			    register char *t, int case_sense)
{
   unsigned short *p;
   int m, result;
   
   if (!t)
     return 0;
   
   while (*t) 
     {
	p = q;
	m = n;
	result = 0;
	while (m--) 
	  {
	     unsigned char star_p = *p++;
	     unsigned char star_t = *t++;
	     if ((!case_sense) && (!iskanji(star_p)))
	       star_p = (unsigned char) LOWER_CASE (star_p);
	     
	     result = star_p - star_t;
	     if (result)
	       break;
	     if(iskanji(star_p))
	       {
		  m--;
		  star_p = *p++;
		  star_t = *t++;
		  if (result = star_p - star_t)
		    break;
	       }
	  }
	t += m;
	
	if (result < 0)
	  return 0;
	else if (result == 0)
	  return 1;
     }
   return 0;
}


static void dfa_syntax_highlight (register unsigned short *p,
				  register unsigned short *pmax,
				  Syntax_Table_Type *st)
{
   Highlight *h = st->hilite;
   DFA *d;
   unsigned short *q;
   unsigned short *last_acc_pos;
   Accept *accept, *last_accept;
   int preproc = -1, colour, i;
   int case_sense;
   
   if (st == NULL)
     return;
   
   case_sense = !(st->flags & SYNTAX_NOT_CASE_SENSITIVE);
   
   d = h->dfa->next;		       /* the first time, start at state 1 */
   while (p < pmax) 
     {
	q = p;
	last_accept = NULL;
	last_acc_pos = NULL;
	while (q < pmax) 
	  {
	     d = d->where_to[*q];
	     accept = (!d ? NULL : q==pmax ? d->accept_end : d->accept);
	     
	     if (iskanji(*q)) q++;
	     q++;
	     
	     if (accept) 
	       {
		/*
		 * We have hit an accepting state: record it.
		 */
		  last_accept = accept;
		  last_acc_pos = q;
		  if (last_accept->is_quick)
		    break;
	       }
	     if (!d)
	       break;		       /* error state */
	  }
	if (last_accept) 
	  {
	     colour = last_accept->colour;
	     if (last_accept->is_keyword) 
	       {
		  int n = last_acc_pos - p;
		  if (n>=1 && n<=MAX_KEYWORD_LEN)
		    for (i=0; i<MAX_KEYWORD_TABLES; i++)
		      if (dfa_try_keyword (p, n, st->keywords[i][n-1],
					   case_sense)) 
		    {
		       colour = (JKEY_COLOR+i) << 8;
		       break;
		    }
	       }
	     if (preproc != -1 && !last_accept->is_preproc &&
		 colour != (JCOM_COLOR << 8))
	       colour = preproc;
	     while (p < last_acc_pos) *p++ |= colour;
	     if (last_accept->is_preproc)
	       preproc = last_accept->colour;
	  }
	else 
	  {
	     if (preproc != -1)
	       while (p < q) *p++ |= preproc;
	     else
	       p = q;
	  }
	d = h->dfa;
     }
}


void define_highlight_rule (char *rule, char *colour, char *name) 
{
   Syntax_Table_Type *table;
   int flags = 0;
   int i;
   
   table = find_syntax_table (name, 1);
   if (table == NULL)
     return;
   
   if (!table->hilite)
     table->hilite = init_highlight();
   
   if (table->hilite->dfa)
     return;			       /* don't need to */
   
   while (*colour == 'K' || *colour == 'P' || *colour == 'Q') 
     {
	switch (*colour) 
	  {
	   case 'K':
	     flags |= FLAG_KEYWORD;
	     break;
	   case 'Q':
	     flags |= FLAG_QUICK;
	     break;
	   case 'P':
	     flags |= FLAG_PREPROC;
	     break;
	  }
	colour++;
     }
   
   if ( (i = map_color_object_to_number (colour)) != -1)
     add_rule (table->hilite, rule, strlen(rule), flags, i << 8);
}

void build_highlight_table (char *name) 
{
   Syntax_Table_Type *table;
   
   table = find_syntax_table (name, 1);
   
   if ((table != NULL) && table->hilite && !table->hilite->dfa)
     {
	flush_message ("Creating DFA syntax table...");
	make_dfa (table->hilite);
	if (table->hilite->filename)
	  save_dfa (table->hilite, name);
     }
}

static char *Dfa_Cache_Dir;

void jed_set_dfa_cache_dir (void)
{
   char *str;
   
   if (SLpop_string (&str))
     return;
   
   SLfree (Dfa_Cache_Dir);
   Dfa_Cache_Dir = str;
}


void enable_highlight_cache (char *file, char *name) 
{
   Syntax_Table_Type *table;
   char fname[1024];
   
   table = find_syntax_table (name, 1);
   
   if (!table)
     return;
   
   if (!table->hilite)
     table->hilite = init_highlight();
   
   /* This needs to be modified to search though a list of user defined
    * directories.
    */
   
   *fname = 0;
   if (Dfa_Cache_Dir != NULL)
     {
	safe_strcpy (fname, Dfa_Cache_Dir, sizeof(fname));
	fixup_dir (fname);
     }
   
   safe_strcat (fname, file, sizeof(fname));
   
   if (table->hilite->filename != NULL)
     SLfree (table->hilite->filename);
   
   table->hilite->filename = SLmake_string (fname);
   
   if (!table->hilite->dfa)
     load_dfa (table->hilite, name);
}

