Logo Search packages:      
Sourcecode: jed version File versions

dfasyntx.c

/*
 * DFA-based syntax highlighting for Jed.
 */

/* This file is included by syntax.c */

#define USE_DFA_CACHE   1
/*
 * The minimum number of "unsigned char"s we need to store at least
 * UCHAR_MAX+1 bits.
 */
#define SET_SIZE ((UCHAR_MAX+CHAR_BIT) / CHAR_BIT)

typedef struct NFA NFA;
typedef struct Accept Accept;
typedef struct DFA DFA;
typedef struct Highlight Highlight;

/*
 * Set of characters. Note that this will not be the only kind of
 * set manipulated by the macros below - while constructing the DFA
 * table we will need to deal with sets of NFA states as well.
 */
typedef unsigned char Set[SET_SIZE];

/*
 * Macros for set manipulation. Beware - these macros are unsafe
 * with respect to side effects!
 */
#define empty_set(s,size) memset ((s), (unsigned char) 0, size)
#define fill_set(s,size) memset ((s), (unsigned char) ~0, size)
#define add_to_set(s,c) ((s)[(c)/CHAR_BIT] |= 1 << (c)%CHAR_BIT)
#define take_from_set(s,c) ((s)[(c)/CHAR_BIT] &= ~(1 << (c)%CHAR_BIT))
#define is_in_set(s,c) ((s)[(c)/CHAR_BIT] & (1 << (c)%CHAR_BIT))

struct NFA 
{
   NFA *next;
   int from, to;
   int is_empty;
   Set set;
};

struct Accept 
{
   Accept *next;
   int state;
   int is_quick, is_end;
   int colour, is_preproc, is_keyword;
};

struct DFA 
{
   DFA *next;
   int number;
   unsigned char *nfa_set;           /* the corresp. set of NFA states */
   Accept *accept, *accept_end;
   DFA *where_to[UCHAR_MAX+1];
};

/*
 * This structure contains all the information for syntax
 * highlighting a given language. We have an NFA, generated by
 * parsing regular expressions, stored as a list of transitions. We
 * also partition the set of "unsigned char" values into
 * equivalence classes, accomplished by storing, in equiv[c], the
 * lowest value in the same equivalence class as c. "accept" is a
 * list of accepting states in the NFA, together with their colours
 * and properties, and "dfa" is the actual DFA table.
 * 
 * DFA tables can take some time to generate - over a second on my
 * 486 for C mode - and so instead of generating them every time
 * Jed needs them, I support a caching option.
 */
struct Highlight 
{
   int nfa_states, dfa_states;
   NFA *nfa;
   unsigned char equiv[UCHAR_MAX+1];
   Accept *accept;
   DFA *dfa;
   char *filename;
};

/*
 * Local prototypes.
 */
static int parse_regexp (Highlight *, char **, int *, int *, int *,
                   int *, int *);
static int parse_reg1 (Highlight *, char **, int *, int *, int *);
static int parse_reg2 (Highlight *, char **, int *, int *, int *);
static int parse_reg3 (Highlight *, char **, int *, int *, int *);
static int parse_reg4 (Highlight *, char **, int *, int *, int *);
static void add_nfa_trans (Highlight *, int , int , Set);
static int get_lexeme (char **, int *);
static void eat_lexeme (char **, int *);
static void compute_closure (Highlight *, unsigned char *);

/*
 * Parser error codes.
 */
#define ERR_EOT_EXPECTED 1
#define ERR_RPAREN_EXPECTED 2
#define ERR_EMPTY_SET 3
#define ERR_METACHAR_INVALID 4

static Highlight *init_highlight (void) 
{
   Highlight *result;
   
   result = (Highlight *) SLmalloc (sizeof (Highlight));
   if (result != NULL)
     {
      memset ((char *) result, 0, sizeof (Highlight));
      result->nfa_states = 2;        /* 0 and 1 are "special" */
     }
   return result;
}

#define FLAG_QUICK    1
#define FLAG_KEYWORD  2
#define FLAG_PREPROC  4
static void add_rule (Highlight *h, char *rule, int length,
                  int flags, int colour) 
{
   int start = 0, end = 0;
   int is_begin, is_end;
   Accept *acc;
   int old_states;
   NFA *old_nfa;
   int err;
   
   old_states = h->nfa_states;
   old_nfa = h->nfa;
   err = parse_regexp (h, &rule, &length, &start, &end, &is_begin, &is_end);
   if (!err) 
     {
      /*
       * We have a partial NFA gained from parsing the rule. Add
       * an empty-string transition from the correct initial
       * state (0 normally, 1 for a rule that must start at the
       * beginning of a line), and flag the final state as
       * accepting.
       */
      add_nfa_trans (h, is_begin ? 1 : 0, start, NULL);
      acc = (Accept *) SLmalloc (sizeof(Accept));
      if (acc == NULL)
        goto error;                  /* grotty, I know, but... */
      acc->next = h->accept;
      h->accept = acc;
      acc->state = end;
      acc->is_quick = ((flags & FLAG_QUICK) ? 1 : 0);
      acc->is_preproc = ((flags & FLAG_PREPROC) ? 1 : 0);
      acc->is_keyword = ((flags & FLAG_KEYWORD) ? 1 : 0);
      acc->colour = colour;
      acc->is_end = is_end;
     }
   else 
     {
      /*
       * An error occurred while doing the parse. We must remove
       * all the NFA states and transitions that the partial
       * parse may have added.
       */
      NFA *n;
      
      error:                         /* we may get here from above! */
      
      h->nfa_states = old_states;
      for (n=h->nfa; n && n!=old_nfa ;) 
        {
           NFA *temp = n;
           n = n->next;
           SLfree ((char *)temp);
        }
      h->nfa = n;
     }
}

/*
 * Call this to parse a regular expression.
 * The grammar is:
 *   regexp : [^] reg1 [$]
 *   reg1   : reg2 [| reg2 [| reg2...]]
 *   reg2   : reg3 [reg3 [reg3...]]
 *   reg3   : reg4 [*, +, ?]
 *   reg4   : char
 *          | char-set
 *          | .
 *          | ( reg1 )
 * 
 * Lexemes are: all chars (0 to 255), caret, dollar, vbar, star,
 * plus, query, lbracket, rbracket, dash, dot, lparen and rparen.
 */

#define LEX_CARET (UCHAR_MAX+1+(unsigned char)'^')
#define LEX_DOLLAR (UCHAR_MAX+1+(unsigned char)'$')
#define LEX_VBAR (UCHAR_MAX+1+(unsigned char)'|')
#define LEX_STAR (UCHAR_MAX+1+(unsigned char)'*')
#define LEX_PLUS (UCHAR_MAX+1+(unsigned char)'+')
#define LEX_QUERY (UCHAR_MAX+1+(unsigned char)'?')
#define LEX_LBRACKET (UCHAR_MAX+1+(unsigned char)'[')
#define LEX_RBRACKET (UCHAR_MAX+1+(unsigned char)']')
#define LEX_DASH (UCHAR_MAX+1+(unsigned char)'-')
#define LEX_DOT (UCHAR_MAX+1+(unsigned char)'.')
#define LEX_LPAREN (UCHAR_MAX+1+(unsigned char)'(')
#define LEX_RPAREN (UCHAR_MAX+1+(unsigned char)')')
#define LEX_ENDOFTEXT (2*UCHAR_MAX+2)
#define is_normal_char(lex) ((lex)<=UCHAR_MAX)

static int parse_regexp (Highlight *h, char **text, int *length,
                   int *start, int *end, int *is_begin, int *is_end) 
{
   int err;
   
   if (get_lexeme(text, length) == LEX_CARET) 
     {
      *is_begin = 1;
      eat_lexeme (text, length);
     }
   else
     *is_begin = 0;
   
   if ( (err = parse_reg1 (h, text, length, start, end)) != 0)
     return err;
   
   if (get_lexeme(text, length) == LEX_DOLLAR) 
     {
      *is_end = 1;
      eat_lexeme (text, length);
     }
   else
     *is_end = 0;
   
   if (get_lexeme(text, length) != LEX_ENDOFTEXT)
     return ERR_EOT_EXPECTED;
   
   return 0;
}

static int parse_reg1 (Highlight *h, char **text, int *length,
                   int *start, int *end) 
{
   int err;
   
   if ( (err = parse_reg2 (h, text, length, start, end)) != 0)
     return err;
   
   while (get_lexeme(text, length) == LEX_VBAR) 
     {
      eat_lexeme (text, length);
      if ( (err = parse_reg2 (h, text, length, start, end)) != 0)
        return err;
     }
   
   return 0;
}

static int parse_reg2 (Highlight *h, char **text, int *length,
                   int *start, int *end) 
{
   int en = 0;
   int lex, err;
   
   if ( (err = parse_reg3 (h, text, length, start, &en)) != 0)
     return err;
   
   while ((lex=get_lexeme(text, length)) == LEX_LPAREN ||
        lex == LEX_LBRACKET || lex == LEX_DASH ||
        lex == LEX_DOT || is_normal_char(lex)) 
     {
      int st = en;
      en = 0;
      if ( (err = parse_reg3 (h, text, length, &st, &en)) != 0)
        return err;
     }
   
   if (*end)
     add_nfa_trans (h, en, *end, NULL);
   else
     *end = en;
   return 0;
}

static int parse_reg3 (Highlight *h, char **text, int *length,
                   int *start, int *end) 
{
   int lex, err;
   int st = 0, en = 0;
   
   if ( (err = parse_reg4 (h, text, length, &st, &en)) != 0)
     return err;
   
   lex = get_lexeme (text, length);
   switch (lex) 
     {
      case LEX_STAR:
      eat_lexeme (text, length);
      add_nfa_trans (h, st, en, NULL);
      add_nfa_trans (h, en, st, NULL);
      break;
      case LEX_PLUS:
      eat_lexeme (text, length);
      add_nfa_trans (h, en, st, NULL);
      break;
      case LEX_QUERY:
      eat_lexeme (text, length);
      add_nfa_trans (h, st, en, NULL);
      break;
     }
   
   if (!*end)
     *end = h->nfa_states++;
   add_nfa_trans (h, en, *end, NULL);
   
   if (!*start)
     *start = h->nfa_states++;
   add_nfa_trans (h, *start, st, NULL);
   
   return 0;
}

static int parse_reg4 (Highlight *h, char **text, int *length,
                   int *start, int *end) 
{
   int lex, err;
   Set set;
   
   lex = get_lexeme (text, length);
   
   if (lex == LEX_LPAREN) 
     {
      eat_lexeme (text, length);
      if ( (err = parse_reg1(h, text, length, start, end)) != 0)
        return err;
      if (get_lexeme (text, length) != LEX_RPAREN)
        return ERR_RPAREN_EXPECTED;
      eat_lexeme (text, length);
      return 0;
     }
   
    /*
     * OK, all other possibilities now involve a character set. So
     * create one.
     */
   empty_set ((char *)set, SET_SIZE);
   
   if (lex == LEX_LBRACKET) 
     {
      int complement = 0;
      eat_lexeme (text, length);
      if (get_lexeme (text, length) == LEX_CARET) 
        {
           eat_lexeme (text, length);
           complement = 1;
           fill_set ((char *)set, SET_SIZE);
        }
      if (get_lexeme (text, length) == LEX_RBRACKET)
        return ERR_EMPTY_SET;      /* empty set?! */
      while ( (lex = get_lexeme (text, length)) != LEX_RBRACKET) 
        {
           int one_end, other_end;
           int bottom, top;
           
           if (!is_normal_char(lex))
             return ERR_METACHAR_INVALID;
           
           one_end = lex;
           eat_lexeme (text, length);
           
           if ( (lex = get_lexeme (text, length)) == LEX_DASH) 
             {
              eat_lexeme (text, length);
              lex = get_lexeme (text, length);
              if (!is_normal_char(lex))
                return ERR_METACHAR_INVALID;
              other_end = lex;
             }
           else
             other_end = one_end;
           
           if (other_end < one_end)
             bottom = other_end, top = one_end;
           else
             bottom = one_end, top = other_end;
           
           while (bottom <= top) 
             {
              if (complement)
                take_from_set (set, bottom);
              else
                add_to_set (set, bottom);
              bottom++;
             }
        }
      eat_lexeme (text, length);
     }
   else if (is_normal_char (lex) || lex == LEX_DASH) 
     {
      eat_lexeme (text, length);
      if (lex == LEX_DASH)
        lex = (unsigned char) '-';
      add_to_set (set, lex);
     }
   else if (lex == LEX_DOT) 
     {
      eat_lexeme (text, length);
      fill_set ((char *)set, SET_SIZE);
     }
   else
     return ERR_METACHAR_INVALID;
   
    /*
     * We've got a character set: devise an NFA transition on it.
     */
   if (!*start)
     *start = h->nfa_states++;
   if (!*end)
     *end = h->nfa_states++;
   add_nfa_trans (h, *start, *end, set);
   
   return 0;
}

/*
 * Add an NFA transition.
 */
static void add_nfa_trans (Highlight *h, int from, int to, Set set) 
{
   NFA *trans;
   int i;
   unsigned char other[UCHAR_MAX+1], in[UCHAR_MAX+1];
   
   trans = (NFA *) SLmalloc(sizeof(*trans));
   if (trans) 
     {
      trans->next = h->nfa;
      h->nfa = trans;
      if (set) 
        {
           memcpy ((char *) trans->set, (char *)set, sizeof(Set));
           trans->is_empty = 0;
        }
      else
        trans->is_empty = 1;
      trans->from = from;
      trans->to = to;
     }
   
    /*
     * Adjust the equivalence classes: no equivalence class should
     * contain members both inside and outside the given set.
     */
   if (set != NULL) 
     {
      for (i=0; i<=UCHAR_MAX; i++) 
        {
           int j = h->equiv[i];
           if (j == i) 
             {
              other[i] = i;          /* no "other" class defined yet */
              in[i] = is_in_set (set, i);
             }
           else if ( (!is_in_set (set, i)) ^ (!in[j]) ) 
             {
              if (other[j] == j)
                other[j] = i;
              h->equiv[i] = other[j];
             }
        }
     }
}

/*
 * Parse the next lexeme out of the expression.
 */
static int get_lexeme (char **text, int *length)
{
   char *t;
   if (!*length)
     return LEX_ENDOFTEXT;
   
   t = *text;
   switch (*t)
     {
      case '^': return LEX_CARET;
      case '$': return LEX_DOLLAR;
      case '|': return LEX_VBAR;
      case '*': return LEX_STAR;
      case '+': return LEX_PLUS;
      case '?': return LEX_QUERY;
      case '[': return LEX_LBRACKET;
      case ']': return LEX_RBRACKET;
      case '-': return LEX_DASH;
      case '.': return LEX_DOT;
      case '(': return LEX_LPAREN;
      case ')': return LEX_RPAREN;
      case '\\':
      if (*length == 1)
        return '\\';
      t++;
      /* drop */
      default:
      return (int) ((unsigned char) *t);
     }
}

/*
 * Advance the text pointer past the lexeme given.
 */
static void eat_lexeme (char **text, int *length) 
{
   if (!*length)
     return;
   
   if (**text == '\\' && *length > 1)
     (*text) += 2, (*length) -= 2;
   else
     (*text) += 1, (*length) -= 1;
}

static void make_dfa (Highlight *h) 
{
   int setsize;
   DFA *tail, *next, *search;
   unsigned char *destination;             /* a working set */
   int c, dest_empty;
   NFA *n;
   Accept *a;
   
    /*
     * First calculate the size of array we will need to store a
     * set of NFA states. Allocate our temporary set variables.
     */
   setsize = (h->nfa_states + CHAR_BIT - 1) / CHAR_BIT;
   destination = (unsigned char *)SLmalloc(setsize);
   if (!destination)
     return;
   
    /*
     * Now define our first DFA state. This is the epsilon-closure
     * of NFA state zero, and is the normal start state for the DFA.
     */
   h->dfa_states = 0;
   
   h->dfa = tail = (DFA *) SLmalloc(sizeof(DFA));
   if (tail == NULL)
     goto error;
   
   memset ((char *) tail, 0, sizeof (DFA));
   
   if (NULL == (tail->nfa_set = (unsigned char *)SLmalloc(setsize)))
     goto error;
   
   /* tail->next = NULL;  memset has done this */
   
   tail->number = h->dfa_states++;
   empty_set ((char *) tail->nfa_set, setsize);
   add_to_set (tail->nfa_set, 0);
   compute_closure (h, tail->nfa_set);
   
    /*
     * Our second DFA state is the epsilon-closure of NFA states
     * zero and one, and is the start state for the DFA when we are
     * at the beginning of a line.
     */
   if (NULL == (tail->next = (DFA *) SLmalloc(sizeof(DFA))))
     goto error;
   tail = tail->next;
   memset ((char *) tail, 0, sizeof (DFA));
   
   if (NULL == (tail->nfa_set = (unsigned char *) SLmalloc(setsize)))
     goto error;
   
   /* tail->next = NULL; */
   
   tail->number = h->dfa_states++;
   empty_set ((char *) tail->nfa_set, setsize);
   add_to_set (tail->nfa_set, 0);
   add_to_set (tail->nfa_set, 1);
   compute_closure (h, tail->nfa_set);
   
    /*
     * Next, work along the DFA processing each state in turn.
     */
   for (next = h->dfa; next; next=next->next) 
     {
      /*
       * We have a state from which we wish to compute all the
       * transitions. Of course we need only consider transitions
       * on a representative member of each equivalence class.
       */
      for (c = 0; c <= UCHAR_MAX; c++) 
        {
           if (h->equiv[c] == c) 
             {
              empty_set ((char *)destination, setsize);
              dest_empty = 1;
              
              for (n = h->nfa; n; n = n->next) 
                {
                   if (is_in_set (next->nfa_set, n->from) &&
                     !n->is_empty && is_in_set (n->set, c)) 
                   {
                      add_to_set (destination, n->to);
                      dest_empty = 0;
                   }
                }
              compute_closure (h, destination);
              
              if (dest_empty)
                search = NULL;
              else 
                {
                   for (search = h->dfa; search; search = search->next)
                   if (!memcmp((char *) search->nfa_set, (char *) destination, setsize))
                     break;
                   
                   if (!search) 
                   {
                      if (NULL == (search = tail->next = (DFA *) SLmalloc(sizeof(DFA))))
                        goto error;
                      tail = tail->next;
                      if (NULL == (tail->nfa_set = (unsigned char *)SLmalloc(setsize)))
                        goto error;
                      tail->next = NULL;
                      tail->number = h->dfa_states++;
                      memcpy ((char *) tail->nfa_set, (char *)destination, setsize);
                   }
                }
              
              next->where_to[c] = search;
             }
           else
             next->where_to[c] = next->where_to[h->equiv[c]];
        }
      
      /*
       * Having done the transitions for the state, let us also
       * check its acceptance properties: we need to know if it
       * contains any accepting NFA-states, and whether they are
       * constrained to be accepting only at the beginning or end
       * of the line.
       */
      next->accept = next->accept_end = NULL;
      for (a = h->accept; a; a = a->next) 
        {
           if (is_in_set (next->nfa_set, a->state)) 
             {
              next->accept_end = a;
              if (!a->is_end)
                next->accept = a;
             }
        }
     }
   
    /*
     * Free the temporary variables.
     */
   SLfree ((char *)destination);
   return;
   
    /*
     * Get here if a malloc returned null; clean up the mess.
     */
   error:
   for (tail = h->dfa; tail; tail=tail->next) 
     {
      if (tail->nfa_set)
        SLfree ((char *)tail->nfa_set);
      next = tail->next;
      SLfree ((char *)tail);
      tail = next;
     }
   h->dfa = NULL;
}

/*
 * Compute the epsilon-closure of a set of NFA states: that is,
 * expand the set to its smallest superset with the property that
 * an epsilon NFA transition cannot move from a member of the set
 * to a non-member.
 */
static void compute_closure (Highlight *h, unsigned char *set) 
{
   NFA *n;
   int changed;
   
   do 
     {
      changed = 0;
      for (n=h->nfa; n; n=n->next) 
        {
           if (n->is_empty &&
             is_in_set (set, n->from) &&
             !is_in_set (set, n->to)) 
             {
              changed = 1;
              add_to_set (set, n->to);
             }
        }
     }
   while (changed);
}

#if USE_DFA_CACHE


/*
 * I'm going to do this quite a bit in the next routine, so let's
 * make life easier for myself.
 */
#define get(buf) do { \
      if (!fgets((buf),sizeof((buf)),fp)) goto error; \
      (buf)[strcspn((buf), "\n")] = '\0'; \
    } while (0)

/*
 * Cache routine: try to load a DFA out of the cache.
 */

/* FIXME!!!  This routine allocates arrays of Accept, DFA, etc structures
 * and turns them into linked lists.  However, elsewhere each node in the
 * list is allocated.
 */
static int load_dfa (Highlight *h, char *name) 
{
   FILE *fp;
   char buffer[2048], buf2[2048];
   unsigned char equiv[UCHAR_MAX];
   int i, j;
   Accept *accept = NULL;
   int accepts;
   DFA *dfa = NULL;
   int dfa_states;
   
   if (h->filename == NULL)
     return 0;                 /* don't have caching enabled */
   
   fp = fopen (h->filename, "r");
   if (fp == NULL)
     return 0;                 /* can't open the file */
   
    /*
     * Check the first line: "DFA cache: %s".
     */
   get(buffer);
   _SLsnprintf (buf2, sizeof (buf2), "DFA cache: %s", name);
   if (strcmp(buffer, buf2))
     goto error;               /* not the right syntax table */
   
    /*
     * Check the next line: "equiv".
     */
   get(buffer);
   if (strcmp(buffer, "equiv"))
     goto error;
   
    /*
     * Read in the equivalence classes.
     */
   i = 0;
   while (i <= UCHAR_MAX) 
     {
      char *p, *q, *r;
      int e;
      
      get(buffer);
      
      p = q = buffer;
      while (*p && i <= UCHAR_MAX) 
        {
           q = p + strcspn(p, " ");
           r = q + strspn(q, " ");
           *q = '\0';
           sscanf(p, "%x", &e);
           equiv[i++] = e;
           p = r;
        }
     }
   
    /*
     * Read in the accepting states.
     */
   get(buffer);
   if (!sscanf(buffer, "accept %d", &accepts))
     goto error;
   accept = (Accept *) SLmalloc(sizeof(Accept) * accepts);
   if (!accept)
     goto error;
   for (i=0; i<accepts; i++) 
     {
      if (i == accepts-1)
        accept[i].next = NULL;
      else
        accept[i].next = accept+i+1;
      get(buffer);
      if (sscanf (buffer, "%d %d %d %d %d %d",
                &accept[i].state, &accept[i].is_quick,
                &accept[i].is_end, &accept[i].colour,
                &accept[i].is_preproc, &accept[i].is_keyword) != 6)
        goto error;
     }
   
    /*
     * Read in the DFA.
     */
   get(buffer);
   if (!sscanf(buffer, "dfa %d", &dfa_states))
     goto error;
   dfa = (DFA *) SLmalloc(sizeof(DFA) * dfa_states);
   if (!dfa)
     goto error;
   for (i=0; i<dfa_states; i++) 
     {
      int astate, estate;
      char *p, *q, *r;
      
      if (i == dfa_states-1)
        dfa[i].next = NULL;
      else
        dfa[i].next = dfa+i+1;
      dfa[i].accept = dfa[i].accept_end = NULL;
      get(buffer);
      if (sscanf (buffer, "%d %d %d:",
                &dfa[i].number, &astate, &estate) != 3)
        goto error;
      for (j=0; j<accepts; j++) 
        {
           if (accept[j].state == astate)
             dfa[i].accept = accept+j;
           if (accept[j].state == estate)
             dfa[i].accept_end = accept+j;
        }
      p = strchr(buffer, ':');
      if (!p)
        goto error;
      p++;
      p += strspn(p, " ");
      for (j=0; j<=UCHAR_MAX; j++) 
        {
           if (equiv[j] == j) 
             {
              q = p + strcspn(p, " ");
              r = q + strspn(q, " ");
              *q = '\0';
              astate = atoi(p);
              if (astate >= 0)
                dfa[i].where_to[j] = dfa+astate;
              else
                dfa[i].where_to[j] = NULL;
              p = r;
             }
           else
             dfa[i].where_to[j] = dfa[i].where_to[equiv[j]];
        }
     }
   
    /*
     * If we've got here, we're actually done. So free the previous
     * list of Accept structures, set up the Highlight structure,
     * and leave.
     */
     {
      Accept *a, *b;
      a = h->accept;
      while (a) 
        {
           b = a;
           a = a->next;
           SLfree ((char *)b);
        }
     }
   h->dfa_states = dfa_states;
   h->dfa = dfa;
   h->accept = accept;
   memcpy ((char *) h->equiv, (char *) equiv, sizeof(equiv));
   fclose (fp);
   return 1;
   
    /*
     * We only get here on error.
     */
   error:
   SLfree ((char *)dfa);
   SLfree ((char *)accept);
   if (fp != NULL) fclose(fp);
   return 0;
}
#undef get
#endif                               /* USE_DFA_CACHE */

/*
 * Cache routine: try to save a DFA into the cache, if we have
 * access rights. (We might be a non-root user and the cache might
 * be in /usr/lib/jed/lib, in which case we can't save cache files
 * ourselves but shouldn't actually moan about it.)
 *
 * The DFA cache format is as follows: the first line contains "DFA
 * cache: %s" where %s is the name of the syntax table. The next
 * line contains "equiv". Next the equivalence classes are stored:
 * 256 hex bytes, on 16 lines of 16, separated by spaces, denoting
 * for each character code in turn the lowest character code
 * equivalent to that character. Thus if all characters are
 * equivalent, 256 zeros will be stored, and if all characters are
 * distinct in behaviour, the numbers 0 through 255 will be stored
 * in order.
 *
 * The next line contains "accept %d", where %d is the number of
 * Accept structures. The Accept structures are stored next: each
 * line contains one structure. The line contains six integers,
 * separated by spaces: the members @state@, @is_quick@, @is_end@,
 * @colour@, @is_preproc@ and @is_keyword@ of the structure.
 *
 * The next line contains "dfa %d", where %d is the number of DFA
 * states. Thereafter the actual DFA states are given. Each line
 * contains one state: it begins with the number of the state, the
 * @state@ member of the Accept structure in its @accept@ field,
 * the same for its @accept_end@ field, then a colon, then the
 * numbers of each transition on a representative character,
 * separated by spaces. (A representative character is defined as
 * one which has the lowest character code in its equivalence
 * class.)
 */
#if USE_DFA_CACHE
static void save_dfa (Highlight *h, char *name) 
{
   FILE *fp;
   int i;
   DFA *d;
   Accept *a;
   
   if (!h->filename)
     return;                         /* don't have caching enabled */
   
   fp = fopen (h->filename, "w");
   if (!fp)
     return;                         /* didn't have access, or something */

   /* FIXME!!! Check the return values from the fprintf functions.  Upon
    * failure, remove the file.
    */
   fprintf(fp, "DFA cache: %s\nequiv\n", name);
   
    /* write the equivalence classes */
   for (i=0; i<=UCHAR_MAX; i++) 
     {
      fprintf(fp, "%02X%c", h->equiv[i], ((i+1) % 16 ? ' ' : '\n'));
     }
   
    /* write the accepting states */
   for (i=0, a=h->accept; a; a=a->next, i++);
   fprintf(fp, "accept %d\n", i);
   for (a=h->accept; a; a=a->next)
     fprintf(fp, "%d %d %d %d %d %d\n",
           a->state, a->is_quick, a->is_end,
           a->colour, a->is_preproc, a->is_keyword);
   
    /* write the DFA itself */
   fprintf(fp, "dfa %d\n", h->dfa_states);
   
   for (d = h->dfa; d; d = d->next) 
     {
      fprintf(fp, "%d %d %d:", d->number,
            d->accept ? d->accept->state : -1,
            d->accept_end ? d->accept_end->state : -1);
      for (i=0; i<=UCHAR_MAX; i++)
        if (h->equiv[i] == i)
          fprintf(fp, " %d",
                d->where_to[i] ? d->where_to[i]->number : -1);
      fprintf(fp, "\n");
     }
   fclose (fp);
}
#endif                               /* USE_DFA_CACHE */

#ifdef TEST_MODE
# include <ctype.h>
static void dump_it (Highlight *h) 
{
   NFA *n;
   DFA *df;
   Accept *a;
   int c, d;
   
   printf("Equivalence classes:\n");
   for (c=1; c<=UCHAR_MAX; c++) 
     {
      if (h->equiv[c] == c) 
        {
           printf("  equivalent to ");
           if (isprint(c))
             putchar (c);
           else
             printf("\\x%02X", c);
           printf(": ");
           for (d=c; d<=UCHAR_MAX; d++) 
             {
              if (h->equiv[d] == c) 
                {
                   if (isprint(d))
                   putchar (d);
                   else
                   printf("\\x%02X", d);
                }
             }
           printf("\n");
        }
     }
   printf("  equivalent to \\x00: everything else\n");
   printf("NFA states: %d\n", h->nfa_states);
   for (n=h->nfa; n; n=n->next) 
     {
      printf("  transition %3d -> %3d on ", n->from, n->to);
      if (n->is_empty)
        printf("epsilon");
      else
        for (c=0; c<=UCHAR_MAX; c++)
          if (h->equiv[c] == c)
            if (is_in_set (n->set, c)) 
        {
           if (isprint(c))
             putchar (c);
           else
             printf("\\x%02X", c);
        }
      printf("\n");
     }
   for (a=h->accept; a; a=a->next)
     printf("Accepting state: %3d%s\n",
          a->state, (a->is_end ? " (end)" : ""));
   
   printf("DFA states:\n");
   for (df=h->dfa; df; df=df->next) 
     {
      printf("  %d is [", df->number);
      for (c=0; c<h->nfa_states; c++)
        if (is_in_set (df->nfa_set, c))
          printf(" %d", c);
      printf(" ]");
      for (c=0; c<=UCHAR_MAX; c++)
        if (h->equiv[c] == c) 
        {
           printf(", to %d on ",
                df->where_to[c] ? df->where_to[c]->number : -1);
           if (isprint(c))
             putchar (c);
           else
             printf("\\x%02X", c);
        }
      printf("\n");
      printf("    accepting: %d, e=%d\n",
             df->accept ? df->accept->state : -1,
             df->accept_end ? df->accept_end->state : -1);
     }
}
#endif                               /* TEST_MODE */


static int dfa_try_keyword (register unsigned char *q, int n,
                      register char *t, int case_sense)
{
   unsigned char *p;
   int m, result;
   
   if (!t)
     return 0;
   
   while (*t) 
     {
      p = q;
      m = n;
      result = 0;
      while (m--) 
        {
           unsigned char star_p = *p++;
           unsigned char star_t = *t++;
           if (!case_sense)
             star_p = (unsigned char) LOWER_CASE (star_p);
           
           result = star_p - star_t;
           if (result)
             break;
        }
      t += m;
      
      if (result < 0)
        return 0;
      else if (result == 0)
        return 1;
     }
   return 0;
}


static void dfa_syntax_highlight (register unsigned char *p,
                          register unsigned char *pmax,
                          Syntax_Table_Type *st)
{
   Highlight *h = st->hilite;
   DFA *d;
   unsigned char *q;
   unsigned char *last_acc_pos;
   Accept *accept, *last_accept;
   int preproc = -1, colour, i;
   int case_sense;
   
   if (st == NULL)
     return;
   
   case_sense = !(st->flags & SYNTAX_NOT_CASE_SENSITIVE);
   
   d = h->dfa->next;                 /* the first time, start at state 1 */
   while (p < pmax) 
     {
      q = p;
      last_accept = NULL;
      last_acc_pos = NULL;
      while (q < pmax) 
        {
           d = d->where_to[*q++];
           accept = (!d ? NULL : q==pmax ? d->accept_end : d->accept);
           
           if (accept) 
             {
            /*
             * We have hit an accepting state: record it.
             */
              last_accept = accept;
              last_acc_pos = q;
              if (last_accept->is_quick)
                break;
             }
           if (!d)
             break;                  /* error state */
        }
      if (last_accept) 
        {
           colour = last_accept->colour;
           if (last_accept->is_keyword) 
             {
              int n = last_acc_pos - p;
              if (n>=1 && n<=MAX_KEYWORD_LEN)
                for (i=0; i<MAX_KEYWORD_TABLES; i++)
                  if (dfa_try_keyword (p, n, st->keywords[i][n-1],
                                 case_sense)) 
                  {
                     colour = JKEY_COLOR+i;
                     break;
                  }
             }
           if (preproc != -1 && !last_accept->is_preproc &&
             colour != JCOM_COLOR)
             colour = preproc;
             p = write_using_color(p, last_acc_pos, colour);

           if (last_accept->is_preproc)
             preproc = last_accept->colour;
        }
      else 
        {
           if (preproc != -1)
             p = write_using_color (p, q, preproc);
           else
             p = write_using_color (p, q, JNORMAL_COLOR);
        }
      d = h->dfa;
     }
}

#if 0
/* Until the reading of the cache gets fixed, do not use this */
static void free_highlight_table (Highlight *h)
{
   NFA *nfa;
   Accept *accept;

   if (h == NULL)
     return;
   
   nfa = h->nfa;
   while (nfa != NULL)
     {
      NFA *next = nfa->next;
      SLfree ((char *) nfa);
      nfa = next;
     }
   accept = h->accept;
   while (accept != NULL)
     {
      Accept *next = accept->next;
      SLfree ((char *) accept);
      accept = next;
     }
   dfa = h->dfa;
   while (dfa != NULL)
     {
      DFA *next = dfa->next;
      SLfree ((char *) dfa->nfa_set);
      SLfree ((char *) dfa);
      dfa = next;
     }
   SLfree (h->filename);
   SLfree ((char *) h);
}
#endif

static Highlight *find_highlight_table (char *name, int init)
{
   Syntax_Table_Type *table;

   table = jed_find_syntax_table (name, 1);

   if (table == NULL)
     return NULL;
   
   if ((table->hilite == NULL)
       && init)
     table->hilite = init_highlight ();
   
   return table->hilite;
}


static void define_highlight_rule (char *rule, char *colour, char *name) 
{
   Highlight *hilite;
   int flags = 0;
   int i;
   
   if (NULL == (hilite = find_highlight_table (name, 1)))
     return;
   
   if (hilite->dfa)
     return;                         /* don't need to */

   while (*colour == 'K' || *colour == 'P' || *colour == 'Q') 
     {
      switch (*colour) 
        {
         case 'K':
           flags |= FLAG_KEYWORD;
           break;
         case 'Q':
           flags |= FLAG_QUICK;
           break;
         case 'P':
           flags |= FLAG_PREPROC;
           break;
        }
      colour++;
     }
   
   if ( (i = jed_get_color_obj (colour)) != -1)
     add_rule (hilite, rule, strlen(rule), flags, i);
}

static void build_highlight_table (char *name) 
{
   Highlight *hilite;

   if (NULL == (hilite = find_highlight_table (name, 0)))
     return;
   
   if (NULL == hilite->dfa)
     {
      jed_vmessage (1, "Creating DFA syntax table for %s...", name);
      make_dfa (hilite);
#if USE_DFA_CACHE
      if (hilite->filename)
        save_dfa (hilite, name);
#endif
      message ("");
     }
}

static void enable_highlight_cache (char *file, char *name) 
{
#if USE_DFA_CACHE
   Highlight *hilite;

   if (NULL == (hilite = find_highlight_table (name, 1)))
     return;
   
   /* This needs to be modified to search though a list of user defined
    * directories.
    */
   
   if (hilite->filename != NULL)
     SLfree (hilite->filename);

   /* Ok to fail */
   hilite->filename = SLmake_string (file);

   if (!hilite->dfa)
     load_dfa (hilite, name);
#else
   (void) file;
   (void) name;
#endif
}


static void set_init_callback (char *name)
{
   SLang_Name_Type *f;
   Syntax_Table_Type *t;

   if (NULL == (f = SLang_pop_function ()))
     return;
   
   if (NULL == (t = jed_find_syntax_table (name, 1)))
     {
#if SLANG_VERSION > 10400
      SLang_free_function (f);
#endif
      return;
     }
#if SLANG_VERSION > 10400
   if (t->init_dfa_callback != NULL)
     SLang_free_function (t->init_dfa_callback);
#endif
   t->init_dfa_callback = f;
}

static void use_dfa_syntax (int *x)
{
   Syntax_Table_Type *t = CBuf->syntax_table;
   if (t == NULL)
     t = Default_Syntax_Table;
   
   if (*x == 0)
     {
      t->use_dfa_syntax = 0;
      return;
     }
   
   if (t->hilite == NULL)
     {
      if ((t->init_dfa_callback == NULL)
          || (-1 == SLang_start_arg_list ())
          || (-1 == SLang_push_string (t->name))
          || (-1 == SLang_end_arg_list ())
          || (1 != SLexecute_function (t->init_dfa_callback))
          || (NULL == t->hilite)
          || (NULL == t->hilite->dfa))
        {
           t->use_dfa_syntax = 0;
           return;
        }
     }
   t->use_dfa_syntax = 1;
}

static SLang_Intrin_Fun_Type DFA_Intrinsics [] = 
{
   MAKE_INTRINSIC_SSS("dfa_define_highlight_rule", define_highlight_rule, VOID_TYPE),
   MAKE_INTRINSIC_S("dfa_build_highlight_table", build_highlight_table, VOID_TYPE),
   MAKE_INTRINSIC_SS("_dfa_enable_highlight_cache", enable_highlight_cache, VOID_TYPE),
   MAKE_INTRINSIC_I("use_dfa_syntax", use_dfa_syntax, VOID_TYPE),
   MAKE_INTRINSIC_S("dfa_set_init_callback", set_init_callback, VOID_TYPE),

   MAKE_INTRINSIC_0(NULL, NULL, 0)
};

int jed_init_dfa_syntax (void)
{
   return SLadd_intrin_fun_table (DFA_Intrinsics, "HAS_DFA_SYNTAX");
}


      

Generated by  Doxygen 1.6.0   Back to index