This is rxsuper.c in view mode; [Download] [Up]
/* Copyright (C) 1995, 1996 Tom Lord
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "rxall.h"
#include "rxsuper.h"
/* The functions in the next several pages define the lazy-NFA-conversion used
* by matchers. The input to this construction is an NFA such as
* is built by compactify_nfa (rx.c). The output is the superNFA.
*/
/* Match engines can use arbitrary values for opcodes. So, the parse tree
* is built using instructions names (enum rx_opcode), but the superstate
* nfa is populated with mystery opcodes (void *).
*
* For convenience, here is an id table. The opcodes are == to their inxs
*
* The lables in re_search_2 would make good values for instructions.
*/
void * rx_id_instruction_table[rx_num_instructions] =
{
(void *) rx_backtrack_point,
(void *) rx_do_side_effects,
(void *) rx_cache_miss,
(void *) rx_next_char,
(void *) rx_backtrack,
(void *) rx_error_inx
};
/* The partially instantiated superstate graph has a transition
* table at every node. There is one entry for every character.
* This fills in the transition for a set.
*/
#ifdef __STDC__
static void
install_transition (struct rx_superstate *super,
struct rx_inx *answer, rx_Bitset trcset)
#else
static void
install_transition (super, answer, trcset)
struct rx_superstate *super;
struct rx_inx *answer;
rx_Bitset trcset;
#endif
{
struct rx_inx * transitions = super->transitions;
int chr;
for (chr = 0; chr < 256; )
if (!*trcset)
{
++trcset;
chr += 32;
}
else
{
RX_subset sub = *trcset;
RX_subset mask = 1;
int bound = chr + 32;
while (chr < bound)
{
if (sub & mask)
transitions [chr] = *answer;
++chr;
mask <<= 1;
}
++trcset;
}
}
#ifdef __STDC__
static int
qlen (struct rx_superstate * q)
#else
static int
qlen (q)
struct rx_superstate * q;
#endif
{
int count = 1;
struct rx_superstate * it;
if (!q)
return 0;
for (it = q->next_recyclable; it != q; it = it->next_recyclable)
++count;
return count;
}
#ifdef __STDC__
static void
check_cache (struct rx_cache * cache)
#else
static void
check_cache (cache)
struct rx_cache * cache;
#endif
{
struct rx_cache * you_fucked_up = 0;
int total = cache->superstates;
int semi = cache->semifree_superstates;
if (semi != qlen (cache->semifree_superstate))
check_cache (you_fucked_up);
if ((total - semi) != qlen (cache->lru_superstate))
check_cache (you_fucked_up);
}
#ifdef __STDC__
char *
rx_cache_malloc (struct rx_cache * cache, int size)
#else
char *
rx_cache_malloc (cache, size)
struct rx_cache * cache;
int size;
#endif
{
char * answer;
answer = (char *)malloc (size);
if (answer)
cache->bytes_used += size;
return answer;
}
#ifdef __STDC__
void
rx_cache_free (struct rx_cache * cache, int size, char * mem)
#else
void
rx_cache_free (cache, size, mem)
struct rx_cache * cache;
int size;
char * mem;
#endif
{
free (mem);
cache->bytes_used -= size;
}
/* When a superstate is old and neglected, it can enter a
* semi-free state. A semi-free state is slated to die.
* Incoming transitions to a semi-free state are re-written
* to cause an (interpreted) fault when they are taken.
* The fault handler revives the semi-free state, patches
* incoming transitions back to normal, and continues.
*
* The idea is basicly to free in two stages, aborting
* between the two if the state turns out to be useful again.
* When a free is aborted, the rescued superstate is placed
* in the most-favored slot to maximize the time until it
* is next semi-freed.
*
* Overall, this approximates truly freeing superstates in
* lru order, but does not involve the cost of updating perfectly
* accurate timestamps every time a superstate is touched.
*/
#ifdef __STDC__
static void
semifree_superstate (struct rx_cache * cache)
#else
static void
semifree_superstate (cache)
struct rx_cache * cache;
#endif
{
int disqualified = cache->semifree_superstates;
if (disqualified == cache->superstates)
return;
while (cache->lru_superstate->locks)
{
cache->lru_superstate = cache->lru_superstate->next_recyclable;
++disqualified;
if (disqualified == cache->superstates)
return;
}
{
struct rx_superstate * it = cache->lru_superstate;
it->next_recyclable->prev_recyclable = it->prev_recyclable;
it->prev_recyclable->next_recyclable = it->next_recyclable;
cache->lru_superstate = (it == it->next_recyclable
? 0
: it->next_recyclable);
if (!cache->semifree_superstate)
{
cache->semifree_superstate = it;
it->next_recyclable = it;
it->prev_recyclable = it;
}
else
{
it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
it->next_recyclable = cache->semifree_superstate;
it->prev_recyclable->next_recyclable = it;
it->next_recyclable->prev_recyclable = it;
}
{
struct rx_distinct_future *df;
it->is_semifree = 1;
++cache->semifree_superstates;
df = it->transition_refs;
if (df)
{
df->prev_same_dest->next_same_dest = 0;
for (df = it->transition_refs; df; df = df->next_same_dest)
{
df->future_frame.inx = cache->instruction_table[rx_cache_miss];
df->future_frame.data = 0;
df->future_frame.data_2 = (void *) df;
/* If there are any NEXT-CHAR instruction frames that
* refer to this state, we convert them to CACHE-MISS frames.
*/
if (!df->effects
&& (df->edge->options->next_same_super_edge[0]
== df->edge->options))
install_transition (df->present, &df->future_frame,
df->edge->cset);
}
df = it->transition_refs;
df->prev_same_dest->next_same_dest = df;
}
}
}
}
#ifdef __STDC__
static void
refresh_semifree_superstate (struct rx_cache * cache,
struct rx_superstate * super)
#else
static void
refresh_semifree_superstate (cache, super)
struct rx_cache * cache;
struct rx_superstate * super;
#endif
{
struct rx_distinct_future *df;
if (super->transition_refs)
{
super->transition_refs->prev_same_dest->next_same_dest = 0;
for (df = super->transition_refs; df; df = df->next_same_dest)
{
df->future_frame.inx = cache->instruction_table[rx_next_char];
df->future_frame.data = (void *) super->transitions;
df->future_frame.data_2 = (void *)(super->contents->is_final);
/* CACHE-MISS instruction frames that refer to this state,
* must be converted to NEXT-CHAR frames.
*/
if (!df->effects
&& (df->edge->options->next_same_super_edge[0]
== df->edge->options))
install_transition (df->present, &df->future_frame,
df->edge->cset);
}
super->transition_refs->prev_same_dest->next_same_dest
= super->transition_refs;
}
if (cache->semifree_superstate == super)
cache->semifree_superstate = (super->prev_recyclable == super
? 0
: super->prev_recyclable);
super->next_recyclable->prev_recyclable = super->prev_recyclable;
super->prev_recyclable->next_recyclable = super->next_recyclable;
if (!cache->lru_superstate)
(cache->lru_superstate
= super->next_recyclable
= super->prev_recyclable
= super);
else
{
super->next_recyclable = cache->lru_superstate;
super->prev_recyclable = cache->lru_superstate->prev_recyclable;
super->next_recyclable->prev_recyclable = super;
super->prev_recyclable->next_recyclable = super;
}
super->is_semifree = 0;
--cache->semifree_superstates;
}
#ifdef __STDC__
void
rx_refresh_this_superstate (struct rx_cache * cache,
struct rx_superstate * superstate)
#else
void
rx_refresh_this_superstate (cache, superstate)
struct rx_cache * cache;
struct rx_superstate * superstate;
#endif
{
if (superstate->is_semifree)
refresh_semifree_superstate (cache, superstate);
else if (cache->lru_superstate == superstate)
cache->lru_superstate = superstate->next_recyclable;
else if (superstate != cache->lru_superstate->prev_recyclable)
{
superstate->next_recyclable->prev_recyclable
= superstate->prev_recyclable;
superstate->prev_recyclable->next_recyclable
= superstate->next_recyclable;
superstate->next_recyclable = cache->lru_superstate;
superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
superstate->next_recyclable->prev_recyclable = superstate;
superstate->prev_recyclable->next_recyclable = superstate;
}
}
#ifdef __STDC__
static void
release_superset_low (struct rx_cache * cache,
struct rx_superset *set)
#else
static void
release_superset_low (cache, set)
struct rx_cache * cache;
struct rx_superset *set;
#endif
{
if (!--set->refs)
{
if (set->starts_for)
set->starts_for->start_set = 0;
if (set->cdr)
release_superset_low (cache, set->cdr);
rx_hash_free (&set->hash_item, &cache->superset_hash_rules);
rx_cache_free (cache,
sizeof (struct rx_superset),
(char *)set);
}
}
#ifdef __STDC__
void
rx_release_superset (struct rx *rx,
struct rx_superset *set)
#else
void
rx_release_superset (rx, set)
struct rx *rx;
struct rx_superset *set;
#endif
{
release_superset_low (rx->cache, set);
}
/* This tries to add a new superstate to the superstate freelist.
* It might, as a result, free some edge pieces or hash tables.
* If nothing can be freed because too many locks are being held, fail.
*/
#ifdef __STDC__
static int
rx_really_free_superstate (struct rx_cache * cache)
#else
static int
rx_really_free_superstate (cache)
struct rx_cache * cache;
#endif
{
int locked_superstates = 0;
struct rx_superstate * it;
if (!cache->superstates)
return 0;
/* scale these */
while ((cache->hits + cache->misses) > cache->superstates)
{
cache->hits >>= 1;
cache->misses >>= 1;
}
/* semifree superstates faster than they are freed
* so popular states can be rescued.
*/
semifree_superstate (cache);
semifree_superstate (cache);
semifree_superstate (cache);
#if 0
Redundant because semifree_superstate already
makes this check;
while (cache->semifree_superstate && cache->semifree_superstate->locks)
{
refresh_semifree_superstate (cache, cache->semifree_superstate);
++locked_superstates;
if (locked_superstates == cache->superstates)
return 0;
}
if (cache->semifree_superstate)
insert the "it =" block which follows this "if 0" code;
else
{
while (cache->lru_superstate->locks)
{
cache->lru_superstate = cache->lru_superstate->next_recyclable;
++locked_superstates;
if (locked_superstates == cache->superstates)
return 0;
}
it = cache->lru_superstate;
it->next_recyclable->prev_recyclable = it->prev_recyclable;
it->prev_recyclable->next_recyclable = it->next_recyclable;
cache->lru_superstate = ((it == it->next_recyclable)
? 0
: it->next_recyclable);
}
#endif
if (!cache->semifree_superstate)
return 0;
{
it = cache->semifree_superstate;
it->next_recyclable->prev_recyclable = it->prev_recyclable;
it->prev_recyclable->next_recyclable = it->next_recyclable;
cache->semifree_superstate = ((it == it->next_recyclable)
? 0
: it->next_recyclable);
--cache->semifree_superstates;
}
if (it->transition_refs)
{
struct rx_distinct_future *df;
for (df = it->transition_refs,
df->prev_same_dest->next_same_dest = 0;
df;
df = df->next_same_dest)
{
df->future_frame.inx = cache->instruction_table[rx_cache_miss];
df->future_frame.data = 0;
df->future_frame.data_2 = (void *) df;
df->future = 0;
}
it->transition_refs->prev_same_dest->next_same_dest =
it->transition_refs;
}
{
struct rx_super_edge *tc = it->edges;
while (tc)
{
struct rx_distinct_future * df;
struct rx_super_edge *tct = tc->next;
df = tc->options;
df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
while (df)
{
struct rx_distinct_future *dft = df;
df = df->next_same_super_edge[0];
if (dft->future && dft->future->transition_refs == dft)
{
dft->future->transition_refs = dft->next_same_dest;
if (dft->future->transition_refs == dft)
dft->future->transition_refs = 0;
}
dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
dft->prev_same_dest->next_same_dest = dft->next_same_dest;
rx_cache_free (cache,
sizeof (struct rx_distinct_future),
(char *)dft);
}
rx_cache_free (cache,
sizeof (struct rx_super_edge),
(char *)tc);
tc = tct;
}
}
if (it->contents->superstate == it)
it->contents->superstate = 0;
release_superset_low (cache, it->contents);
rx_cache_free (cache,
(sizeof (struct rx_superstate)
+ cache->local_cset_size * sizeof (struct rx_inx)),
(char *)it);
--cache->superstates;
return 1;
}
#ifdef __STDC__
static char *
rx_cache_malloc_or_get (struct rx_cache * cache, int size)
#else
static char *
rx_cache_malloc_or_get (cache, size)
struct rx_cache * cache;
int size;
#endif
{
while ( (cache->bytes_used + size > cache->bytes_allowed)
&& rx_really_free_superstate (cache))
;
return rx_cache_malloc (cache, size);
}
#ifdef __STDC__
static int
supersetcmp (void * va, void * vb)
#else
static int
supersetcmp (va, vb)
void * va;
void * vb;
#endif
{
struct rx_superset * a = (struct rx_superset *)va;
struct rx_superset * b = (struct rx_superset *)vb;
return ( (a == b)
|| (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr)));
}
#define rx_abs(A) (((A) > 0) ? (A) : -(A))
#ifdef __STDC__
static struct rx_hash_item *
superset_allocator (struct rx_hash_rules * rules, void * val)
#else
static struct rx_hash_item *
superset_allocator (rules, val)
struct rx_hash_rules * rules;
void * val;
#endif
{
struct rx_cache * cache;
struct rx_superset * template;
struct rx_superset * newset;
cache = ((struct rx_cache *)
((char *)rules
- (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
template = (struct rx_superset *)val;
newset = ((struct rx_superset *)
rx_cache_malloc (cache, sizeof (*template)));
if (!newset)
return 0;
{
int cdrfinal;
int cdredges;
cdrfinal = (template->cdr
? template->cdr->is_final
: 0);
cdredges = (template->cdr
? template->cdr->has_cset_edges
: 0);
newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal)
? template->car->is_final
: template->cdr->is_final);
newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
}
newset->refs = 0;
newset->id = template->id;
newset->car = template->car;
newset->cdr = template->cdr;
rx_protect_superset (rx, template->cdr);
newset->superstate = 0;
newset->starts_for = 0;
newset->hash_item.data = (void *)newset;
newset->hash_item.binding = 0;
return &newset->hash_item;
}
#ifdef __STDC__
static struct rx_hash *
super_hash_allocator (struct rx_hash_rules * rules)
#else
static struct rx_hash *
super_hash_allocator (rules)
struct rx_hash_rules * rules;
#endif
{
struct rx_cache * cache;
cache = ((struct rx_cache *)
((char *)rules
- (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
return ((struct rx_hash *)
rx_cache_malloc (cache, sizeof (struct rx_hash)));
}
#ifdef __STDC__
static void
super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
#else
static void
super_hash_liberator (hash, rules)
struct rx_hash * hash;
struct rx_hash_rules * rules;
#endif
{
struct rx_cache * cache
= ((struct rx_cache *)
(char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash);
}
#ifdef __STDC__
static void
superset_hash_item_liberator (struct rx_hash_item * it,
struct rx_hash_rules * rules)
#else
static void
superset_hash_item_liberator (it, rules)
struct rx_hash_item * it;
struct rx_hash_rules * rules;
#endif
{
}
int rx_cache_bound = 3;
static int rx_default_cache_got = 0;
static struct rx_cache default_cache =
{
{
supersetcmp,
super_hash_allocator,
super_hash_liberator,
superset_allocator,
superset_hash_item_liberator,
},
0,
0,
0,
0,
0,
0,
0,
RX_DEFAULT_DFA_CACHE_SIZE,
0,
256,
rx_id_instruction_table,
{
0,
0,
0,
0,
0
}
};
struct rx_cache * rx_default_cache = &default_cache;
/* This adds an element to a superstate set. These sets are lists, such
* that lists with == elements are ==. The empty set is returned by
* superset_cons (rx, 0, 0) and is NOT equivelent to
* (struct rx_superset)0.
*/
#ifdef __STDC__
struct rx_superset *
rx_superset_cons (struct rx * rx,
struct rx_nfa_state *car, struct rx_superset *cdr)
#else
struct rx_superset *
rx_superset_cons (rx, car, cdr)
struct rx * rx;
struct rx_nfa_state *car;
struct rx_superset *cdr;
#endif
{
struct rx_cache * cache = rx->cache;
if (!car && !cdr)
{
if (!cache->empty_superset)
{
cache->empty_superset
= ((struct rx_superset *)
rx_cache_malloc (cache, sizeof (struct rx_superset)));
if (!cache->empty_superset)
return 0;
rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset));
cache->empty_superset->refs = 1000;
}
return cache->empty_superset;
}
{
struct rx_superset template;
struct rx_hash_item * hit;
template.car = car;
template.cdr = cdr;
template.id = rx->rx_id;
hit = rx_hash_store (&cache->superset_table,
(unsigned long)car ^ car->id ^ (unsigned long)cdr,
(void *)&template,
&cache->superset_hash_rules);
return (hit
? (struct rx_superset *)hit->data
: 0);
}
}
/* This computes a union of two NFA state sets. The sets do not have the
* same representation though. One is a RX_SUPERSET structure (part
* of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
*/
#ifdef __STDC__
struct rx_superset *
rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
#else
struct rx_superset *
rx_superstate_eclosure_union (rx, set, ecl)
struct rx * rx;
struct rx_superset *set;
struct rx_nfa_state_set *ecl;
#endif
{
if (!ecl)
return set;
if (!set->car)
return rx_superset_cons (rx, ecl->car,
rx_superstate_eclosure_union (rx, set, ecl->cdr));
if (set->car == ecl->car)
return rx_superstate_eclosure_union (rx, set, ecl->cdr);
{
struct rx_superset * tail;
struct rx_nfa_state * first;
if (set->car->id < ecl->car->id)
{
tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
first = set->car;
}
else
{
tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
first = ecl->car;
}
if (!tail)
return 0;
else
{
struct rx_superset * answer;
answer = rx_superset_cons (rx, first, tail);
if (!answer)
{
rx_protect_superset (rx, tail);
rx_release_superset (rx, tail);
return 0;
}
else
return answer;
}
}
}
/*
* This makes sure that a list of rx_distinct_futures contains
* a future for each possible set of side effects in the eclosure
* of a given state. This is some of the work of filling in a
* superstate transition.
*/
#ifdef __STDC__
static struct rx_distinct_future *
include_futures (struct rx *rx,
struct rx_distinct_future *df, struct rx_nfa_state
*state, struct rx_superstate *superstate)
#else
static struct rx_distinct_future *
include_futures (rx, df, state, superstate)
struct rx *rx;
struct rx_distinct_future *df;
struct rx_nfa_state *state;
struct rx_superstate *superstate;
#endif
{
struct rx_possible_future *future;
struct rx_cache * cache = rx->cache;
for (future = rx_state_possible_futures (rx, state); future; future = future->next)
{
struct rx_distinct_future *dfp;
struct rx_distinct_future *insert_before = 0;
if (df)
df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
if (dfp->effects == future->effects)
break;
else
{
int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
if (order > 0)
{
insert_before = dfp;
dfp = 0;
break;
}
}
if (df)
df->next_same_super_edge[1]->next_same_super_edge[0] = df;
if (!dfp)
{
dfp
= ((struct rx_distinct_future *)
rx_cache_malloc (cache,
sizeof (struct rx_distinct_future)));
if (!dfp)
return 0;
if (!df)
{
df = insert_before = dfp;
df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
}
else if (!insert_before)
insert_before = df;
else if (insert_before == df)
df = dfp;
dfp->next_same_super_edge[0] = insert_before;
dfp->next_same_super_edge[1]
= insert_before->next_same_super_edge[1];
dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
dfp->next_same_dest = dfp->prev_same_dest = dfp;
dfp->future = 0;
dfp->present = superstate;
dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
dfp->future_frame.data = 0;
dfp->future_frame.data_2 = (void *) dfp;
dfp->side_effects_frame.inx
= rx->instruction_table[rx_do_side_effects];
dfp->side_effects_frame.data = 0;
dfp->side_effects_frame.data_2 = (void *) dfp;
dfp->effects = future->effects;
}
}
return df;
}
/* This constructs a new superstate from its state set. The only
* complexity here is memory management.
*/
#ifdef __STDC__
struct rx_superstate *
rx_superstate (struct rx *rx,
struct rx_superset *set)
#else
struct rx_superstate *
rx_superstate (rx, set)
struct rx *rx;
struct rx_superset *set;
#endif
{
struct rx_cache * cache = rx->cache;
struct rx_superstate * superstate = 0;
/* Does the superstate already exist in the cache? */
if (set->superstate)
{
if (set->superstate->rx_id != rx->rx_id)
{
/* Aha. It is in the cache, but belongs to a superstate
* that refers to an NFA that no longer exists.
* (We know it no longer exists because it was evidently
* stored in the same region of memory as the current nfa
* yet it has a different id.)
*/
superstate = set->superstate;
if (!superstate->is_semifree)
{
if (cache->lru_superstate == superstate)
{
cache->lru_superstate = superstate->next_recyclable;
if (cache->lru_superstate == superstate)
cache->lru_superstate = 0;
}
{
superstate->next_recyclable->prev_recyclable
= superstate->prev_recyclable;
superstate->prev_recyclable->next_recyclable
= superstate->next_recyclable;
if (!cache->semifree_superstate)
{
(cache->semifree_superstate
= superstate->next_recyclable
= superstate->prev_recyclable
= superstate);
}
else
{
superstate->next_recyclable = cache->semifree_superstate;
superstate->prev_recyclable
= cache->semifree_superstate->prev_recyclable;
superstate->next_recyclable->prev_recyclable
= superstate;
superstate->prev_recyclable->next_recyclable
= superstate;
cache->semifree_superstate = superstate;
}
++cache->semifree_superstates;
}
}
set->superstate = 0;
goto handle_cache_miss;
}
++cache->hits;
superstate = set->superstate;
rx_refresh_this_superstate (cache, superstate);
return superstate;
}
handle_cache_miss:
/* This point reached only for cache misses. */
++cache->misses;
#if RX_DEBUG
if (rx_debug_trace > 1)
{
struct rx_superset * setp = set;
fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
while (setp)
{
fprintf (stderr, "%d ", setp->id);
setp = setp->cdr;
}
fprintf (stderr, "(%d)\n", set);
}
#endif
{
int superstate_size;
superstate_size = ( sizeof (*superstate)
+ ( sizeof (struct rx_inx)
* rx->local_cset_size));
superstate = ((struct rx_superstate *)
rx_cache_malloc_or_get (cache, superstate_size));
++cache->superstates;
}
if (!superstate)
return 0;
if (!cache->lru_superstate)
(cache->lru_superstate
= superstate->next_recyclable
= superstate->prev_recyclable
= superstate);
else
{
superstate->next_recyclable = cache->lru_superstate;
superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
( superstate->prev_recyclable->next_recyclable
= superstate->next_recyclable->prev_recyclable
= superstate);
}
superstate->rx_id = rx->rx_id;
superstate->transition_refs = 0;
superstate->locks = 0;
superstate->is_semifree = 0;
set->superstate = superstate;
superstate->contents = set;
rx_protect_superset (rx, set);
superstate->edges = 0;
{
int x;
/* None of the transitions from this superstate are known yet. */
for (x = 0; x < rx->local_cset_size; ++x)
{
struct rx_inx * ifr = &superstate->transitions[x];
ifr->inx = rx->instruction_table [rx_cache_miss];
ifr->data = ifr->data_2 = 0;
}
}
return superstate;
}
/* This computes the destination set of one edge of the superstate NFA.
* Note that a RX_DISTINCT_FUTURE is a superstate edge.
* Returns 0 on an allocation failure.
*/
#ifdef __STDC__
static int
solve_destination (struct rx *rx, struct rx_distinct_future *df)
#else
static int
solve_destination (rx, df)
struct rx *rx;
struct rx_distinct_future *df;
#endif
{
struct rx_super_edge *tc = df->edge;
struct rx_superset *nfa_state;
struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
struct rx_superset *solution = nil_set;
struct rx_superstate *dest;
rx_protect_superset (rx, solution);
/* Iterate over all NFA states in the state set of this superstate. */
for (nfa_state = df->present->contents;
nfa_state->car;
nfa_state = nfa_state->cdr)
{
struct rx_nfa_edge *e;
/* Iterate over all edges of each NFA state. */
for (e = nfa_state->car->edges; e; e = e->next)
/* If we find an edge that is labeled with
* the characters we are solving for.....
*/
if ((e->type == ne_cset)
&& rx_bitset_is_subset (rx->local_cset_size,
tc->cset,
e->params.cset))
{
struct rx_nfa_state *n = e->dest;
struct rx_possible_future *pf;
/* ....search the partial epsilon closures of the destination
* of that edge for a path that involves the same set of
* side effects we are solving for.
* If we find such a RX_POSSIBLE_FUTURE, we add members to the
* stateset we are computing.
*/
for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next)
if (pf->effects == df->effects)
{
struct rx_superset * old_sol;
old_sol = solution;
solution = rx_superstate_eclosure_union (rx, solution,
pf->destset);
if (!solution)
return 0;
rx_protect_superset (rx, solution);
rx_release_superset (rx, old_sol);
}
}
}
/* It is possible that the RX_DISTINCT_FUTURE we are working on has
* the empty set of NFA states as its definition. In that case, this
* is a failure point.
*/
if (solution == nil_set)
{
df->future_frame.inx = (void *) rx_backtrack;
df->future_frame.data = 0;
df->future_frame.data_2 = 0;
return 1;
}
dest = rx_superstate (rx, solution);
rx_release_superset (rx, solution);
if (!dest)
return 0;
{
struct rx_distinct_future *dft;
dft = df;
df->prev_same_dest->next_same_dest = 0;
while (dft)
{
dft->future = dest;
dft->future_frame.inx = rx->instruction_table[rx_next_char];
dft->future_frame.data = (void *) dest->transitions;
dft->future_frame.data_2 = (void *) dest->contents->is_final;
dft = dft->next_same_dest;
}
df->prev_same_dest->next_same_dest = df;
}
if (!dest->transition_refs)
dest->transition_refs = df;
else
{
struct rx_distinct_future *dft;
dft = dest->transition_refs->next_same_dest;
dest->transition_refs->next_same_dest = df->next_same_dest;
df->next_same_dest->prev_same_dest = dest->transition_refs;
df->next_same_dest = dft;
dft->prev_same_dest = df;
}
return 1;
}
/* This takes a superstate and a character, and computes some edges
* from the superstate NFA. In particular, this computes all edges
* that lead from SUPERSTATE given CHR. This function also
* computes the set of characters that share this edge set.
* This returns 0 on allocation error.
* The character set and list of edges are returned through
* the paramters CSETOUT and DFOUT.
*/
#ifdef __STDC__
static int
compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
rx_Bitset csetout, struct rx_superstate *superstate,
unsigned char chr)
#else
static int
compute_super_edge (rx, dfout, csetout, superstate, chr)
struct rx *rx;
struct rx_distinct_future **dfout;
rx_Bitset csetout;
struct rx_superstate *superstate;
unsigned char chr;
#endif
{
struct rx_superset *stateset = superstate->contents;
/* To compute the set of characters that share edges with CHR,
* we start with the full character set, and subtract.
*/
rx_bitset_universe (rx->local_cset_size, csetout);
*dfout = 0;
/* Iterate over the NFA states in the superstate state-set. */
while (stateset->car)
{
struct rx_nfa_edge *e;
for (e = stateset->car->edges; e; e = e->next)
if (e->type == ne_cset)
{
if (!RX_bitset_member (e->params.cset, chr))
/* An edge that doesn't apply at least tells us some characters
* that don't share the same edge set as CHR.
*/
rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
else
{
/* If we find an NFA edge that applies, we make sure there
* are corresponding edges in the superstate NFA.
*/
{
struct rx_distinct_future * saved;
saved = *dfout;
*dfout = include_futures (rx, *dfout, e->dest, superstate);
if (!*dfout)
{
struct rx_distinct_future * df;
df = saved;
if (df)
df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
while (df)
{
struct rx_distinct_future *dft;
dft = df;
df = df->next_same_super_edge[0];
if (dft->future && dft->future->transition_refs == dft)
{
dft->future->transition_refs = dft->next_same_dest;
if (dft->future->transition_refs == dft)
dft->future->transition_refs = 0;
}
dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
dft->prev_same_dest->next_same_dest = dft->next_same_dest;
rx_cache_free (rx->cache, sizeof (*dft), (char *)dft);
}
return 0;
}
}
/* We also trim the character set a bit. */
rx_bitset_intersection (rx->local_cset_size,
csetout, e->params.cset);
}
}
stateset = stateset->cdr;
}
return 1;
}
/* This is a constructor for RX_SUPER_EDGE structures. These are
* wrappers for lists of superstate NFA edges that share character sets labels.
* If a transition class contains more than one rx_distinct_future (superstate
* edge), then it represents a non-determinism in the superstate NFA.
*/
#ifdef __STDC__
static struct rx_super_edge *
rx_super_edge (struct rx *rx,
struct rx_superstate *super, rx_Bitset cset,
struct rx_distinct_future *df)
#else
static struct rx_super_edge *
rx_super_edge (rx, super, cset, df)
struct rx *rx;
struct rx_superstate *super;
rx_Bitset cset;
struct rx_distinct_future *df;
#endif
{
struct rx_super_edge *tc;
int tc_size;
tc_size = ( sizeof (struct rx_super_edge)
+ rx_sizeof_bitset (rx->local_cset_size));
tc = ((struct rx_super_edge *)
rx_cache_malloc (rx->cache, tc_size));
if (!tc)
return 0;
tc->next = super->edges;
super->edges = tc;
tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
tc->rx_backtrack_frame.data = 0;
tc->rx_backtrack_frame.data_2 = (void *) tc;
tc->options = df;
tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
if (df)
{
struct rx_distinct_future * dfp = df;
df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
while (dfp)
{
dfp->edge = tc;
dfp = dfp->next_same_super_edge[0];
}
df->next_same_super_edge[1]->next_same_super_edge[0] = df;
}
return tc;
}
/* There are three kinds of cache miss. The first occurs when a
* transition is taken that has never been computed during the
* lifetime of the source superstate. That cache miss is handled by
* calling COMPUTE_SUPER_EDGE. The second kind of cache miss
* occurs when the destination superstate of a transition doesn't
* exist. SOLVE_DESTINATION is used to construct the destination superstate.
* Finally, the third kind of cache miss occurs when the destination
* superstate of a transition is in a `semi-free state'. That case is
* handled by UNFREE_SUPERSTATE.
*
* The function of HANDLE_CACHE_MISS is to figure out which of these
* cases applies.
*/
#ifdef __STDC__
static void
install_partial_transition (struct rx_superstate *super,
struct rx_inx *answer,
RX_subset set, int offset)
#else
static void
install_partial_transition (super, answer, set, offset)
struct rx_superstate *super;
struct rx_inx *answer;
RX_subset set;
int offset;
#endif
{
int start = offset;
int end = start + 32;
RX_subset pos = 1;
struct rx_inx * transitions = super->transitions;
while (start < end)
{
if (set & pos)
transitions[start] = *answer;
pos <<= 1;
++start;
}
}
#ifdef __STDC__
struct rx_inx *
rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
#else
struct rx_inx *
rx_handle_cache_miss (rx, super, chr, data)
struct rx *rx;
struct rx_superstate *super;
unsigned char chr;
void *data;
#endif
{
int offset = chr / RX_subset_bits;
struct rx_distinct_future *df = data;
if (!df) /* must be the shared_cache_miss_frame */
{
/* Perhaps this is just a transition waiting to be filled. */
struct rx_super_edge *tc;
RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
for (tc = super->edges; tc; tc = tc->next)
if (tc->cset[offset] & mask)
{
struct rx_inx * answer;
df = tc->options;
answer = ((tc->options->next_same_super_edge[0] != tc->options)
? &tc->rx_backtrack_frame
: (df->effects
? &df->side_effects_frame
: &df->future_frame));
install_partial_transition (super, answer,
tc->cset [offset], offset * 32);
return answer;
}
/* Otherwise, it's a flushed or newly encountered edge. */
{
char cset_space[1024]; /* this limit is far from unreasonable */
rx_Bitset trcset;
struct rx_inx *answer;
if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
return 0; /* If the arbitrary limit is hit, always fail */
/* cleanly. */
trcset = (rx_Bitset)cset_space;
rx_lock_superstate (rx, super);
if (!compute_super_edge (rx, &df, trcset, super, chr))
{
rx_unlock_superstate (rx, super);
return 0;
}
if (!df) /* We just computed the fail transition. */
{
static struct rx_inx
shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
answer = &shared_fail_frame;
}
else
{
tc = rx_super_edge (rx, super, trcset, df);
if (!tc)
{
rx_unlock_superstate (rx, super);
return 0;
}
answer = ((tc->options->next_same_super_edge[0] != tc->options)
? &tc->rx_backtrack_frame
: (df->effects
? &df->side_effects_frame
: &df->future_frame));
}
install_partial_transition (super, answer,
trcset[offset], offset * 32);
rx_unlock_superstate (rx, super);
return answer;
}
}
else if (df->future) /* A cache miss on an edge with a future? Must be
* a semi-free destination. */
{
if (df->future->is_semifree)
refresh_semifree_superstate (rx->cache, df->future);
return &df->future_frame;
}
else
/* no future superstate on an existing edge */
{
rx_lock_superstate (rx, super);
if (!solve_destination (rx, df))
{
rx_unlock_superstate (rx, super);
return 0;
}
if (!df->effects
&& (df->edge->options->next_same_super_edge[0] == df->edge->options))
install_partial_transition (super, &df->future_frame,
df->edge->cset[offset], offset * 32);
rx_unlock_superstate (rx, super);
return &df->future_frame;
}
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.