/* | |
* misc.c | |
* | |
* Manage tokens, regular expressions. | |
* Print methods for debugging | |
* Compute follow lists onto tail ends of rules. | |
* | |
* The following functions are visible: | |
* | |
* int addTname(char *); Add token name | |
* int addTexpr(char *); Add token expression | |
* int Tnum(char *); Get number of expr/token | |
* void Tklink(char *, char *); Link a name with an expression | |
* int hasAction(expr); Does expr already have action assigned? | |
* void setHasAction(expr); Indicate that expr now has an action | |
* Entry *newEntry(char *,int); Create new table entry with certain size | |
* void list_add(ListNode **list, char *e) | |
* void list_free(ListNode **list, int freeData); *** MR10 *** | |
* void list_apply(ListNode *list, void (*f)()) | |
* void lexclass(char *m); switch to new/old lexical class | |
* void lexmode(int i); switch to old lexical class i | |
* | |
* SOFTWARE RIGHTS | |
* | |
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool | |
* Set (PCCTS) -- PCCTS is in the public domain. An individual or | |
* company may do whatever they wish with source code distributed with | |
* PCCTS or the code generated by PCCTS, including the incorporation of | |
* PCCTS, or its output, into commerical software. | |
* | |
* We encourage users to develop software with PCCTS. However, we do ask | |
* that credit is given to us for developing PCCTS. By "credit", | |
* we mean that if you incorporate our source code into one of your | |
* programs (commercial product, research project, or otherwise) that you | |
* acknowledge this fact somewhere in the documentation, research report, | |
* etc... If you like PCCTS and have developed a nice tool with the | |
* output, please mention that you developed it using PCCTS. In | |
* addition, we ask that this header remain intact in our source code. | |
* As long as these guidelines are kept, we expect to continue enhancing | |
* this system and expect to make other tools available as they are | |
* completed. | |
* | |
* ANTLR 1.33 | |
* Terence Parr | |
* Parr Research Corporation | |
* with Purdue University and AHPCRC, University of Minnesota | |
* 1989-2001 | |
*/ | |
#include <stdio.h> | |
#include "pcctscfg.h" | |
#include "set.h" | |
#include "syn.h" | |
#include "hash.h" | |
#include "generic.h" | |
#include "dlgdef.h" | |
#include <ctype.h> | |
static int tsize=TSChunk; /* size of token str arrays */ | |
static void | |
#ifdef __USE_PROTOS | |
RemapForcedTokensInSyntaxDiagram(Node *); | |
#else | |
RemapForcedTokensInSyntaxDiagram(); | |
#endif | |
/* T o k e n M a n i p u l a t i o n */ | |
/* | |
* add token 't' to the TokenStr/Expr array. Make more room if necessary. | |
* 't' is either an expression or a token name. | |
* | |
* There is only one TokenStr array, but multiple ExprStr's. Therefore, | |
* for each lex class (element of lclass) we must extend the ExprStr array. | |
* ExprStr's and TokenStr are always all the same size. | |
* | |
* Also, there is a Texpr hash table for each automaton. | |
*/ | |
static void | |
#ifdef __USE_PROTOS | |
Ttrack( char *t ) | |
#else | |
Ttrack( t ) | |
char *t; | |
#endif | |
{ | |
if ( TokenNum >= tsize ) /* terminal table overflow? */ | |
{ | |
char **p; | |
int i, more, j; | |
more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk)); | |
tsize += more; | |
TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *)); | |
require(TokenStr != NULL, "Ttrack: can't extend TokenStr"); | |
for (i=0; i<NumLexClasses; i++) | |
{ | |
lclass[i].exprs = (char **) | |
realloc((char *)lclass[i].exprs, tsize*sizeof(char *)); | |
require(lclass[i].exprs != NULL, "Ttrack: can't extend ExprStr"); | |
for (p= &lclass[i].exprs[tsize-more],j=1; j<=more; j++) *p++ = NULL; | |
} | |
for (p= &TokenStr[tsize-more],i=1; i<=more; i++) *p++ = NULL; | |
lexmode( CurrentLexClass ); /* reset ExprStr in case table moved */ | |
} | |
/* note: we use the actual ExprStr/TokenStr array | |
* here as TokenInd doesn't exist yet | |
*/ | |
if ( *t == '"' ) ExprStr[TokenNum] = t; | |
else TokenStr[TokenNum] = t; | |
} | |
static Expr * | |
#ifdef __USE_PROTOS | |
newExpr( char *e ) | |
#else | |
newExpr( e ) | |
char *e; | |
#endif | |
{ | |
Expr *p = (Expr *) calloc(1, sizeof(Expr)); | |
require(p!=NULL, "newExpr: cannot alloc Expr node"); | |
p->expr = e; | |
p->lclass = CurrentLexClass; | |
return p; | |
} | |
/* switch to lexical class/mode m. This amounts to creating a new | |
* lex mode if one does not already exist and making ExprStr point | |
* to the correct char string array. We must also switch Texpr tables. | |
* | |
* BTW, we need multiple ExprStr arrays because more than one automaton | |
* may have the same label for a token, but with different expressions. | |
* We need to track an expr for each automaton. If we disallowed this | |
* feature, only one ExprStr would be required. | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
lexclass( char *m ) | |
#else | |
lexclass( m ) | |
char *m; | |
#endif | |
{ | |
int i; | |
TermEntry *p; | |
static char EOFSTR[] = "\"@\""; | |
if ( hash_get(Tname, m) != NULL ) | |
{ | |
warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m)); | |
} | |
/* does m already exist? */ | |
i = LexClassIndex(m); | |
if ( i != -1 ) {lexmode(i); return;} | |
/* must make new one */ | |
NumLexClasses++; | |
CurrentLexClass = NumLexClasses-1; | |
require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files"); | |
lclass[CurrentLexClass].classnum = m; | |
lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *)); | |
require(lclass[CurrentLexClass].exprs!=NULL, | |
"lexclass: cannot allocate ExprStr"); | |
lclass[CurrentLexClass].htable = newHashTable(); | |
ExprStr = lclass[CurrentLexClass].exprs; | |
Texpr = lclass[CurrentLexClass].htable; | |
/* define EOF for each automaton */ | |
p = newTermEntry( EOFSTR ); | |
p->token = EofToken; /* couldn't have remapped tokens yet, use EofToken */ | |
hash_add(Texpr, EOFSTR, (Entry *)p); | |
list_add(&ExprOrder, (void *)newExpr(EOFSTR)); | |
/* note: we use the actual ExprStr array | |
* here as TokenInd doesn't exist yet | |
*/ | |
ExprStr[EofToken] = EOFSTR; | |
} | |
void | |
#ifdef __USE_PROTOS | |
lexmode( int i ) | |
#else | |
lexmode( i ) | |
int i; | |
#endif | |
{ | |
require(i<NumLexClasses, "lexmode: invalid mode"); | |
ExprStr = lclass[i].exprs; | |
Texpr = lclass[i].htable; | |
CurrentLexClass = i; | |
} | |
/* return index into lclass array of lexical class. return -1 if nonexistent */ | |
int | |
#ifdef __USE_PROTOS | |
LexClassIndex( char *cl ) | |
#else | |
LexClassIndex( cl ) | |
char *cl; | |
#endif | |
{ | |
int i; | |
for (i=0; i<NumLexClasses; i++) | |
{ | |
if ( strcmp(lclass[i].classnum, cl) == 0 ) return i; | |
} | |
return -1; | |
} | |
int | |
#ifdef __USE_PROTOS | |
hasAction( char *expr ) | |
#else | |
hasAction( expr ) | |
char *expr; | |
#endif | |
{ | |
TermEntry *p; | |
require(expr!=NULL, "hasAction: invalid expr"); | |
p = (TermEntry *) hash_get(Texpr, expr); | |
require(p!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr)); | |
return (p->action!=NULL); | |
} | |
void | |
#ifdef __USE_PROTOS | |
setHasAction( char *expr, char *action ) | |
#else | |
setHasAction( expr, action ) | |
char *expr; | |
char *action; | |
#endif | |
{ | |
TermEntry *p; | |
require(expr!=NULL, "setHasAction: invalid expr"); | |
p = (TermEntry *) hash_get(Texpr, expr); | |
require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr)); | |
p->action = action; | |
} | |
ForcedToken * | |
#ifdef __USE_PROTOS | |
newForcedToken(char *token, int tnum) | |
#else | |
newForcedToken(token, tnum) | |
char *token; | |
int tnum; | |
#endif | |
{ | |
ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken)); | |
require(ft!=NULL, "out of memory"); | |
ft->token = token; | |
ft->tnum = tnum; | |
return ft; | |
} | |
/* | |
* Make a token indirection array that remaps token numbers and then walk | |
* the appropriate symbol tables and SynDiag to change token numbers | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
RemapForcedTokens(void) | |
#else | |
RemapForcedTokens() | |
#endif | |
{ | |
ListNode *p; | |
ForcedToken *q; | |
int max_token_number=0; /* MR9 23-Sep-97 Removed "unsigned" */ | |
int i; | |
if ( ForcedTokens == NULL ) return; | |
/* find max token num */ | |
for (p = ForcedTokens->next; p!=NULL; p=p->next) | |
{ | |
q = (ForcedToken *) p->elem; | |
if ( q->tnum > max_token_number ) max_token_number = q->tnum; | |
} | |
fprintf(stderr, "max token number is %d\n", max_token_number); | |
/* make token indirection array */ | |
TokenInd = (int *) calloc(max_token_number+1, sizeof(int)); | |
LastTokenCounted = TokenNum; | |
TokenNum = max_token_number+1; | |
require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd"); | |
/* fill token indirection array and change token id htable ; swap token indices */ | |
for (i=1; i<TokenNum; i++) TokenInd[i] = i; | |
for (p = ForcedTokens->next; p!=NULL; p=p->next) | |
{ | |
TermEntry *te; | |
int old_pos, t; | |
q = (ForcedToken *) p->elem; | |
fprintf(stderr, "%s forced to %d\n", q->token, q->tnum); | |
te = (TermEntry *) hash_get(Tname, q->token); | |
require(te!=NULL, "RemapForcedTokens: token not in hash table"); | |
old_pos = te->token; | |
fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); | |
fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); | |
q = (ForcedToken *) p->elem; | |
t = TokenInd[old_pos]; | |
TokenInd[old_pos] = q->tnum; | |
TokenInd[q->tnum] = t; | |
te->token = q->tnum; /* update token type id symbol table */ | |
fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); | |
fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); | |
/* Change the token number in the sym tab entry for the exprs | |
* at the old position of the token id and the target position | |
*/ | |
/* update expr at target (if any) of forced token id */ | |
if ( q->tnum < TokenNum ) /* is it a valid position? */ | |
{ | |
for (i=0; i<NumLexClasses; i++) | |
{ | |
if ( lclass[i].exprs[q->tnum]!=NULL ) | |
{ | |
/* update the symbol table for this expr */ | |
TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]); | |
require(e!=NULL, "RemapForcedTokens: expr not in hash table"); | |
e->token = old_pos; | |
fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n", | |
lclass[i].exprs[q->tnum], q->tnum, i, old_pos); | |
} | |
} | |
} | |
/* update expr at old position (if any) of forced token id */ | |
for (i=0; i<NumLexClasses; i++) | |
{ | |
if ( lclass[i].exprs[old_pos]!=NULL ) | |
{ | |
/* update the symbol table for this expr */ | |
TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]); | |
require(e!=NULL, "RemapForcedTokens: expr not in hash table"); | |
e->token = q->tnum; | |
fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n", | |
lclass[i].exprs[old_pos], q->token, i, q->tnum); | |
} | |
} | |
} | |
/* Update SynDiag */ | |
RemapForcedTokensInSyntaxDiagram((Node *)SynDiag); | |
} | |
static void | |
#ifdef __USE_PROTOS | |
RemapForcedTokensInSyntaxDiagram(Node *p) | |
#else | |
RemapForcedTokensInSyntaxDiagram(p) | |
Node *p; | |
#endif | |
{ | |
Junction *j = (Junction *) p; | |
RuleRefNode *r = (RuleRefNode *) p; | |
TokNode *t = (TokNode *)p; | |
if ( p==NULL ) return; | |
require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node"); | |
switch ( p->ntype ) | |
{ | |
case nJunction : | |
if ( j->visited ) return; | |
if ( j->jtype == EndRule ) return; | |
j->visited = TRUE; | |
RemapForcedTokensInSyntaxDiagram( j->p1 ); | |
RemapForcedTokensInSyntaxDiagram( j->p2 ); | |
j->visited = FALSE; | |
return; | |
case nRuleRef : | |
RemapForcedTokensInSyntaxDiagram( r->next ); | |
return; | |
case nToken : | |
if ( t->remapped ) return; /* we've been here before */ | |
t->remapped = 1; | |
fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]); | |
t->token = TokenInd[t->token]; | |
RemapForcedTokensInSyntaxDiagram( t->next ); | |
return; | |
case nAction : | |
RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next ); | |
return; | |
default : | |
fatal_internal("invalid node type"); | |
} | |
} | |
/* | |
* Add a token name. Return the token number associated with it. If it already | |
* exists, then return the token number assigned to it. | |
* | |
* Track the order in which tokens are found so that the DLG output maintains | |
* that order. It also lets us map token numbers to strings. | |
*/ | |
int | |
#ifdef __USE_PROTOS | |
addTname( char *token ) | |
#else | |
addTname( token ) | |
char *token; | |
#endif | |
{ | |
TermEntry *p; | |
require(token!=NULL, "addTname: invalid token name"); | |
if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; | |
p = newTermEntry( token ); | |
Ttrack( p->str ); | |
p->token = TokenNum++; | |
hash_add(Tname, token, (Entry *)p); | |
return p->token; | |
} | |
/* This is the same as addTname except we force the TokenNum to be tnum. | |
* We don't have to use the Forced token stuff as no tokens will have | |
* been defined with #tokens when this is called. This is only called | |
* when a #tokdefs meta-op is used. | |
*/ | |
int | |
#ifdef __USE_PROTOS | |
addForcedTname( char *token, int tnum ) | |
#else | |
addForcedTname( token, tnum ) | |
char *token; | |
int tnum; | |
#endif | |
{ | |
TermEntry *p; | |
require(token!=NULL, "addTname: invalid token name"); | |
if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; | |
p = newTermEntry( token ); | |
Ttrack( p->str ); | |
p->token = tnum; | |
hash_add(Tname, token, (Entry *)p); | |
return p->token; | |
} | |
/* | |
* Add a token expr. Return the token number associated with it. If it already | |
* exists, then return the token number assigned to it. | |
*/ | |
int | |
#ifdef __USE_PROTOS | |
addTexpr( char *expr ) | |
#else | |
addTexpr( expr ) | |
char *expr; | |
#endif | |
{ | |
TermEntry *p; | |
require(expr!=NULL, "addTexpr: invalid regular expression"); | |
if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token; | |
p = newTermEntry( expr ); | |
Ttrack( p->str ); | |
/* track the order in which they occur */ | |
list_add(&ExprOrder, (void *)newExpr(p->str)); | |
p->token = TokenNum++; | |
hash_add(Texpr, expr, (Entry *)p); | |
return p->token; | |
} | |
/* return the token number of 'term'. Return 0 if no 'term' exists */ | |
int | |
#ifdef __USE_PROTOS | |
Tnum( char *term ) | |
#else | |
Tnum( term ) | |
char *term; | |
#endif | |
{ | |
TermEntry *p; | |
require(term!=NULL, "Tnum: invalid terminal"); | |
if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term); | |
else p = (TermEntry *) hash_get(Tname, term); | |
if ( p == NULL ) return 0; | |
else return p->token; | |
} | |
/* associate a Name with an expr. If both have been already assigned | |
* token numbers, then an error is reported. Add the token or expr | |
* that has not been added if no error. This 'represents' the #token | |
* ANTLR pseudo-op. If both have not been defined, define them both | |
* linked to same token number. | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
Tklink( char *token, char *expr ) | |
#else | |
Tklink( token, expr ) | |
char *token; | |
char *expr; | |
#endif | |
{ | |
TermEntry *p, *q; | |
require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr"); | |
p = (TermEntry *) hash_get(Tname, token); | |
q = (TermEntry *) hash_get(Texpr, expr); | |
if ( p != NULL && q != NULL ) /* both defined */ | |
{ | |
warn( eMsg2("token name %s and rexpr %s already defined; ignored", | |
token, expr) ); | |
return; | |
} | |
if ( p==NULL && q==NULL ) /* both not defined */ | |
{ | |
int t = addTname( token ); | |
q = newTermEntry( expr ); | |
hash_add(Texpr, expr, (Entry *)q); | |
q->token = t; | |
/* note: we use the actual ExprStr array | |
* here as TokenInd doesn't exist yet | |
*/ | |
ExprStr[t] = q->str; | |
/* track the order in which they occur */ | |
list_add(&ExprOrder, (void *)newExpr(q->str)); | |
return; | |
} | |
if ( p != NULL ) /* one is defined, one is not */ | |
{ | |
q = newTermEntry( expr ); | |
hash_add(Texpr, expr, (Entry *)q); | |
q->token = p->token; | |
ExprStr[p->token] = q->str; /* both expr and token str defined now */ | |
list_add(&ExprOrder, (void *)newExpr(q->str)); | |
} | |
else /* trying to associate name with expr here*/ | |
{ | |
p = newTermEntry( token ); | |
hash_add(Tname, token, (Entry *)p); | |
p->token = q->token; | |
TokenStr[p->token] = p->str;/* both expr and token str defined now */ | |
} | |
} | |
/* | |
* Given a string, this function allocates and returns a pointer to a | |
* hash table record of size 'sz' whose "str" pointer is reset to a position | |
* in the string table. | |
*/ | |
Entry * | |
#ifdef __USE_PROTOS | |
newEntry( char *text, int sz ) | |
#else | |
newEntry( text, sz ) | |
char *text; | |
int sz; | |
#endif | |
{ | |
Entry *p; | |
require(text!=NULL, "new: NULL terminal"); | |
if ( (p = (Entry *) calloc(1,sz)) == 0 ) | |
{ | |
fatal_internal("newEntry: out of memory for terminals\n"); | |
exit(PCCTS_EXIT_FAILURE); | |
} | |
p->str = mystrdup(text); | |
return(p); | |
} | |
/* | |
* add an element to a list. | |
* | |
* Any non-empty list has a sentinel node whose 'elem' pointer is really | |
* a pointer to the last element. (i.e. length(list) = #elemIn(list)+1). | |
* Elements are appended to the list. | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
list_add( ListNode **list, void *e ) | |
#else | |
list_add( list, e ) | |
ListNode **list; | |
void *e; | |
#endif | |
{ | |
ListNode *p, *tail; | |
require(e!=NULL, "list_add: attempting to add NULL list element"); | |
p = newListNode; | |
require(p!=NULL, "list_add: cannot alloc new list node"); | |
p->elem = e; | |
if ( *list == NULL ) | |
{ | |
ListNode *sentinel = newListNode; | |
require(sentinel!=NULL, "list_add: cannot alloc sentinel node"); | |
*list=sentinel; | |
sentinel->next = p; | |
sentinel->elem = (char *)p; /* set tail pointer */ | |
} | |
else /* find end of list */ | |
{ | |
tail = (ListNode *) (*list)->elem; /* get tail pointer */ | |
tail->next = p; | |
(*list)->elem = (char *) p; /* reset tail */ | |
} | |
} | |
/* MR10 list_free() frees the ListNode elements in the list */ | |
/* MR10 if freeData then free the data elements of the list too */ | |
void | |
#ifdef __USE_PROTOS | |
list_free(ListNode **list,int freeData) | |
#else | |
list_free(list,freeData) | |
ListNode **list; | |
int freeData; | |
#endif | |
{ | |
ListNode *p; | |
ListNode *next; | |
if (list == NULL) return; | |
if (*list == NULL) return; | |
for (p=*list; p != NULL; p=next) { | |
next=p->next; | |
if (freeData && p->elem != NULL) { | |
free( (char *) p->elem); | |
}; | |
free( (char *) p); | |
}; | |
*list=NULL; | |
} | |
void | |
#ifdef __USE_PROTOS | |
list_apply( ListNode *list, void (*f)(void *) ) | |
#else | |
list_apply( list, f ) | |
ListNode *list; | |
void (*f)(); | |
#endif | |
{ | |
ListNode *p; | |
require(f!=NULL, "list_apply: NULL function to apply"); | |
if ( list == NULL ) return; | |
for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem ); | |
} | |
/* MR27 */ | |
#ifdef __USE_PROTOS | |
int list_search_cstring(ListNode *list, char * cstring) | |
#else | |
int list_search_cstring(list, cstring) | |
ListNode * list; | |
char * cstring; | |
#endif | |
{ | |
ListNode *p; | |
if (list == NULL ) return 0; | |
for (p = list->next; p!=NULL; p=p->next) { | |
if (p->elem == NULL) continue; | |
if (0 == strcmp((char *) p->elem , cstring)) return 1; | |
} | |
return 0; | |
} | |
/* F O L L O W C y c l e S t u f f */ | |
/* make a key based upon (rulename, computation, k value). | |
* Computation values are 'i'==FIRST, 'o'==FOLLOW. | |
*/ | |
/* MR10 Make the key all characters so it can be read easily */ | |
/* MR10 by a simple dump program. Also, separates */ | |
/* MR10 'o' and 'i' from rule name */ | |
char * | |
#ifdef __USE_PROTOS | |
Fkey( char *rule, int computation, int k ) | |
#else | |
Fkey( rule, computation, k ) | |
char *rule; | |
int computation; | |
int k; | |
#endif | |
{ | |
static char key[MaxRuleName+2+2+1]; /* MR10 */ | |
int i; | |
if ( k > 99 ) /* MR10 */ | |
fatal("k>99 is too big for this implementation of ANTLR!\n"); /* MR10 */ | |
if ( (i=strlen(rule)) > MaxRuleName ) /* MR10 */ | |
fatal( eMsgd("rule name > max of %d\n", MaxRuleName) ); /* MR10 */ | |
strcpy(key,rule); | |
/* MR10 */ key[i]='*'; | |
/* MR10 */ key[i+1] = (char) computation; /* MR20 G. Hobbelt */ | |
/* MR10 */ if (k < 10) { | |
/* MR10 */ key[i+2] = (char) ( '0' + k); | |
/* MR10 */ key[i+3] = '\0'; | |
/* MR10 */ } else { | |
/* MR10 */ key[i+2] = (char) ( '0' + k/10); | |
/* MR10 */ key[i+3] = (char) ( '0' + k % 10); | |
/* MR10 */ key[i+4] = '\0'; | |
/* MR10 */ }; | |
return key; | |
} | |
/* Push a rule onto the kth FOLLOW stack */ | |
void | |
#ifdef __USE_PROTOS | |
FoPush( char *rule, int k ) | |
#else | |
FoPush( rule, k ) | |
char *rule; | |
int k; | |
#endif | |
{ | |
RuleEntry *r; | |
require(rule!=NULL, "FoPush: tried to push NULL rule"); | |
require(k<=CLL_k, "FoPush: tried to access non-existent stack"); | |
/*fprintf(stderr, "FoPush(%s)\n", rule);*/ | |
r = (RuleEntry *) hash_get(Rname, rule); | |
if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );} | |
if ( FoStack[k] == NULL ) /* Does the kth stack exist yet? */ | |
{ | |
/*fprintf(stderr, "allocating FoStack\n");*/ | |
FoStack[k] = (int *) calloc(FoStackSize, sizeof(int)); | |
require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n"); | |
} | |
if ( FoTOS[k] == NULL ) | |
{ | |
FoTOS[k]=FoStack[k]; | |
*(FoTOS[k]) = r->rulenum; | |
} | |
else | |
{ | |
#ifdef MEMCHK | |
require(valid(FoStack[k]), "FoPush: invalid FoStack"); | |
#endif | |
if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) ) | |
fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n", | |
FoStackSize) ); | |
require(FoTOS[k]>=FoStack[k], | |
eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox", | |
rule)); | |
++(FoTOS[k]); | |
*(FoTOS[k]) = r->rulenum; | |
} | |
{ | |
/* | |
**** int *p; | |
**** fprintf(stderr, "FoStack[k=%d]:\n", k); | |
**** for (p=FoStack[k]; p<=FoTOS[k]; p++) | |
**** { | |
**** fprintf(stderr, "\t%s\n", RulePtr[*p]->rname); | |
**** } | |
*/ | |
} | |
} | |
/* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */ | |
void | |
#ifdef __USE_PROTOS | |
FoPop( int k ) | |
#else | |
FoPop( k ) | |
int k; | |
#endif | |
{ | |
require(k<=CLL_k, "FoPop: tried to access non-existent stack"); | |
/*fprintf(stderr, "FoPop\n");*/ | |
require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), | |
"FoPop: FoStack stack-ptr is playing out of its sandbox"); | |
if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL; | |
else (FoTOS[k])--; | |
} | |
/* Compute FOLLOW cycle. | |
* Mark all FOLLOW sets for rules in cycle as incomplete. | |
* Then, save cycle on the cycle list (Cycles) for later resolution. | |
* The Cycle is stored in the form: | |
* (head of cycle==croot, rest of rules in cycle==cyclicDep) | |
* | |
* e.g. (Fo means "FOLLOW of", "-->" means requires or depends on) | |
* | |
* Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x) | |
* ^----Infinite recursion (cycle) | |
* | |
* the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends | |
* on the FOLLOW of a,b, and c. The root of a cycle is always complete after | |
* Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules | |
* in a FOLLOW cycle have the same FOLLOW set. | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
RegisterCycle( char *rule, int k ) | |
#else | |
RegisterCycle( rule, k ) | |
char *rule; | |
int k; | |
#endif | |
{ | |
CacheEntry *f; | |
Cycle *c; | |
int *p; | |
RuleEntry *r; | |
require(rule!=NULL, "RegisterCycle: tried to register NULL rule"); | |
require(k<=CLL_k, "RegisterCycle: tried to access non-existent stack"); | |
/*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/ | |
/* Find cycle start */ | |
r = (RuleEntry *) hash_get(Rname, rule); | |
require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule)); | |
require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), | |
eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox", | |
rule)); | |
/*** if ( FoTOS[k]<FoStack[k]||FoTOS[k]>&(FoStack[k][FoStackSize-1]) ) | |
**** { | |
**** fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n", | |
**** rule); | |
**** fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n", | |
**** FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1])); | |
**** exit(PCCTS_EXIT_FAILURE); | |
**** } | |
****/ | |
#ifdef MEMCHK | |
require(valid(FoStack[k]), "RegisterCycle: invalid FoStack"); | |
#endif | |
for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;} | |
require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief"); | |
if ( p == FoTOS[k] ) return; /* don't worry about cycles to oneself */ | |
/* compute cyclic dependents (rules in cycle except head) */ | |
c = newCycle; | |
require(c!=NULL, "RegisterCycle: couldn't alloc new cycle"); | |
c->cyclicDep = empty; | |
c->croot = *p++; /* record root of cycle */ | |
for (; p<=FoTOS[k]; p++) | |
{ | |
/* Mark all dependent rules as incomplete */ | |
f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k)); | |
if ( f==NULL ) | |
{ | |
f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) ); | |
hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f); | |
} | |
f->incomplete = TRUE; | |
set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */ | |
} | |
list_add(&(Cycles[k]), (void *)c); | |
} | |
/* make all rules in cycle complete | |
* | |
* while ( some set has changed ) do | |
* for each cycle do | |
* if degree of FOLLOW set for croot > old degree then | |
* update all FOLLOW sets for rules in cyclic dependency | |
* change = TRUE | |
* endif | |
* endfor | |
* endwhile | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
ResolveFoCycles( int k ) | |
#else | |
ResolveFoCycles( k ) | |
int k; | |
#endif | |
{ | |
ListNode *p, *q; | |
Cycle *c; | |
int changed = 1; | |
CacheEntry *f,*g; | |
int r; | |
/* int i; */ /* MR10 not useful */ | |
unsigned d; | |
unsigned *cursor; /* MR10 */ | |
unsigned *origin; /* MR10 */ | |
/*fprintf(stderr, "Resolving following cycles for %d\n", k);*/ | |
while ( changed ) | |
{ | |
changed = 0; | |
/* MR10 i = 0; */ | |
for (p = Cycles[k]->next; p!=NULL; p=p->next) | |
{ | |
c = (Cycle *) p->elem; | |
/*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/ | |
/*s_fprT(stderr, c->cyclicDep);*/ | |
/*fprintf(stderr, "\n");*/ | |
f = (CacheEntry *) | |
hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k)); | |
require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) ); | |
if ( (d=set_deg(f->fset)) > c->deg ) | |
{ | |
/*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/ | |
changed = 1; | |
c->deg = d; /* update cycle FOLLOW set degree */ | |
/* MR10 */ origin=set_pdq(c->cyclicDep); | |
/* MR10 */ for (cursor=origin; *cursor != nil; cursor++) { | |
/* MR10 */ r=*cursor; | |
/******** while ( !set_nil(c->cyclicDep) ) { *****/ | |
/******** r = set_int(c->cyclicDep); *****/ | |
/******** set_rm(r, c->cyclicDep); *****/ | |
/*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/ | |
g = (CacheEntry *) | |
hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k)); | |
require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) ); | |
set_orin(&(g->fset), f->fset); | |
g->incomplete = FALSE; | |
} | |
/* MR10 */ free( (char *) origin); | |
/* MR10 */ origin=NULL; | |
} | |
} | |
/* MR10 - this if statement appears to be meaningless since i is always 0 */ | |
/* MR10 if ( i == 1 ) changed = 0; */ /* if only 1 cycle, no need to repeat */ | |
} | |
/* kill Cycle list */ | |
for (q = Cycles[k]->next; q != NULL; q=p) | |
{ | |
p = q->next; | |
set_free( ((Cycle *)q->elem)->cyclicDep ); | |
free((char *)q); | |
} | |
free( (char *)Cycles[k] ); | |
Cycles[k] = NULL; | |
} | |
/* P r i n t i n g S y n t a x D i a g r a m s */ | |
static void | |
#ifdef __USE_PROTOS | |
pBlk( Junction *q, int btype ) | |
#else | |
pBlk( q, btype ) | |
Junction *q; | |
int btype; | |
#endif | |
{ | |
int k,a; | |
Junction *alt, *p; | |
q->end->pvisited = TRUE; | |
if ( btype == aLoopBegin ) | |
{ | |
require(q->p2!=NULL, "pBlk: invalid ()* block"); | |
PRINT(q->p1); | |
alt = (Junction *)q->p2; | |
PRINT(alt->p1); | |
if ( PrintAnnotate ) | |
{ | |
printf(" /* Opt "); | |
k = 1; | |
while ( !set_nil(alt->fset[k]) ) | |
{ | |
s_fprT(stdout, alt->fset[k]); | |
if ( k++ == CLL_k ) break; | |
if ( !set_nil(alt->fset[k]) ) printf(", "); | |
} | |
printf(" */\n"); | |
} | |
return; | |
} | |
for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++) | |
{ | |
if ( alt->p1 != NULL ) PRINT(alt->p1); | |
if ( PrintAnnotate ) | |
{ | |
printf( " /* [%d] ", alt->altnum); | |
k = 1; | |
while ( !set_nil(alt->fset[k]) ) | |
{ | |
s_fprT(stdout, alt->fset[k]); | |
if ( k++ == CLL_k ) break; | |
if ( !set_nil(alt->fset[k]) ) printf(", "); | |
} | |
if ( alt->p2 == NULL && btype == aOptBlk ) | |
printf( " (optional branch) */\n"); | |
else printf( " */\n"); | |
} | |
/* ignore implied empty alt of Plus blocks */ | |
if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break; | |
if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) ) | |
{ | |
if ( pLevel == 1 ) | |
{ | |
printf("\n"); | |
if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>"); | |
printf("\t"); | |
} | |
else printf(" "); | |
printf("|"); | |
if ( pLevel == 1 ) | |
{ | |
p = (Junction *) ((Junction *)alt->p2)->p1; | |
while ( p!=NULL ) | |
{ | |
if ( p->ntype==nAction ) | |
{ | |
p=(Junction *)((ActionNode *)p)->next; | |
continue; | |
} | |
if ( p->ntype!=nJunction ) | |
{ | |
break; | |
} | |
if ( p->jtype==EndBlk || p->jtype==EndRule ) | |
{ | |
p = NULL; | |
break; | |
} | |
p = (Junction *)p->p1; | |
} | |
if ( p==NULL ) printf("\n\t"); /* Empty alt? */ | |
} | |
} | |
} | |
q->end->pvisited = FALSE; | |
} | |
/* How to print out a junction */ | |
void | |
#ifdef __USE_PROTOS | |
pJunc( Junction *q ) | |
#else | |
pJunc( q ) | |
Junction *q; | |
#endif | |
{ | |
int dum_k; | |
int doing_rule; | |
require(q!=NULL, "pJunc: NULL node"); | |
require(q->ntype==nJunction, "pJunc: not junction"); | |
if ( q->pvisited == TRUE ) return; | |
q->pvisited = TRUE; | |
switch ( q->jtype ) | |
{ | |
case aSubBlk : | |
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); | |
if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction && | |
((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1; | |
else doing_rule = 0; | |
pLevel++; | |
if ( pLevel==1 ) | |
{ | |
if ( pAlt1==1 ) printf("=>"); | |
printf("\t"); | |
} | |
else printf(" "); | |
if ( doing_rule ) | |
{ | |
if ( pLevel==1 ) printf(" "); | |
pBlk(q,q->jtype); | |
} | |
else { | |
printf("("); | |
if ( pLevel==1 ) printf(" "); | |
pBlk(q,q->jtype); | |
if ( pLevel>1 ) printf(" "); | |
printf(")"); | |
} | |
if ( q->guess ) printf("?"); | |
pLevel--; | |
if ( PrintAnnotate ) freeBlkFsets(q); | |
if ( q->end->p1 != NULL ) PRINT(q->end->p1); | |
break; | |
case aOptBlk : | |
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); | |
pLevel++; | |
if ( pLevel==1 ) | |
{ | |
if ( pAlt1==1 ) printf("=>"); | |
printf("\t"); | |
} | |
else printf(" "); | |
printf("{"); | |
if ( pLevel==1 ) printf(" "); | |
pBlk(q,q->jtype); | |
if ( pLevel>1 ) printf(" "); | |
else printf("\n\t"); | |
printf("}"); | |
pLevel--; | |
if ( PrintAnnotate ) freeBlkFsets(q); | |
if ( q->end->p1 != NULL ) PRINT(q->end->p1); | |
break; | |
case aLoopBegin : | |
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); | |
pLevel++; | |
if ( pLevel==1 ) | |
{ | |
if ( pAlt1==1 ) printf("=>"); | |
printf("\t"); | |
} | |
else printf(" "); | |
printf("("); | |
if ( pLevel==1 ) printf(" "); | |
pBlk(q,q->jtype); | |
if ( pLevel>1 ) printf(" "); | |
else printf("\n\t"); | |
printf(")*"); | |
pLevel--; | |
if ( PrintAnnotate ) freeBlkFsets(q); | |
if ( q->end->p1 != NULL ) PRINT(q->end->p1); | |
break; | |
case aLoopBlk : | |
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); | |
pBlk(q,q->jtype); | |
if ( PrintAnnotate ) freeBlkFsets(q); | |
break; | |
case aPlusBlk : | |
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); | |
pLevel++; | |
if ( pLevel==1 ) | |
{ | |
if ( pAlt1==1 ) printf("=>"); | |
printf("\t"); | |
} | |
else printf(" "); | |
printf("("); | |
if ( pLevel==1 ) printf(" "); | |
pBlk(q,q->jtype); | |
if ( pLevel>1 ) printf(" "); | |
printf(")+"); | |
pLevel--; | |
if ( PrintAnnotate ) freeBlkFsets(q); | |
if ( q->end->p1 != NULL ) PRINT(q->end->p1); | |
break; | |
case EndBlk : | |
break; | |
case RuleBlk : | |
printf( "\n%s :\n", q->rname); | |
PRINT(q->p1); | |
if ( q->p2 != NULL ) PRINT(q->p2); | |
break; | |
case Generic : | |
if ( q->p1 != NULL ) PRINT(q->p1); | |
q->pvisited = FALSE; | |
if ( q->p2 != NULL ) PRINT(q->p2); | |
break; | |
case EndRule : | |
printf( "\n\t;\n"); | |
break; | |
} | |
q->pvisited = FALSE; | |
} | |
/* How to print out a rule reference node */ | |
void | |
#ifdef __USE_PROTOS | |
pRuleRef( RuleRefNode *p ) | |
#else | |
pRuleRef( p ) | |
RuleRefNode *p; | |
#endif | |
{ | |
require(p!=NULL, "pRuleRef: NULL node"); | |
require(p->ntype==nRuleRef, "pRuleRef: not rule ref node"); | |
printf( " %s", p->text); | |
PRINT(p->next); | |
} | |
/* How to print out a terminal node */ | |
void | |
#ifdef __USE_PROTOS | |
pToken( TokNode *p ) | |
#else | |
pToken( p ) | |
TokNode *p; | |
#endif | |
{ | |
require(p!=NULL, "pToken: NULL node"); | |
require(p->ntype==nToken, "pToken: not token node"); | |
if ( p->wild_card ) printf(" ."); | |
printf( " %s", TerminalString(p->token)); | |
PRINT(p->next); | |
} | |
/* How to print out a terminal node */ | |
void | |
#ifdef __USE_PROTOS | |
pAction( ActionNode *p ) | |
#else | |
pAction( p ) | |
ActionNode *p; | |
#endif | |
{ | |
require(p!=NULL, "pAction: NULL node"); | |
require(p->ntype==nAction, "pAction: not action node"); | |
PRINT(p->next); | |
} | |
/* F i l l F o l l o w L i s t s */ | |
/* | |
* Search all rules for all rule reference nodes, q to rule, r. | |
* Add q->next to follow list dangling off of rule r. | |
* i.e. | |
* | |
* r: -o-R-o-->o--> Ptr to node following rule r in another rule | |
* | | |
* o--> Ptr to node following another reference to r. | |
* | |
* This is the data structure employed to avoid FOLLOW set computation. We | |
* simply compute the FIRST (reach) of the EndRule Node which follows the | |
* list found at the end of all rules which are referenced elsewhere. Rules | |
* not invoked by other rules have no follow list (r->end->p1==NULL). | |
* Generally, only start symbols are not invoked by another rule. | |
* | |
* Note that this mechanism also gives a free cross-reference mechanism. | |
* | |
* The entire syntax diagram is layed out like this: | |
* | |
* SynDiag | |
* | | |
* v | |
* o-->R1--o | |
* | | |
* o-->R2--o | |
* | | |
* ... | |
* | | |
* o-->Rn--o | |
* | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
FoLink( Node *p ) | |
#else | |
FoLink( p ) | |
Node *p; | |
#endif | |
{ | |
RuleEntry *q; | |
Junction *j = (Junction *) p; | |
RuleRefNode *r = (RuleRefNode *) p; | |
if ( p==NULL ) return; | |
require(p->ntype>=1 && p->ntype<=NumNodeTypes, | |
eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype)); | |
switch ( p->ntype ) | |
{ | |
case nJunction : | |
if ( j->fvisited ) return; | |
if ( j->jtype == EndRule ) return; | |
j->fvisited = TRUE; | |
FoLink( j->p1 ); | |
FoLink( j->p2 ); | |
/* MR14 */ | |
/* MR14 */ /* Need to determine whether the guess block is an */ | |
/* MR14 */ /* of the form (alpha)? beta before follow sets are */ | |
/* MR14 */ /* computed. This is necessary to solve problem */ | |
/* MR14 */ /* of doing follow on the alpha of an (alpha)? beta block. */ | |
/* MR14 */ | |
/* MR14 */ /* This is performed by analysis_point as a side-effect. */ | |
/* MR14 */ | |
/* MR14 */ | |
/* MR14 */ if (j->jtype == aSubBlk && j->guess) { | |
/* MR14 */ Junction *ignore; | |
/* MR14 */ ignore=analysis_point(j); | |
/* MR14 */ } | |
/* MR14 */ | |
return; | |
case nRuleRef : | |
if ( r->linked ) return; | |
q = (RuleEntry *) hash_get(Rname, r->text); | |
if ( q == NULL ) | |
{ | |
warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line ); | |
} | |
else | |
{ | |
if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL ) | |
{ | |
warnFL( eMsg1("rule %s accepts no parameter(s)", r->text), | |
FileStr[r->file], r->line ); | |
} | |
if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL ) | |
{ | |
warnFL( eMsg1("rule %s requires parameter(s)", r->text), | |
FileStr[r->file], r->line ); | |
} | |
if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL ) | |
{ | |
warnFL( eMsg1("rule %s yields no return value(s)", r->text), | |
FileStr[r->file], r->line ); | |
} | |
if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL ) | |
{ | |
warnFL( eMsg1("rule %s returns a value(s)", r->text), | |
FileStr[r->file], r->line ); | |
} | |
if ( !r->linked ) | |
{ | |
addFoLink( r->next, r->rname, RulePtr[q->rulenum] ); | |
r->linked = TRUE; | |
} | |
} | |
FoLink( r->next ); | |
return; | |
case nToken : | |
FoLink( ((TokNode *)p)->next ); | |
return; | |
case nAction : | |
FoLink( ((ActionNode *)p)->next ); | |
return; | |
default : | |
fatal_internal("invalid node type"); | |
} | |
} | |
/* | |
* Add a reference to the end of a rule. | |
* | |
* 'r' points to the RuleBlk node in a rule. r->end points to the last node | |
* (EndRule jtype) in a rule. | |
* | |
* Initial: | |
* r->end --> o | |
* | |
* After: | |
* r->end --> o-->o--> Ptr to node following rule r in another rule | |
* | | |
* o--> Ptr to node following another reference to r. | |
* | |
* Note that the links are added to the head of the list so that r->end->p1 | |
* always points to the most recently added follow-link. At the end, it should | |
* point to the last reference found in the grammar (starting from the 1st rule). | |
*/ | |
void | |
#ifdef __USE_PROTOS | |
addFoLink( Node *p, char *rname, Junction *r ) | |
#else | |
addFoLink( p, rname, r ) | |
Node *p; | |
char *rname; | |
Junction *r; | |
#endif | |
{ | |
Junction *j; | |
require(r!=NULL, "addFoLink: incorrect rule graph"); | |
require(r->end!=NULL, "addFoLink: incorrect rule graph"); | |
require(r->end->jtype==EndRule, "addFoLink: incorrect rule graph"); | |
require(p!=NULL, "addFoLink: NULL FOLLOW link"); | |
j = newJunction(); | |
j->rname = rname; /* rname on follow links point to target rule */ | |
j->p1 = p; /* link to other rule */ | |
j->p2 = (Node *) r->end->p1;/* point to head of list */ | |
r->end->p1 = (Node *) j; /* reset head to point to new node */ | |
} | |
void | |
#ifdef __USE_PROTOS | |
GenCrossRef( Junction *p ) | |
#else | |
GenCrossRef( p ) | |
Junction *p; | |
#endif | |
{ | |
set a; | |
Junction *j; | |
RuleEntry *q; | |
unsigned e; | |
require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?"); | |
printf("Cross Reference:\n\n"); | |
a = empty; | |
for (; p!=NULL; p = (Junction *)p->p2) | |
{ | |
printf("Rule %20s referenced by {", p->rname); | |
/* make a set of rules for uniqueness */ | |
for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2) | |
{ | |
q = (RuleEntry *) hash_get(Rname, j->rname); | |
require(q!=NULL, "GenCrossRef: FoLinks are screwed up"); | |
set_orel(q->rulenum, &a); | |
} | |
for (; !set_nil(a); set_rm(e, a)) | |
{ | |
e = set_int(a); | |
printf(" %s", RulePtr[e]->rname); | |
} | |
printf(" }\n"); | |
} | |
set_free( a ); | |
} | |
/* | |
The single argument is a pointer to the start of an element of a | |
C++ style function prototypet list. Given a pointer to the start of | |
an formal we must locate the comma (or the end of the string) | |
and locate the datatype, formal name, and initial value expression. | |
The function returns a pointer to the character following the comma | |
which terminates the formal declaration, or a pointer to the end of | |
the string if none was found. | |
I thought we were parsing specialists, how come I'm doing this by | |
hand written code ? | |
Examples of input: | |
Foo f, | |
Foo f = Foo(1), | |
Foo f = Foo(1,2), | |
Foo f = &farray[1,2], | |
Foo f = ",", | |
Foo f = ',', | |
TFoo<int,char> f = TFoo<int,char>(1,2), | |
A non-zero value for nesting indicates a problem matching '(' and ')', | |
'[' and ']', '<' and '>', '{' and '}', or improperly terminated string | |
or character literal. | |
*/ | |
/* | |
* Don't care if it is a valid string literal or not, just find the end | |
* Start with pointer to leading "\"" | |
*/ | |
#ifdef __USE_PROTOS | |
char * skipStringLiteral(char *pCurrent) | |
#else | |
char * skipStringLiteral(pCurrent) | |
char *pCurrent; | |
#endif | |
{ | |
char *p = pCurrent; | |
if (*p == 0) return p; | |
require (*p == '\"', "skipStringLiteral") | |
p++; | |
for (p = p; *p != 0; p++) { | |
if (*p == '\\') { | |
p++; | |
if (*p == 0) break; | |
p++; | |
} | |
if (*p == '\"') { | |
p++; | |
break; | |
} | |
} | |
return p; | |
} | |
/* | |
* Don't care if it is a valid character literal or not, just find the end | |
* Start with pointer to leading "'" | |
*/ | |
#ifdef __USE_PROTOS | |
char * skipCharLiteral(char *pStart) | |
#else | |
char * skipCharLiteral(pStart) | |
char *pStart; | |
#endif | |
{ | |
char *p = pStart; | |
if (*p == 0) return p; | |
require (*p == '\'', "skipCharLiteral") | |
p++; | |
for (p = p; *p != 0; p++) { | |
if (*p == '\\') { | |
p++; | |
if (*p == 0) break; | |
p++; | |
} | |
if (*p == '\'') { | |
p++; | |
break; | |
} | |
} | |
return p; | |
} | |
#ifdef __USE_PROTOS | |
char * skipSpaces(char *pStart) | |
#else | |
char * skipSpaces(pStart) | |
char * pStart; | |
#endif | |
{ | |
char *p = pStart; | |
while (*p != 0 && isspace(*p)) p++; | |
return p; | |
} | |
#ifdef __USE_PROTOS | |
char * skipToSeparatorOrEqualSign(char *pStart, int *pNest) | |
#else | |
char * skipToSeparatorOrEqualSign(pStart, pNest) | |
char *pStart; | |
int *pNest; | |
#endif | |
{ | |
char *p = pStart; | |
int nest = 0; | |
*pNest = (-1); | |
while (*p != 0) { | |
switch (*p) { | |
case '(' : | |
case '[' : | |
case '<' : | |
case '{' : | |
nest++; | |
p++; | |
break; | |
case ')' : | |
case ']' : | |
case '>' : | |
case '}' : | |
nest--; | |
p++; | |
break; | |
case '"' : | |
p = skipStringLiteral(p); | |
break; | |
case '\'' : | |
p = skipCharLiteral(p); | |
break; | |
case '\\': | |
p++; | |
if (*p == 0) goto EXIT; | |
p++; | |
break; | |
case ',': | |
case '=': | |
if (nest == 0) goto EXIT; | |
p++; | |
break; | |
default: | |
p++; | |
} | |
} | |
EXIT: | |
*pNest = nest; | |
return p; | |
} | |
#ifdef __USE_PROTOS | |
char * skipToSeparator(char *pStart, int *pNest) | |
#else | |
char * skipToSeparator(pStart, pNest) | |
char *pStart; | |
int *pNest; | |
#endif | |
{ | |
char * p = pStart; | |
for ( ; ; ) { | |
p = skipToSeparatorOrEqualSign(p, pNest); | |
if (*pNest != 0) return p; | |
if (*p == ',') return p; | |
if (*p == 0) return p; | |
p++; | |
} | |
} | |
/* skip to just past the "=" separating the declaration from the initialization value */ | |
#ifdef __USE_PROTOS | |
char * getInitializer(char *pStart) | |
#else | |
char * getInitializer(pStart) | |
char * pStart; | |
#endif | |
{ | |
char *p; | |
char *pDataType; | |
char *pSymbol; | |
char *pEqualSign; | |
char *pValue; | |
char *pSeparator; | |
int nest = 0; | |
require(pStart!=NULL, "getInitializer: invalid string"); | |
p = endFormal(pStart, | |
&pDataType, | |
&pSymbol, | |
&pEqualSign, | |
&pValue, | |
&pSeparator, | |
&nest); | |
if (nest != 0) return NULL; | |
if (pEqualSign == NULL) return NULL; | |
if (pValue == NULL) return NULL; | |
return strBetween(pValue, NULL, pSeparator); | |
} | |
/* | |
Examines the string from pStart to pEnd-1. | |
If the string has 0 length or is entirely white space | |
returns 1. Otherwise 0. | |
*/ | |
#ifdef __USE_PROTOS | |
int isWhiteString(const char *pStart, const char *pEnd) | |
#else | |
int isWhiteString(pStart, pEnd) | |
const char *pStart; | |
const char *pEnd; | |
#endif | |
{ | |
const char *p; | |
for (p = pStart; p < pEnd; p++) { | |
if (! isspace(*p)) return 0; | |
} | |
return 1; | |
} | |
/* | |
This replaces HasComma() which couldn't distinguish | |
foo ["a,b"] | |
from: | |
foo[a,b] | |
*/ | |
#ifdef __USE_PROTOS | |
int hasMultipleOperands(char *pStart) | |
#else | |
int hasMultipleOperands(pStart) | |
char *pStart; | |
#endif | |
{ | |
char *p = pStart; | |
int nest = 0; | |
p = skipSpaces(p); | |
if (*p == 0) return 0; | |
p = skipToSeparator(p, &nest); | |
if (nest == 0 && *p == ',') return 1; | |
return 0; | |
} | |
#define MAX_STR_BETWEEN_WORK_AREA 1000 | |
static char strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA]; | |
/* | |
strBetween(pStart, pNext, pStop) | |
Creates a null terminated string by copying the text between two pointers | |
to a work area. The start of the string is pStart. The end of the string | |
is the character before pNext, or if pNext is null then the character before | |
pStop. Trailing spaces are not included in the copy operation. | |
This is used when a string contains several parts. The pNext part may be | |
optional. The pStop will stop the scan when the optional part is not present | |
(is a null pointer). | |
*/ | |
#ifdef __USE_PROTOS | |
char *strBetween(char *pStart, char *pNext, char *pStop) | |
#else | |
char *strBetween(pStart, pNext, pStop) | |
char *pStart; | |
char *pNext; | |
char *pStop; | |
#endif | |
{ | |
char *p; | |
char *q = strBetweenWorkArea; | |
const char *pEnd; | |
pEnd = (pNext != NULL) ? pNext : pStop; | |
require (pEnd != NULL, "pEnd == NULL"); | |
require (pEnd >= pStart, "pEnd < pStart"); | |
for (pEnd--; pEnd >= pStart; pEnd--) { /* MR31 */ | |
if (! isspace(*pEnd)) break; | |
} | |
for (p = pStart; | |
p <= pEnd && q < &strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA-2]; | |
p++, q++) { | |
*q = *p; | |
} | |
*q = 0; | |
return strBetweenWorkArea; | |
} | |
/* | |
function Returns pointer to character following separator at | |
value which to continue search for next formal. If at the | |
end of the string a pointer to the null byte at the | |
end of the string is returned. | |
pStart Pointer to the starting position of the formal list | |
This may be the middle of a longer string, for example | |
when looking for the end of formal #3 starting from | |
the middle of the complete formal list. | |
ppDataType Returns a pointer to the start of the data type in the | |
formal. Example: pointer to "Foo". | |
ppSymbol Returns a pointer to the start of the formal symbol. | |
Example: pointer to "f". | |
ppEqualSign Returns a pointer to the equal sign separating the | |
formal symbol from the initial value. If there is | |
no "=" then this will be NULL. | |
ppValue Returns a pointer to the initial value part of the | |
formal declaration. Example: pointer to "&farray[1,2]" | |
ppSeparator Returns a pointer to the character which terminated the | |
scan. This should be a pointer to a comma or a null | |
byte which terminates the string. | |
pNest Returns the nesting level when a separator was found. | |
This is non-zero for any kind of error. This is zero | |
for a successful parse of this portion of the formal | |
list. | |
*/ | |
#ifdef __USE_PROTOS | |
char * endFormal(char *pStart, | |
char **ppDataType, | |
char **ppSymbol, | |
char **ppEqualSign, | |
char **ppValue, | |
char **ppSeparator, | |
int *pNest) | |
#else | |
char * endFormal(pStart, | |
ppDataType, | |
ppSymbol, | |
ppEqualSign, | |
ppValue, | |
ppSeparator, | |
pNest) | |
char *pStart; | |
char **ppDataType; | |
char **ppSymbol; | |
char **ppEqualSign; | |
char **ppValue; | |
char **ppSeparator; | |
int *pNest; | |
#endif | |
{ | |
char *p = pStart; | |
char *q; | |
*ppDataType = NULL; | |
*ppSymbol = NULL; | |
*ppEqualSign = NULL; | |
*ppValue = NULL; | |
*ppSeparator = NULL; | |
*pNest = 0; | |
/* The first non-blank is the start of the datatype */ | |
p = skipSpaces(p); | |
if (*p == 0) goto EXIT; | |
*ppDataType = p; | |
/* We are not looking for the symbol, we are looking | |
for the separator that follows the symbol. Then | |
we'll back up. | |
Search for the ',' or '=" or null terminator. | |
*/ | |
p = skipToSeparatorOrEqualSign(p, pNest); | |
if (*pNest != 0) goto EXIT; | |
/* | |
Work backwards to find start of symbol | |
Skip spaces between the end of symbol and separator | |
Assume that there are no spaces in the formal, but | |
there is a space preceding the formal | |
*/ | |
for (q = &p[-1]; q >= *ppDataType; q--) { | |
if (! isspace(*q)) break; | |
} | |
if (q < *ppDataType) goto EXIT; | |
/* | |
MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)() | |
Backup until we hit the end of a symbol string, then find the | |
start of the symbol string. This wont' work for functions | |
with prototypes, but works for the most common cases. For | |
others, use typedef names. | |
*/ | |
/* MR26 */ for (q = q; q >= *ppDataType; q--) { | |
/* MR26 */ if (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$') break; | |
/* MR26 */ } | |
/* MR26 */ if (q < *ppDataType) goto EXIT; | |
for (q = q; q >= *ppDataType; q--) { | |
if ( ! (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$')) break; | |
} | |
*ppSymbol = &q[1]; | |
if (*p == ',' || *p == 0) { | |
*ppSeparator = p; | |
goto EXIT; | |
} | |
*ppEqualSign = p; | |
p = skipSpaces(++p); | |
*ppValue = p; | |
if (*p == 0) goto EXIT; | |
while (*p != 0 && *pNest == 0 && *p != ',') { | |
p = skipToSeparator(p, pNest); | |
} | |
if (*pNest == 0) *ppSeparator = p; | |
EXIT: | |
if (*p == ',') p++; | |
return p; | |
} |