/* Pawn compiler - File input, preprocessing and lexical analysis functions * * Copyright (c) ITB CompuPhase, 1997-2005 * * This software is provided "as-is", without any express or implied warranty. * In no event will the authors be held liable for any damages arising from * the use of this software. * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software in * a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. * * Version: $Id$ */ #include #include #include #include #include #include #include "sc.h" #if defined LINUX || defined __FreeBSD__ || defined __OpenBSD__ #include #endif #if defined FORTIFY #include "fortify.h" #endif /* flags for litchar() */ #define RAWMODE 1 #define UTF8MODE 2 static cell litchar(const unsigned char **lptr,int flags); static void substallpatterns(unsigned char *line,int buffersize); static int match(char *st,int end); static int alpha(char c); #define SKIPMODE 1 /* bit field in "#if" stack */ #define PARSEMODE 2 /* bit field in "#if" stack */ #define HANDLED_ELSE 4 /* bit field in "#if" stack */ #define SKIPPING (skiplevel>0 && (ifstack[skiplevel-1] & SKIPMODE)==SKIPMODE) static short icomment; /* currently in multiline comment? */ static char ifstack[sCOMP_STACK]; /* "#if" stack */ static short iflevel; /* nesting level if #if/#else/#endif */ static short skiplevel; /* level at which we started skipping (including nested #if .. #endif) */ static unsigned char term_expr[] = ""; static int listline=-1; /* "current line" for the list file */ /* pushstk & popstk * * Uses a LIFO stack to store information. The stack is used by doinclude(), * doswitch() (to hold the state of "swactive") and some other routines. * * Porting note: I made the bold assumption that an integer will not be * larger than a pointer (it may be smaller). That is, the stack element * is typedef'ed as a pointer type, but I also store integers on it. See * SC.H for "stkitem" * * Global references: stack,stkidx,stktop (private to pushstk(), popstk() * and clearstk()) */ static stkitem *stack=NULL; static int stkidx=0,stktop=0; SC_FUNC void pushstk(stkitem val) { assert(stkidx<=stktop); if (stkidx==stktop) { stkitem *newstack; int newsize= (stktop==0) ? 16 : 2*stktop; /* try to resize the stack */ assert(newsize>stktop); newstack=(stkitem*)malloc(newsize*sizeof(stkitem)); if (newstack==NULL) error(102,"parser stack"); /* stack overflow (recursive include?) */ /* swap the stacks */ memcpy(newstack,stack,stkidx*sizeof(stkitem)); free(stack); stack=newstack; stktop=newsize; } /* if */ assert(stkidx'); /* termination character */ lptr++; while (*lptr<=' ' && *lptr!='\0') /* skip whitespace after quote */ lptr++; } else { c='\0'; } /* if */ i=0; while (*lptr!=c && *lptr!='\0' && i0 && name[i-1]<=' ') i--; /* strip trailing whitespace */ assert(i>=0 && i are only read from the list of include directories. */ result=plungefile(name,(c!='>'),TRUE); if (!result && !silent) error(100,name); /* cannot read from ... (fatal error) */ } /* readline * * Reads in a new line from the input file pointed to by "inpf". readline() * concatenates lines that end with a \ with the next line. If no more data * can be read from the file, readline() attempts to pop off the previous file * from the stack. If that fails too, it sets "freading" to 0. * * Global references: inpf,fline,inpfname,freading,icomment (altered) */ static void readline(unsigned char *line) { int i,num,cont; unsigned char *ptr; if (lptr==term_expr) return; num=sLINEMAX; cont=FALSE; do { if (inpf==NULL || pc_eofsrc(inpf)) { if (cont) error(49); /* invalid line continuation */ if (inpf!=NULL && inpf!=inpf_org) pc_closesrc(inpf); i=POPSTK_I(); if (i==-1) { /* All's done; popstk() returns "stack is empty" */ freading=FALSE; *line='\0'; /* when there is nothing more to read, the #if/#else stack should * be empty and we should not be in a comment */ assert(iflevel>=0); if (iflevel>0) error(1,"#endif","-end of file-"); else if (icomment!=0) error(1,"*/","-end of file-"); return; } /* if */ fline=i; fcurrent=(short)POPSTK_I(); icomment=(short)POPSTK_I(); sc_is_utf8=(short)POPSTK_I(); iflevel=(short)POPSTK_I(); skiplevel=iflevel; /* this condition held before including the file */ assert(!SKIPPING); /* idem ditto */ curlibrary=(constvalue *)POPSTK_P(); free(inpfname); /* return memory allocated for the include file name */ inpfname=(char *)POPSTK_P(); inpf=(FILE *)POPSTK_P(); insert_dbgfile(inpfname); setfiledirect(inpfname); listline=-1; /* force a #line directive when changing the file */ } /* if */ if (pc_readsrc(inpf,line,num)==NULL) { *line='\0'; /* delete line */ cont=FALSE; } else { /* check whether to erase leading spaces */ if (cont) { unsigned char *ptr=line; while (*ptr<=' ' && *ptr!='\0') ptr++; if (ptr!=line) memmove(line,ptr,strlen((char*)ptr)+1); } /* if */ cont=FALSE; /* check whether a full line was read */ if (strchr((char*)line,'\n')==NULL && !pc_eofsrc(inpf)) error(75); /* line too long */ /* check if the next line must be concatenated to this line */ if ((ptr=(unsigned char*)strchr((char*)line,'\n'))==NULL) ptr=(unsigned char*)strchr((char*)line,'\r'); if (ptr!=NULL && ptr>line) { assert(*(ptr+1)=='\0'); /* '\n' or '\r' should be last in the string */ while (ptr>line && *ptr<=' ') ptr--; /* skip trailing whitespace */ if (*ptr=='\\') { cont=TRUE; /* set '\a' at the position of '\\' to make it possible to check * for a line continuation in a single line comment (error 49) */ *ptr++='\a'; *ptr='\0'; /* erase '\n' (and any trailing whitespace) */ } /* if */ } /* if */ num-=strlen((char*)line); line+=strlen((char*)line); } /* if */ fline+=1; } while (num>=0 && cont); } /* stripcom * * Replaces all comments from the line by space characters. It updates * a global variable ("icomment") for multiline comments. * * This routine also supports the C++ extension for single line comments. * These comments are started with "//" and end at the end of the line. * * The function also detects (and manages) "documentation comments". The * global variable "icomment" is set to 2 for documentation comments. * * Global references: icomment (private to "stripcom") */ static void stripcom(unsigned char *line) { char c; #if !defined SC_LIGHT #define COMMENT_LIMIT 100 #define COMMENT_MARGIN 40 /* length of the longest word */ char comment[COMMENT_LIMIT+COMMENT_MARGIN]; int commentidx=0; int skipstar=TRUE; static int prev_singleline=FALSE; int singleline=prev_singleline; prev_singleline=FALSE; /* preset */ #endif while (*line){ if (icomment!=0) { if (*line=='*' && *(line+1)=='/') { #if !defined SC_LIGHT if (icomment==2) { assert(commentidx0) insert_docstring(comment); } /* if */ #endif icomment=0; /* comment has ended */ *line=' '; /* replace '*' and '/' characters by spaces */ *(line+1)=' '; line+=2; } else { if (*line=='/' && *(line+1)=='*') error(216); /* nested comment */ #if !defined SC_LIGHT /* collect the comment characters in a string */ if (icomment==2) { if (skipstar && (*line!='\0' && *line<=' ' || *line=='*')) { /* ignore leading whitespace and '*' characters */ } else if (commentidxCOMMENT_LIMIT && *line!='\0' && *line<=' ') { comment[commentidx]='\0'; insert_docstring(comment); commentidx=0; } /* if */ skipstar=FALSE; } /* if */ } /* if */ #endif *line=' '; /* replace comments by spaces */ line+=1; } /* if */ } else { if (*line=='/' && *(line+1)=='*'){ icomment=1; /* start comment */ #if !defined SC_LIGHT /* there must be two "*" behind the slash and then white space */ if (*(line+2)=='*' && *(line+3)<=' ') { /* if we are not in a function, we must attach the previous block * to the global documentation */ if (curfunc==NULL && get_docstring(0)!=NULL) sc_attachdocumentation(NULL); icomment=2; /* documentation comment */ } /* if */ commentidx=0; skipstar=TRUE; #endif *line=' '; /* replace '/' and '*' characters by spaces */ *(line+1)=' '; line+=2; if (icomment==2) *line++=' '; } else if (*line=='/' && *(line+1)=='/'){ /* comment to end of line */ if (strchr((char*)line,'\a')!=NULL) error(49); /* invalid line continuation */ #if !defined SC_LIGHT if (*(line+2)=='/' && *(line+3)<=' ') { /* documentation comment */ char *str=(char*)line+3; char *end; while (*str<=' ' && *str!='\0') str++; /* skip leading whitespace */ if ((end=strrchr(str,'\n'))!=NULL) *end='\0';/* erase trailing '\n' */ /* if there is a disjunct block, we may need to attach the previous * block to the global documentation */ if (!singleline && curfunc==NULL && get_docstring(0)!=NULL) sc_attachdocumentation(NULL); insert_docstring(str); prev_singleline=TRUE; } /* if */ #endif *line++='\n'; /* put "newline" at first slash */ *line='\0'; /* put "zero-terminator" at second slash */ } else { if (*line=='\"' || *line=='\''){ /* leave literals unaltered */ c=*line; /* ending quote, single or double */ line+=1; while ((*line!=c || *(line-1)==sc_ctrlchar) && *line!='\0') line+=1; line+=1; /* skip final quote */ } else { line+=1; } /* if */ } /* if */ } /* if */ } /* while */ #if !defined SC_LIGHT if (icomment==2) { assert(commentidx0) insert_docstring(comment); } /* if */ #endif } /* btoi * * Attempts to interpret a numeric symbol as a boolean value. On success * it returns the number of characters processed (so the line pointer can be * adjusted) and the value is stored in "val". Otherwise it returns 0 and * "val" is garbage. * * A boolean value must start with "0b" */ static int btoi(cell *val,const unsigned char *curptr) { const unsigned char *ptr; *val=0; ptr=curptr; if (*ptr=='0' && *(ptr+1)=='b') { ptr+=2; while (*ptr=='0' || *ptr=='1' || *ptr=='_') { if (*ptr!='_') *val=(*val<<1) | (*ptr-'0'); ptr++; } /* while */ } else { return 0; } /* if */ if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */ return 0; else return (int)(ptr-curptr); } /* dtoi * * Attempts to interpret a numeric symbol as a decimal value. On success * it returns the number of characters processed and the value is stored in * "val". Otherwise it returns 0 and "val" is garbage. */ static int dtoi(cell *val,const unsigned char *curptr) { const unsigned char *ptr; *val=0; ptr=curptr; if (!isdigit(*ptr)) /* should start with digit */ return 0; while (isdigit(*ptr) || *ptr=='_') { if (*ptr!='_') *val=(*val*10)+(*ptr-'0'); ptr++; } /* while */ if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */ return 0; if (*ptr=='.' && isdigit(*(ptr+1))) return 0; /* but a fractional part must not be present */ return (int)(ptr-curptr); } /* htoi * * Attempts to interpret a numeric symbol as a hexadecimal value. On * success it returns the number of characters processed and the value is * stored in "val". Otherwise it return 0 and "val" is garbage. */ static int htoi(cell *val,const unsigned char *curptr) { const unsigned char *ptr; *val=0; ptr=curptr; if (!isdigit(*ptr)) /* should start with digit */ return 0; if (*ptr=='0' && *(ptr+1)=='x') { /* C style hexadecimal notation */ ptr+=2; while (ishex(*ptr) || *ptr=='_') { if (*ptr!='_') { assert(ishex(*ptr)); *val= *val<<4; if (isdigit(*ptr)) *val+= (*ptr-'0'); else *val+= (tolower(*ptr)-'a'+10); } /* if */ ptr++; } /* while */ } else { return 0; } /* if */ if (alphanum(*ptr)) return 0; else return (int)(ptr-curptr); } #if defined __GNUC__ static double pow10(int value) { double res=1.0; while (value>=4) { res*=10000.0; value-=5; } /* while */ while (value>=2) { res*=100.0; value-=2; } /* while */ while (value>=1) { res*=10.0; value-=1; } /* while */ return res; } #endif /* ftoi * * Attempts to interpret a numeric symbol as a rational number, either as * IEEE 754 single/double precision floating point or as a fixed point integer. * On success it returns the number of characters processed and the value is * stored in "val". Otherwise it returns 0 and "val" is unchanged. * * Pawn has stricter definition for rational numbers than most: * o the value must start with a digit; ".5" is not a valid number, you * should write "0.5" * o a period must appear in the value, even if an exponent is given; "2e3" * is not a valid number, you should write "2.0e3" * o at least one digit must follow the period; "6." is not a valid number, * you should write "6.0" */ static int ftoi(cell *val,const unsigned char *curptr) { const unsigned char *ptr; double fnum,ffrac,fmult; unsigned long dnum,dbase; int i, ignore; assert(rational_digits>=0 && rational_digits<9); for (i=0,dbase=1; i0 && !ignore) { error(222); /* number of digits exceeds rational number precision */ ignore=TRUE; } /* if */ } /* if */ ptr++; } /* while */ fnum += ffrac*fmult; /* form the number so far */ if (*ptr=='e') { /* optional fractional part */ int exp,sign; ptr++; if (*ptr=='-') { sign=-1; ptr++; } else { sign=1; } /* if */ if (!isdigit(*ptr)) /* 'e' should be followed by a digit */ return 0; exp=0; while (isdigit(*ptr)) { exp=(exp*10)+(*ptr-'0'); ptr++; } /* while */ #if defined __GNUC__ fmult=pow10(exp*sign); #else fmult=pow(10,exp*sign); #endif fnum *= fmult; dnum *= (unsigned long)(fmult+0.5); } /* if */ /* decide how to store the number */ if (sc_rationaltag==0) { error(70); /* rational number support was not enabled */ *val=0; } else if (rational_digits==0) { /* floating point */ #if PAWN_CELL_SIZE==32 float value=(float)fnum; *val=*((cell *)&value); #if !defined NDEBUG /* I assume that the C/C++ compiler stores "float" values in IEEE 754 * format (as mandated in the ANSI standard). Test this assumption * anyway. * Note: problems have been reported with GCC 3.2.x, version 3.3.x works. */ { float test1 = 0.0, test2 = 50.0, test3 = -50.0; uint32_t bit = 1; /* test 0.0 == all bits 0 */ assert(*(uint32_t*)&test1==0x00000000L); /* test sign & magnitude format */ assert(((*(uint32_t*)&test2) ^ (*(uint32_t*)&test3)) == (bit << (PAWN_CELL_SIZE-1))); /* test a known value */ assert(*(uint32_t*)&test2==0x42480000L); } #endif #elif PAWN_CELL_SIZE==64 *val=*((cell *)&fnum); #else #error Unsupported cell size #endif } else { /* fixed point */ *val=(cell)dnum; } /* if */ return (int)(ptr-curptr); } /* number * * Reads in a number (binary, decimal or hexadecimal). It returns the number * of characters processed or 0 if the symbol couldn't be interpreted as a * number (in this case the argument "val" remains unchanged). This routine * relies on the 'early dropout' implementation of the logical or (||) * operator. * * Note: the routine doesn't check for a sign (+ or -). The - is checked * for at "hier2()" (in fact, it is viewed as an operator, not as a * sign) and the + is invalid (as in K&R C, and unlike ANSI C). */ static int number(cell *val,const unsigned char *curptr) { int i; cell value; if ((i=btoi(&value,curptr))!=0 /* binary? */ || (i=htoi(&value,curptr))!=0 /* hexadecimal? */ || (i=dtoi(&value,curptr))!=0) /* decimal? */ { *val=value; return i; } else { return 0; /* else not a number */ } /* if */ } static void chrcat(char *str,char chr) { str=strchr(str,'\0'); *str++=chr; *str='\0'; } static int preproc_expr(cell *val,int *tag) { int result; int index; cell code_index; char *term; /* Disable staging; it should be disabled already because * expressions may not be cut off half-way between conditional * compilations. Reset the staging index, but keep the code * index. */ if (stgget(&index,&code_index)) { error(57); /* unfinished expression */ stgdel(0,code_index); stgset(FALSE); } /* if */ assert((lptr-pline)<(int)strlen((char*)pline)); /* lptr must point inside the string */ #if !defined NO_DEFINE /* preprocess the string */ substallpatterns(pline,sLINEMAX); assert((lptr-pline)<(int)strlen((char*)pline)); /* lptr must STILL point inside the string */ #endif /* append a special symbol to the string, so the expression * analyzer won't try to read a next line when it encounters * an end-of-line */ assert(strlen((char*)pline)=0); if (iflevel>=sCOMP_STACK) error(102,"Conditional compilation stack"); /* table overflow */ iflevel++; if (SKIPPING) break; /* break out of switch */ skiplevel=iflevel; preproc_expr(&val,NULL); /* get value (or 0 on error) */ ifstack[iflevel-1]=(char)(val ? PARSEMODE : SKIPMODE); check_empty(lptr); break; case tpELSE: case tpELSEIF: ret=CMD_IF; assert(iflevel>=0); if (iflevel==0) { error(26); /* no matching #if */ errorset(sRESET); } else { /* check for earlier #else */ if ((ifstack[iflevel-1] & HANDLED_ELSE)==HANDLED_ELSE) { if (tok==tpELSEIF) error(61); /* #elseif directive may not follow an #else */ else error(60); /* multiple #else directives between #if ... #endif */ errorset(sRESET); } else { assert(iflevel>0); /* if there has been a "parse mode" on this level, set "skip mode", * otherwise, clear "skip mode" */ if ((ifstack[iflevel-1] & PARSEMODE)==PARSEMODE) { /* there has been a parse mode already on this level, so skip the rest */ ifstack[iflevel-1] |= (char)SKIPMODE; } else { /* previous conditions were all FALSE */ if (tok==tpELSEIF) { /* get new expression */ preproc_expr(&val,NULL); /* get value (or 0 on error) */ ifstack[iflevel-1]=(char)(val ? PARSEMODE : SKIPMODE); } else { /* a simple #else, clear skip mode */ ifstack[iflevel-1] &= (char)~SKIPMODE; } /* if */ } /* if */ } /* if */ } /* if */ check_empty(lptr); break; case tpENDIF: ret=CMD_IF; if (iflevel==0){ error(26); /* no matching "#if" */ errorset(sRESET); } else { iflevel--; if (iflevel0) { free(inpfname); inpfname=duplicatestring(pathname); if (inpfname==NULL) error(103); /* insufficient memory */ } /* if */ } /* if */ check_empty(lptr); break; case tpLINE: if (!SKIPPING) { if (lex(&val,&str)!=tNUMBER) error(8); /* invalid/non-constant expression */ fline=(int)val; } /* if */ check_empty(lptr); break; case tpASSERT: if (!SKIPPING && (sc_debug & sCHKBOUNDS)!=0) { for (str=(char*)lptr; *str<=' ' && *str!='\0'; str++) /* nothing */; /* save start of expression */ preproc_expr(&val,NULL); /* get constant expression (or 0 on error) */ if (!val) error(110,str); /* assertion failed */ check_empty(lptr); } /* if */ break; case tpPRAGMA: if (!SKIPPING) { if (lex(&val,&str)==tSYMBOL) { if (strcmp(str,"amxlimit")==0) { preproc_expr(&sc_amxlimit,NULL); } else if (strcmp(str,"codepage")==0) { char name[sNAMEMAX+1]; while (*lptr<=' ' && *lptr!='\0') lptr++; if (*lptr=='"') { lptr=getstring((unsigned char*)name,sizeof name,lptr); } else { int i; for (i=0; i9) { error(68); /* invalid rational number precision */ digits=0; } /* if */ if (*lptr==')') lptr++; } /* if */ /* add the tag (make it public) and check the values */ i=pc_addtag(name); exporttag(i); if (sc_rationaltag==0 || (sc_rationaltag==i && rational_digits==(int)digits)) { sc_rationaltag=i; rational_digits=(int)digits; } else { error(69); /* rational number format already set, can only be set once */ } /* if */ } else if (strcmp(str,"semicolon")==0) { cell val; preproc_expr(&val,NULL); sc_needsemicolon=(int)val; } else if (strcmp(str,"tabsize")==0) { cell val; preproc_expr(&val,NULL); sc_tabsize=(int)val; } else if (strcmp(str,"align")==0) { sc_alignnext=TRUE; } else if (strcmp(str,"unused")==0) { char name[sNAMEMAX+1]; int i,comma; symbol *sym; do { /* get the name */ while (*lptr<=' ' && *lptr!='\0') lptr++; for (i=0; iusage |= uREAD; if (sym->ident==iVARIABLE || sym->ident==iREFERENCE || sym->ident==iARRAY || sym->ident==iREFARRAY) sym->usage |= uWRITTEN; } else { error(17,name); /* undefined symbol */ } /* if */ /* see if a comma follows the name */ while (*lptr<=' ' && *lptr!='\0') lptr++; comma= (*lptr==','); if (comma) lptr++; } while (comma); } else { error(207); /* unknown #pragma */ } /* if */ } else { error(207); /* unknown #pragma */ } /* if */ check_empty(lptr); } /* if */ break; case tpENDINPUT: case tpENDSCRPT: if (!SKIPPING) { check_empty(lptr); assert(inpf!=NULL); if (inpf!=inpf_org) pc_closesrc(inpf); inpf=NULL; } /* if */ break; #if !defined NOEMIT case tpEMIT: { /* write opcode to output file */ char name[40]; int i; while (*lptr<=' ' && *lptr!='\0') lptr++; for (i=0; i<40 && (isalpha(*lptr) || *lptr=='.'); i++,lptr++) name[i]=(char)tolower(*lptr); name[i]='\0'; stgwrite("\t"); stgwrite(name); stgwrite(" "); code_idx+=opcodes(1); /* write parameter (if any) */ while (*lptr<=' ' && *lptr!='\0') lptr++; if (*lptr!='\0') { symbol *sym; tok=lex(&val,&str); switch (tok) { case tNUMBER: case tRATIONAL: outval(val,FALSE); code_idx+=opargs(1); break; case tSYMBOL: sym=findloc(str); if (sym==NULL) sym=findglb(str); if (sym==NULL || sym->ident!=iFUNCTN && sym->ident!=iREFFUNC && (sym->usage & uDEFINE)==0) { error(17,str); /* undefined symbol */ } else { outval(sym->addr,FALSE); /* mark symbol as "used", unknown whether for read or write */ markusage(sym,uREAD | uWRITTEN); code_idx+=opargs(1); } /* if */ break; default: { char s2[20]; extern char *sc_tokens[];/* forward declaration */ if (tok<256) sprintf(s2,"%c",(char)tok); else strcpy(s2,sc_tokens[tok-tFIRST]); error(1,sc_tokens[tSYMBOL-tFIRST],s2); break; } /* case */ } /* switch */ } /* if */ stgwrite("\n"); check_empty(lptr); break; } /* case */ #endif #if !defined NO_DEFINE case tpDEFINE: { ret=CMD_DEFINE; if (!SKIPPING) { char *pattern,*substitution; const unsigned char *start,*end; int count,prefixlen; stringpair *def; /* find the pattern to match */ while (*lptr<=' ' && *lptr!='\0') lptr++; start=lptr; /* save starting point of the match pattern */ count=0; while (*lptr>' ' && *lptr!='\0') { litchar(&lptr,0); /* litchar() advances "lptr" and handles escape characters */ count++; } /* while */ end=lptr; /* check pattern to match */ if (!isalpha(*start) && *start!='_') { error(74); /* pattern must start with an alphabetic character */ break; } /* if */ /* store matched pattern */ pattern=(char*)malloc(count+1); if (pattern==NULL) error(103); /* insufficient memory */ lptr=start; count=0; while (lptr!=end) { assert(lptr=2 && isdigit(pattern[count-1]) && pattern[count-2]=='%') pattern[count-2]='\0'; /* find substitution string */ while (*lptr<=' ' && *lptr!='\0') lptr++; start=lptr; /* save starting point of the match pattern */ count=0; end=NULL; while (*lptr!='\0') { /* keep position of the start of trailing whitespace */ if (*lptr<=' ') { if (end==NULL) end=lptr; } else { end=NULL; } /* if */ count++; lptr++; } /* while */ if (end==NULL) end=lptr; /* store matched substitution */ substitution=(char*)malloc(count+1); /* +1 for '\0' */ if (substitution==NULL) error(103); /* insufficient memory */ lptr=start; count=0; while (lptr!=end) { assert(lptr0); if ((def=find_subst(pattern,prefixlen))!=NULL) { if (strcmp(def->first,pattern)!=0 || strcmp(def->second,substitution)!=0) error(201,pattern); /* redefinition of macro (non-identical) */ delete_subst(pattern,prefixlen); } /* if */ /* add the pattern/substitution pair to the list */ assert(strlen(pattern)>0); insert_subst(pattern,substitution,prefixlen); free(pattern); free(substitution); } /* if */ break; } /* case */ case tpUNDEF: if (!SKIPPING) { if (lex(&val,&str)==tSYMBOL) { if (!delete_subst(str,strlen(str))) error(17,str); /* undefined symbol */ } else { error(20,str); /* invalid symbol name */ } /* if */ check_empty(lptr); } /* if */ break; #endif case tpERROR: while (*lptr<=' ' && *lptr!='\0') lptr++; error(111,lptr); /* user error */ break; default: error(31); /* unknown compiler directive */ ret=SKIPPING ? CMD_CONDFALSE : CMD_NONE; /* process as normal line */ } /* switch */ return ret; } #if !defined NO_DEFINE static int is_startstring(const unsigned char *string) { if (*string=='\"' || *string=='\'') return TRUE; /* "..." */ if (*string=='!') { string++; if (*string=='\"' || *string=='\'') return TRUE; /* !"..." */ if (*string==sc_ctrlchar) { string++; if (*string=='\"' || *string=='\'') return TRUE; /* !\"..." */ } /* if */ } else if (*string==sc_ctrlchar) { string++; if (*string=='\"' || *string=='\'') return TRUE; /* \"..." */ if (*string=='!') { string++; if (*string=='\"' || *string=='\'') return TRUE; /* \!"..." */ } /* if */ } /* if */ return FALSE; } static const unsigned char *skipstring(const unsigned char *string) { char endquote; int flags=0; while (*string=='!' || *string==sc_ctrlchar) { if (*string==sc_ctrlchar) flags=RAWMODE; string++; } /* while */ endquote=*string; assert(endquote=='"' || endquote=='\''); string++; /* skip open quote */ while (*string!=endquote && *string!='\0') litchar(&string,flags); return string; } static const unsigned char *skippgroup(const unsigned char *string) { int nest=0; char open=*string; char close; switch (open) { case '(': close=')'; break; case '{': close='}'; break; case '[': close=']'; break; case '<': close='>'; break; default: assert(0); close='\0'; /* only to avoid a compiler warning */ }/* switch */ string++; while (*string!=close || nest>0) { if (*string==open) nest++; else if (*string==close) nest--; else if (is_startstring(string)) string=skipstring(string); if (*string=='\0') break; string++; } /* while */ return string; } static char *strdel(char *str,size_t len) { size_t length=strlen(str); if (len>length) len=length; memmove(str, str+len, length-len+1); /* include EOS byte */ return str; } static char *strins(char *dest,char *src,size_t srclen) { size_t destlen=strlen(dest); assert(srclen<=strlen(src)); memmove(dest+srclen, dest, destlen+1);/* include EOS byte */ memcpy(dest, src, srclen); return dest; } static int substpattern(unsigned char *line,size_t buffersize,char *pattern,char *substitution) { int prefixlen; const unsigned char *p,*s,*e; unsigned char *args[10]; int match,arg,len; memset(args,0,sizeof args); /* check the length of the prefix */ for (prefixlen=0,s=(unsigned char*)pattern; isalpha(*s) || isdigit(*s) || *s=='_'; prefixlen++,s++) /* nothing */; assert(prefixlen>0); assert(strncmp((char*)line,pattern,prefixlen)==0); /* pattern prefix matches; match the rest of the pattern, gather * the parameters */ s=line+prefixlen; p=(unsigned char*)pattern+prefixlen; match=TRUE; /* so far, pattern matches */ while (match && *s!='\0' && *p!='\0') { if (*p=='%') { p++; /* skip '%' */ if (isdigit(*p)) { arg=*p-'0'; assert(arg>=0 && arg<=9); p++; /* skip parameter id */ assert(*p!='\0'); /* match the source string up to the character after the digit * (skipping strings in the process */ e=s; while (*e!=*p && *e!='\0' && *e!='\n') { if (is_startstring(e)) /* skip strings */ e=skipstring(e); else if (strchr("({[",*e)!=NULL) /* skip parenthized groups */ e=skippgroup(e); if (*e!='\0') e++; /* skip non-alphapetic character (or closing quote of * a string, or the closing paranthese of a group) */ } /* while */ /* store the parameter (overrule any earlier) */ if (args[arg]!=NULL) free(args[arg]); len=(int)(e-s); args[arg]=(unsigned char*)malloc(len+1); if (args[arg]==NULL) error(103); /* insufficient memory */ strncpy((char*)args[arg],(char*)s,len); args[arg][len]='\0'; /* character behind the pattern was matched too */ if (*e==*p) { s=e+1; } else if (*e=='\n' && *p==';' && *(p+1)=='\0' && !sc_needsemicolon) { s=e; /* allow a trailing ; in the pattern match to end of line */ } else { assert(*e=='\0' || *e=='\n'); match=FALSE; s=e; } /* if */ p++; } else { match=FALSE; } /* if */ } else if (*p==';' && *(p+1)=='\0' && !sc_needsemicolon) { /* source may be ';' or end of the line */ while (*s<=' ' && *s!='\0') s++; /* skip white space */ if (*s!=';' && *s!='\0') match=FALSE; p++; /* skip the semicolon in the pattern */ } else { cell ch; /* skip whitespace between two non-alphanumeric characters, except * for two identical symbols */ assert((char*)p>pattern); if (!alphanum(*p) && *(p-1)!=*p) while (*s<=' ' && *s!='\0') s++; /* skip white space */ ch=litchar(&p,0); /* this increments "p" */ if (*s!=ch) match=FALSE; else s++; /* this character matches */ } /* if */ } /* while */ if (match && *p=='\0') { /* if the last character to match is an alphanumeric character, the * current character in the source may not be alphanumeric */ assert(p>(unsigned char*)pattern); if (alphanum(*(p-1)) && alphanum(*s)) match=FALSE; } /* if */ if (match) { /* calculate the length of the substituted string */ for (e=(unsigned char*)substitution,len=0; *e!='\0'; e++) { if (*e=='%' && isdigit(*(e+1))) { arg=*(e+1)-'0'; assert(arg>=0 && arg<=9); if (args[arg]!=NULL) len+=strlen((char*)args[arg]); e++; /* skip %, digit is skipped later */ } else { len++; } /* if */ } /* for */ /* check length of the string after substitution */ if (strlen((char*)line) + len - (int)(s-line) > buffersize) { error(75); /* line too long */ } else { /* substitute pattern */ strdel((char*)line,(int)(s-line)); for (e=(unsigned char*)substitution,s=line; *e!='\0'; e++) { if (*e=='%' && isdigit(*(e+1))) { arg=*(e+1)-'0'; assert(arg>=0 && arg<=9); if (args[arg]!=NULL) { strins((char*)s,(char*)args[arg],strlen((char*)args[arg])); s+=strlen((char*)args[arg]); } /* if */ e++; /* skip %, digit is skipped later */ } else { strins((char*)s,(char*)e,1); s++; } /* if */ } /* for */ } /* if */ } /* if */ for (arg=0; arg<10; arg++) if (args[arg]!=NULL) free(args[arg]); return match; } static void substallpatterns(unsigned char *line,int buffersize) { unsigned char *start, *end; int prefixlen; stringpair *subst; start=line; while (*start!='\0') { /* find the start of a prefix (skip all non-alphabetic characters), * also skip strings */ while (!isalpha(*start) && *start!='_' && *start!='\0') { /* skip strings */ if (is_startstring(start)) { start=(unsigned char *)skipstring(start); if (*start=='\0') break; /* abort loop on error */ } /* if */ start++; /* skip non-alphapetic character (or closing quote of a string) */ } /* while */ if (*start=='\0') break; /* abort loop on error */ /* if matching the operator "defined", skip it plus the symbol behind it */ if (strncmp((char*)start,"defined",7)==0 && !isalpha((char)*(start+7))) { start+=7; /* skip "defined" */ /* skip white space & parantheses */ while (*start<=' ' && *start!='\0' || *start=='(') start++; /* skip the symbol behind it */ while (isalpha(*start) || isdigit(*start) || *start=='_') start++; /* drop back into the main loop */ continue; } /* if */ /* get the prefix (length), look for a matching definition */ prefixlen=0; end=start; while (isalpha(*end) || isdigit(*end) || *end=='_') { prefixlen++; end++; } /* while */ assert(prefixlen>0); subst=find_subst((char*)start,prefixlen); if (subst!=NULL) { /* properly match the pattern and substitute */ if (!substpattern(start,buffersize-(int)(start-line),subst->first,subst->second)) start=end; /* match failed, skip this prefix */ /* match succeeded: do not update "start", because the substitution text * may be matched by other macros */ } else { start=end; /* no macro with this prefix, skip this prefix */ } /* if */ } /* while */ } #endif /* preprocess * * Reads a line by readline() into "pline" and performs basic preprocessing: * deleting comments, skipping lines with false "#if.." code and recognizing * other compiler directives. There is an indirect recursion: lex() calls * preprocess() if a new line must be read, preprocess() calls command(), * which at his turn calls lex() to identify the token. * * Global references: lptr (altered) * pline (altered) * freading (referred to only) */ SC_FUNC void preprocess(void) { int iscommand; if (!freading) return; do { readline(pline); stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */ lptr=pline; /* set "line pointer" to start of the parsing buffer */ iscommand=command(); if (iscommand!=CMD_NONE) errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */ #if !defined NO_DEFINE if (iscommand==CMD_NONE) { assert(lptr!=term_expr); substallpatterns(pline,sLINEMAX); lptr=pline; /* reset "line pointer" to start of the parsing buffer */ } /* if */ #endif if (sc_status==statFIRST && sc_listing && freading && (iscommand==CMD_NONE || iscommand==CMD_EMPTYLINE || iscommand==CMD_DIRECTIVE)) { listline++; if (fline!=listline) { listline=fline; setlinedirect(fline); } /* if */ if (iscommand==CMD_EMPTYLINE) pc_writeasm(outf,"\n"); else pc_writeasm(outf,(char*)pline); } /* if */ } while (iscommand!=CMD_NONE && iscommand!=CMD_TERM && freading); /* enddo */ } static const unsigned char *unpackedstring(const unsigned char *lptr,int flags) { while (*lptr!='\"' && *lptr!='\0') { if (*lptr=='\a') { /* ignore '\a' (which was inserted at a line concatenation) */ lptr++; continue; } /* if */ litadd(litchar(&lptr,flags | UTF8MODE)); /* litchar() alters "lptr" */ } /* while */ litadd(0); /* terminate string */ return lptr; } static const unsigned char *packedstring(const unsigned char *lptr,int flags) { int i; ucell val,c; i=sizeof(ucell)-(sCHARBITS/8); /* start at most significant byte */ val=0; while (*lptr!='\"' && *lptr!='\0') { if (*lptr=='\a') { /* ignore '\a' (which was inserted at a line concatenation) */ lptr++; continue; } /* if */ c=litchar(&lptr,flags); /* litchar() alters "lptr" */ if (c>=(ucell)(1 << sCHARBITS)) error(43); /* character constant exceeds range */ val |= (c << 8*i); if (i==0) { litadd(val); val=0; } /* if */ i=(i+sizeof(ucell)-(sCHARBITS/8)) % sizeof(ucell); } /* if */ /* save last code; make sure there is at least one terminating zero character */ if (i!=(int)(sizeof(ucell)-(sCHARBITS/8))) litadd(val); /* at least one zero character in "val" */ else litadd(0); /* add full cell of zeros */ return lptr; } /* lex(lexvalue,lexsym) Lexical Analysis * * lex() first deletes leading white space, then checks for multi-character * operators, keywords (including most compiler directives), numbers, * labels, symbols and literals (literal characters are converted to a number * and are returned as such). If every check fails, the line must contain * a single-character operator. So, lex() returns this character. In the other * case (something did match), lex() returns the number of the token. All * these tokens have been assigned numbers above 255. * * Some tokens have "attributes": * tNUMBER the value of the number is return in "lexvalue". * tRATIONAL the value is in IEEE 754 encoding or in fixed point * encoding in "lexvalue". * tSYMBOL the first sNAMEMAX characters of the symbol are * stored in a buffer, a pointer to this buffer is * returned in "lexsym". * tLABEL the first sNAMEMAX characters of the label are * stored in a buffer, a pointer to this buffer is * returned in "lexsym". * tSTRING the string is stored in the literal pool, the index * in the literal pool to this string is stored in * "lexvalue". * * lex() stores all information (the token found and possibly its attribute) * in global variables. This allows a token to be examined twice. If "_pushed" * is true, this information is returned. * * Global references: lptr (altered) * fline (referred to only) * litidx (referred to only) * _lextok, _lexval, _lexstr * _pushed */ static int _pushed; static int _lextok; static cell _lexval; static char _lexstr[sLINEMAX+1]; static int _lexnewline; SC_FUNC void lexinit(void) { stkidx=0; /* index for pushstk() and popstk() */ iflevel=0; /* preprocessor: nesting of "#if" is currently 0 */ skiplevel=0; /* preprocessor: not currently skipping */ icomment=0; /* currently not in a multiline comment */ _pushed=FALSE; /* no token pushed back into lex */ _lexnewline=FALSE; } char *sc_tokens[] = { "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=", "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--", "...", "..", "::", "assert", "break", "case", "char", "const", "continue", "default", "defined", "do", "else", "enum", "exit", "for", "forward", "goto", "if", "native", "new", "operator", "public", "return", "sizeof", "sleep", "state", "static", "stock", "switch", "tagof", "while", "#assert", "#define", "#else", "#elseif", "#emit", "#endif", "#endinput", "#endscript", "#error", "#file", "#if", "#include", "#line", "#pragma", "#tryinclude", "#undef", ";", ";", "-integer value-", "-rational value-", "-identifier-", "-label-", "-string-" }; SC_FUNC int lex(cell *lexvalue,char **lexsym) { int i,toolong,newline,stringflags; char **tokptr; const unsigned char *starttoken; if (_pushed) { _pushed=FALSE; /* reset "_pushed" flag */ *lexvalue=_lexval; *lexsym=_lexstr; return _lextok; } /* if */ _lextok=0; /* preset all values */ _lexval=0; _lexstr[0]='\0'; *lexvalue=_lexval; *lexsym=_lexstr; _lexnewline=FALSE; if (!freading) return 0; newline= (lptr==pline); /* does lptr point to start of line buffer */ while (*lptr<=' ') { /* delete leading white space */ if (*lptr=='\0') { preprocess(); /* preprocess resets "lptr" */ if (!freading) return 0; if (lptr==term_expr) /* special sequence to terminate a pending expression */ return (_lextok=tENDEXPR); _lexnewline=TRUE; /* set this after preprocess(), because * preprocess() calls lex() recursively */ newline=TRUE; } else { lptr+=1; } /* if */ } /* while */ if (newline) { stmtindent=0; for (i=0; i<(int)(lptr-pline); i++) if (pline[i]=='\t' && sc_tabsize>0) stmtindent += (int)(sc_tabsize - (stmtindent+sc_tabsize) % sc_tabsize); else stmtindent++; } /* if */ i=tFIRST; tokptr=sc_tokens; while (i<=tMIDDLE) { /* match multi-character operators */ if (*lptr==**tokptr && match(*tokptr,FALSE)) { _lextok=i; if (pc_docexpr) /* optionally concatenate to documentation string */ insert_autolist(*tokptr); return _lextok; } /* if */ i+=1; tokptr+=1; } /* while */ while (i<=tLAST) { /* match reserved words and compiler directives */ if (*lptr==**tokptr && match(*tokptr,TRUE)) { _lextok=i; errorset(sRESET); /* reset error flag (clear the "panic mode")*/ if (pc_docexpr) /* optionally concatenate to documentation string */ insert_autolist(*tokptr); return _lextok; } /* if */ i+=1; tokptr+=1; } /* while */ starttoken=lptr; /* save start pointer (for concatenating to documentation string) */ if ((i=number(&_lexval,lptr))!=0) { /* number */ _lextok=tNUMBER; *lexvalue=_lexval; lptr+=i; } else if ((i=ftoi(&_lexval,lptr))!=0) { _lextok=tRATIONAL; *lexvalue=_lexval; lptr+=i; } else if (alpha(*lptr)) { /* symbol or label */ /* Note: only sNAMEMAX characters are significant. The compiler * generates a warning if a symbol exceeds this length. */ _lextok=tSYMBOL; i=0; toolong=0; while (alphanum(*lptr)){ _lexstr[i]=*lptr; lptr+=1; if (i=litmax) { cell *p; litmax+=sDEF_LITMAX; p=(cell *)realloc(litq,litmax*sizeof(cell)); if (p==NULL) error(102,"literal table"); /* literal table overflow (fatal error) */ litq=p; } /* if */ } /* litadd * * Adds a value at the end of the literal queue. The literal queue is used * for literal strings used in functions and for initializing array variables. * * Global references: litidx (altered) * litq (altered) */ SC_FUNC void litadd(cell value) { chk_grow_litq(); assert(litidx=0 && pos<=litidx); memmove(litq+(pos+1),litq+pos,(litidx-pos)*sizeof(cell)); litidx++; litq[pos]=value; } /* litchar * * Return current literal character and increase the pointer to point * just behind this literal character. * * Note: standard "escape sequences" are suported, but the backslash may be * replaced by another character; the syntax '\ddd' is supported, * but ddd must be decimal! */ static cell litchar(const unsigned char **lptr,int flags) { cell c=0; const unsigned char *cptr; cptr=*lptr; if ((flags & RAWMODE)!=0 || *cptr!=sc_ctrlchar) { /* no escape character */ #if !defined NO_UTF8 if (sc_is_utf8 && (flags & UTF8MODE)!=0) { c=get_utf8_char(cptr,&cptr); assert(c>=0); /* file was already scanned for conformance to UTF-8 */ } else { #endif #if !defined NO_CODEPAGE c=cp_translate(cptr,&cptr); #else c=*cptr; cptr+=1; #endif #if !defined NO_UTF8 } /* if */ #endif } else { cptr+=1; if (*cptr==sc_ctrlchar) { c=*cptr; /* \\ == \ (the escape character itself) */ cptr+=1; } else { switch (*cptr) { case 'a': /* \a == audible alarm */ c=7; cptr+=1; break; case 'b': /* \b == backspace */ c=8; cptr+=1; break; case 'e': /* \e == escape */ c=27; cptr+=1; break; case 'f': /* \f == form feed */ c=12; cptr+=1; break; case 'n': /* \n == NewLine character */ c=10; cptr+=1; break; case 'r': /* \r == carriage return */ c=13; cptr+=1; break; case 't': /* \t == horizontal TAB */ c=9; cptr+=1; break; case 'v': /* \v == vertical TAB */ c=11; cptr+=1; break; case 'x': cptr+=1; c=0; while (ishex(*cptr)) { if (isdigit(*cptr)) c=(c<<4)+(*cptr-'0'); else c=(c<<4)+(tolower(*cptr)-'a'+10); cptr++; } /* while */ if (*cptr==';') cptr++; /* swallow a trailing ';' */ break; case '\'': /* \' == ' (single quote) */ case '"': /* \" == " (single quote) */ case '%': /* \% == % (percent) */ c=*cptr; cptr+=1; break; default: if (isdigit(*cptr)) { /* \ddd */ c=0; while (*cptr>='0' && *cptr<='9') /* decimal! */ c=c*10 + *cptr++ - '0'; if (*cptr==';') cptr++; /* swallow a trailing ';' */ } else { error(27); /* invalid character constant */ } /* if */ } /* switch */ } /* if */ } /* if */ *lptr=cptr; assert(c>=0); return c; } /* alpha * * Test if character "c" is alphabetic ("a".."z"), an underscore ("_") * or an "at" sign ("@"). The "@" is an extension to standard C. */ static int alpha(char c) { return (isalpha(c) || c=='_' || c==PUBLIC_CHAR); } /* alphanum * * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@") */ SC_FUNC int alphanum(char c) { return (alpha(c) || isdigit(c)); } /* ishex * * Test if character "c" is a hexadecimal digit ("0".."9" or "a".."f"). */ SC_FUNC int ishex(char c) { return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'); } /* The local variable table must be searched backwards, so that the deepest * nesting of local variables is searched first. The simplest way to do * this is to insert all new items at the head of the list. * In the global list, the symbols are kept in sorted order, so that the * public functions are written in sorted order. */ static symbol *add_symbol(symbol *root,symbol *entry,int sort) { symbol *newsym; if (sort) while (root->next!=NULL && strcmp(entry->name,root->next->name)>0) root=root->next; if ((newsym=(symbol *)malloc(sizeof(symbol)))==NULL) { error(103); return NULL; } /* if */ memcpy(newsym,entry,sizeof(symbol)); newsym->next=root->next; root->next=newsym; return newsym; } static void free_symbol(symbol *sym) { arginfo *arg; /* free all sub-symbol allocated memory blocks, depending on the * kind of the symbol */ assert(sym!=NULL); if (sym->ident==iFUNCTN) { /* run through the argument list; "default array" arguments * must be freed explicitly; the tag list must also be freed */ assert(sym->dim.arglist!=NULL); for (arg=sym->dim.arglist; arg->ident!=0; arg++) { if (arg->ident==iREFARRAY && arg->hasdefault) free(arg->defvalue.array.data); else if (arg->ident==iVARIABLE && ((arg->hasdefault & uSIZEOF)!=0 || (arg->hasdefault & uTAGOF)!=0)) free(arg->defvalue.size.symname); assert(arg->tags!=NULL); free(arg->tags); } /* for */ free(sym->dim.arglist); if (sym->states!=NULL) { delete_consttable(sym->states); free(sym->states); } /* if */ } else if (sym->ident==iCONSTEXPR && (sym->usage & uENUMROOT)==uENUMROOT) { /* free the constant list of an enum root */ assert(sym->dim.enumlist!=NULL); delete_consttable(sym->dim.enumlist); free(sym->dim.enumlist); } /* if */ assert(sym->refer!=NULL); free(sym->refer); if (sym->documentation!=NULL) free(sym->documentation); free(sym); } SC_FUNC void delete_symbol(symbol *root,symbol *sym) { /* find the symbol and its predecessor * (this function assumes that you will never delete a symbol that is not * in the table pointed at by "root") */ assert(root!=sym); while (root->next!=sym) { root=root->next; assert(root!=NULL); } /* while */ /* unlink it, then free it */ root->next=sym->next; free_symbol(sym); } SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_functions) { symbol *sym,*parent_sym; constvalue *stateptr; int mustdelete; /* erase only the symbols with a deeper nesting level than the * specified nesting level */ while (root->next!=NULL) { sym=root->next; if (sym->compoundident) { case iLABEL: mustdelete=delete_labels; break; case iVARIABLE: case iARRAY: /* do not delete global variables if functions are preserved */ mustdelete=delete_functions; break; case iREFERENCE: /* always delete references (only exist as function parameters) */ mustdelete=TRUE; break; case iREFARRAY: /* a global iREFARRAY symbol is the return value of a function: delete * this only if "globals" must be deleted; other iREFARRAY instances * (locals) are also deleted */ mustdelete=delete_functions; for (parent_sym=sym->parent; parent_sym!=NULL && parent_sym->ident!=iFUNCTN; parent_sym=parent_sym->parent) assert(parent_sym->ident==iREFARRAY); assert(parent_sym==NULL || (parent_sym->ident==iFUNCTN && parent_sym->parent==NULL)); if (parent_sym==NULL || parent_sym->ident!=iFUNCTN) mustdelete=TRUE; break; case iCONSTEXPR: /* delete constants, except predefined constants */ mustdelete=delete_functions || (sym->usage & uPREDEF)==0; break; case iFUNCTN: /* optionally preserve globals (variables & functions), but NOT native functions */ mustdelete=delete_functions || (sym->usage & uNATIVE)!=0; assert(sym->parent==NULL); break; case iARRAYCELL: case iARRAYCHAR: case iEXPRESSION: case iVARARGS: default: assert(0); break; } /* switch */ if (mustdelete) { root->next=sym->next; free_symbol(sym); } else { /* if the function was prototyped, but not implemented in this source, * mark it as such, so that its use can be flagged */ if (sym->ident==iFUNCTN && (sym->usage & uDEFINE)==0) sym->usage |= uMISSING; if (sym->ident==iFUNCTN || sym->ident==iVARIABLE || sym->ident==iARRAY) sym->usage &= ~uDEFINE; /* clear "defined" flag */ /* set all states as "undefined" too */ if (sym->states!=NULL) for (stateptr=sym->states->next; stateptr!=NULL; stateptr=stateptr->next) stateptr->value=0; /* for user defined operators, also remove the "prototyped" flag, as * user-defined operators *must* be declared before use */ if (sym->ident==iFUNCTN && !isalpha(*sym->name) && *sym->name!='_' && *sym->name!=PUBLIC_CHAR) sym->usage &= ~uPROTOTYPED; root=sym; /* skip the symbol */ } /* if */ } /* if */ } /* The purpose of the hash is to reduce the frequency of a "name" * comparison (which is costly). There is little interest in avoiding * clusters in similar names, which is why this function is plain simple. */ SC_FUNC uint32_t namehash(const char *name) { const unsigned char *ptr=(const unsigned char *)name; int len=strlen(name); if (len==0) return 0L; assert(len<256); return (len<<24Lu) + (ptr[0]<<16Lu) + (ptr[len-1]<<8Lu) + (ptr[len>>1Lu]); } static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int includechildren) { symbol *ptr=root->next; unsigned long hash=namehash(name); while (ptr!=NULL) { if (hash==ptr->hash && strcmp(name,ptr->name)==0 && (ptr->parent==NULL || includechildren) && (fnumber<0 || (ptr->fnumber<0 || ptr->fnumber==fnumber))) return ptr; ptr=ptr->next; } /* while */ return NULL; } static symbol *find_symbol_child(const symbol *root,const symbol *sym) { symbol *ptr=root->next; while (ptr!=NULL) { if (ptr->parent==sym) return ptr; ptr=ptr->next; } /* while */ return NULL; } /* Adds "bywhom" to the list of referrers of "entry". Typically, * bywhom will be the function that uses a variable or that calls * the function. */ SC_FUNC int refer_symbol(symbol *entry,symbol *bywhom) { int count; assert(bywhom!=NULL); /* it makes no sense to add a "void" referrer */ assert(entry!=NULL); assert(entry->refer!=NULL); /* see if it is already there */ for (count=0; countnumrefers && entry->refer[count]!=bywhom; count++) /* nothing */; if (countnumrefers) { assert(entry->refer[count]==bywhom); return TRUE; } /* if */ /* see if there is an empty spot in the referrer list */ for (count=0; countnumrefers && entry->refer[count]!=NULL; count++) /* nothing */; assert(count <= entry->numrefers); if (count==entry->numrefers) { symbol **refer; int newsize=2*entry->numrefers; assert(newsize>0); /* grow the referrer list */ refer=(symbol**)realloc(entry->refer,newsize*sizeof(symbol*)); if (refer==NULL) return FALSE; /* insufficient memory */ /* initialize the new entries */ entry->refer=refer; for (count=entry->numrefers; countrefer[count]=NULL; count=entry->numrefers; /* first empty spot */ entry->numrefers=newsize; } /* if */ /* add the referrer */ assert(entry->refer[count]==NULL); entry->refer[count]=bywhom; return TRUE; } SC_FUNC void markusage(symbol *sym,int usage) { sym->usage |= (char)usage; /* check if (global) reference must be added to the symbol */ if ((usage & (uREAD | uWRITTEN))!=0) { /* only do this for global symbols */ if (sym->vclass==sGLOBAL) { /* "curfunc" should always be valid, since statements may not occurs * outside functions; in the case of syntax errors, however, the * compiler may arrive through this function */ if (curfunc!=NULL) refer_symbol(sym,curfunc); } /* if */ } /* if */ } /* findglb * * Returns a pointer to the global symbol (if found) or NULL (if not found) */ SC_FUNC symbol *findglb(const char *name) { return find_symbol(&glbtab,name,fcurrent,FALSE); } /* findloc * * Returns a pointer to the local symbol (if found) or NULL (if not found). * See add_symbol() how the deepest nesting level is searched first. */ SC_FUNC symbol *findloc(const char *name) { return find_symbol(&loctab,name,-1,FALSE); } SC_FUNC symbol *findconst(const char *name) { symbol *sym; sym=find_symbol(&loctab,name,-1,TRUE); /* try local symbols first */ if (sym==NULL || sym->ident!=iCONSTEXPR) /* not found, or not a constant */ sym=find_symbol(&glbtab,name,fcurrent,TRUE); if (sym==NULL || sym->ident!=iCONSTEXPR) return NULL; assert(sym->parent==NULL || (sym->usage & uENUMFIELD)!=0); /* ^^^ constants have no hierarchy, but enumeration fields may have a parent */ return sym; } SC_FUNC symbol *finddepend(const symbol *parent) { symbol *sym; sym=find_symbol_child(&loctab,parent); /* try local symbols first */ if (sym==NULL) /* not found */ sym=find_symbol_child(&glbtab,parent); return sym; } /* addsym * * Adds a symbol to the symbol table (either global or local variables, * or global and local constants). */ SC_FUNC symbol *addsym(const char *name,cell addr,int ident,int vclass,int tag,int usage) { symbol entry, **refer; /* global variables/constants/functions may only be defined once */ assert(!(ident==iFUNCTN || ident==iCONSTEXPR) || vclass!=sGLOBAL || findglb(name)==NULL); /* labels may only be defined once */ assert(ident!=iLABEL || findloc(name)==NULL); /* create an empty referrer list */ if ((refer=(symbol**)malloc(sizeof(symbol*)))==NULL) { error(103); /* insufficient memory */ return NULL; } /* if */ *refer=NULL; /* first fill in the entry */ strcpy(entry.name,name); entry.hash=namehash(name); entry.addr=addr; entry.codeaddr=code_idx; entry.vclass=(char)vclass; entry.ident=(char)ident; entry.tag=tag; entry.usage=(char)usage; entry.compound=0; /* may be overridden later */ entry.states=NULL; entry.fnumber=-1; /* assume global visibility (ignored for local symbols) */ entry.numrefers=1; entry.refer=refer; entry.parent=NULL; entry.fieldtag=0; entry.documentation=NULL; /* then insert it in the list */ if (vclass==sGLOBAL) return add_symbol(&glbtab,&entry,TRUE); else return add_symbol(&loctab,&entry,FALSE); } SC_FUNC symbol *addvariable(const char *name,cell addr,int ident,int vclass,int tag, int dim[],int numdim,int idxtag[]) { symbol *sym; /* global variables may only be defined once * One complication is that functions returning arrays declare an array * with the same name as the function, so the assertion must allow for * this special case. */ assert(vclass!=sGLOBAL || (sym=findglb(name))==NULL || (sym->usage & uDEFINE)==0 || sym->ident==iFUNCTN && sym==curfunc); if (ident==iARRAY || ident==iREFARRAY) { symbol *parent=NULL,*top; int level; sym=NULL; /* to avoid a compiler warning */ for (level=0; leveldim.array.length=dim[level]; top->dim.array.level=(short)(numdim-level-1); top->x.idxtag=idxtag[level]; top->parent=parent; parent=top; if (level==0) sym=top; } /* for */ } else { sym=addsym(name,addr,ident,vclass,tag,uDEFINE); } /* if */ return sym; } /* getlabel * * Returns te next internal label number. The global variable sc_labnum is * initialized to zero. */ SC_FUNC int getlabel(void) { return sc_labnum++; } /* itoh * * Converts a number to a hexadecimal string and returns a pointer to that * string. This function is NOT re-entrant. */ SC_FUNC char *itoh(ucell val) { static char itohstr[30]; char *ptr; int i,nibble[16]; /* a 64-bit hexadecimal cell has 16 nibbles */ int max; #if PAWN_CELL_SIZE==16 max=4; #elif PAWN_CELL_SIZE==32 max=8; #elif PAWN_CELL_SIZE==64 max=16; #else #error Unsupported cell size #endif ptr=itohstr; for (i=0; i>=4; } /* endfor */ i=max-1; while (nibble[i]==0 && i>0) /* search for highest non-zero nibble */ i-=1; while (i>=0){ if (nibble[i]>=10) *ptr++=(char)('a'+(nibble[i]-10)); else *ptr++=(char)('0'+nibble[i]); i-=1; } /* while */ *ptr='\0'; /* and a zero-terminator */ return itohstr; }