581 lines
18 KiB
C
Executable File
581 lines
18 KiB
C
Executable File
/* Pawn compiler - Staging buffer and optimizer
|
|
*
|
|
* The staging buffer
|
|
* ------------------
|
|
* The staging buffer allows buffered output of generated code, deletion
|
|
* of redundant code, optimization by a tinkering process and reversing
|
|
* the output of evaluated expressions (which is used for the reversed
|
|
* evaluation of arguments in functions).
|
|
* Initially, stgwrite() writes to the file directly, but after a call to
|
|
* stgset(TRUE), output is redirected to the buffer. After a call to
|
|
* stgset(FALSE), stgwrite()'s output is directed to the file again. Thus
|
|
* only one routine is used for writing to the output, which can be
|
|
* buffered output or direct output.
|
|
*
|
|
* staging buffer variables: stgbuf - the buffer
|
|
* stgidx - current index in the staging buffer
|
|
* staging - if true, write to the staging buffer;
|
|
* if false, write to file directly.
|
|
*
|
|
* Copyright (c) ITB CompuPhase, 1997-2005
|
|
*
|
|
* This software is provided "as-is", without any express or implied warranty.
|
|
* In no event will the authors be held liable for any damages arising from
|
|
* the use of this software.
|
|
*
|
|
* Permission is granted to anyone to use this software for any purpose,
|
|
* including commercial applications, and to alter it and redistribute it
|
|
* freely, subject to the following restrictions:
|
|
*
|
|
* 1. The origin of this software must not be misrepresented; you must not
|
|
* claim that you wrote the original software. If you use this software in
|
|
* a product, an acknowledgment in the product documentation would be
|
|
* appreciated but is not required.
|
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
* misrepresented as being the original software.
|
|
* 3. This notice may not be removed or altered from any source distribution.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h> /* for atoi() */
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#if defined FORTIFY
|
|
#include "fortify.h"
|
|
#endif
|
|
#include "sc.h"
|
|
|
|
#if defined _MSC_VER
|
|
#pragma warning(push)
|
|
#pragma warning(disable:4125) /* decimal digit terminates octal escape sequence */
|
|
#endif
|
|
|
|
#include "sc7-in.scp"
|
|
|
|
#if defined _MSC_VER
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
/* forward declarations: both helpers work on the range [start,end) of the
 * staging buffer and are defined further down in this file
 */
static void stgstring(char *start,char *end);
static void stgopt(char *start,char *end);
|
|
|
|
|
|
#define sSTG_GROW   512         /* slack added on top of every growth request */
#define sSTG_MAX    20480       /* requests above this size are a fatal error */

static char *stgbuf = NULL;     /* the staging buffer (allocated on demand) */
static int stgmax = 0;          /* current size of the staging buffer */

/* CHECK_STGBUFFER
 *
 * Makes sure that position "index" exists in the staging buffer; the buffer
 * is grown through grow_stgbuffer() when "index" lies at or beyond "stgmax".
 * The body is wrapped in do/while(0) so that the macro is a single statement
 * and cannot capture an "else" that follows it. Note that "index" is
 * evaluated twice when the buffer must grow.
 */
#define CHECK_STGBUFFER(index) \
  do { \
    if ((int)(index)>=stgmax) \
      grow_stgbuffer((index)+1); \
  } while (0)
|
|
|
|
static void grow_stgbuffer(int requiredsize)
|
|
{
|
|
char *p;
|
|
int clear = stgbuf==NULL; /* if previously none, empty buffer explicitly */
|
|
|
|
assert(stgmax<requiredsize);
|
|
/* if the staging buffer (holding intermediate code for one line) grows
|
|
* over a few kBytes, there is probably a run-away expression
|
|
*/
|
|
if (requiredsize>sSTG_MAX)
|
|
error(102,"staging buffer"); /* staging buffer overflow (fatal error) */
|
|
stgmax=requiredsize+sSTG_GROW;
|
|
if (stgbuf!=NULL)
|
|
p=(char *)realloc(stgbuf,stgmax*sizeof(char));
|
|
else
|
|
p=(char *)malloc(stgmax*sizeof(char));
|
|
if (p==NULL)
|
|
error(102,"staging buffer"); /* staging buffer overflow (fatal error) */
|
|
stgbuf=p;
|
|
if (clear)
|
|
*stgbuf='\0';
|
|
}
|
|
|
|
SC_FUNC void stgbuffer_cleanup(void)
|
|
{
|
|
if (stgbuf!=NULL) {
|
|
free(stgbuf);
|
|
stgbuf=NULL;
|
|
stgmax=0;
|
|
} /* if */
|
|
}
|
|
|
|
/* the variables "stgidx" and "staging" are declared in "scvars.c" */
|
|
|
|
/* stgmark
|
|
*
|
|
* Copies a mark into the staging buffer. At this moment there are three
|
|
* possible marks:
|
|
* sSTARTREORDER identifies the beginning of a series of expression
|
|
* strings that must be written to the output file in
|
|
* reordered order
|
|
* sENDREORDER identifies the end of 'reverse evaluation'
|
|
* sEXPRSTART + idx only valid within a block that is evaluated in
|
|
* reordered order, it identifies the start of an
|
|
* expression; the "idx" value is the argument position
|
|
*
|
|
* Global references: stgidx (altered)
|
|
* stgbuf (altered)
|
|
* staging (referred to only)
|
|
*/
|
|
SC_FUNC void stgmark(char mark)
|
|
{
|
|
if (staging) {
|
|
CHECK_STGBUFFER(stgidx);
|
|
stgbuf[stgidx++]=mark;
|
|
} /* if */
|
|
}
|
|
|
|
static int filewrite(char *str)
|
|
{
|
|
if (sc_status==statWRITE)
|
|
return pc_writeasm(outf,str);
|
|
return TRUE;
|
|
}
|
|
|
|
/* stgwrite
|
|
*
|
|
* Writes the string "st" to the staging buffer or to the output file. In the
|
|
* case of writing to the staging buffer, the terminating byte of zero is
|
|
* copied too, but... the optimizer can only work on complete lines (not on
|
|
* fractions of it). Therefore, if the string is staged and the last character
|
|
* written to the buffer is a '\0' and the previous-to-last is not a '\n',
|
|
* the string is concatenated to the last string in the buffer (the '\0' is
|
|
* overwritten). This also means an '\n' used in the middle of a string isn't
|
|
* recognized and could give wrong results with the optimizer.
|
|
* Even when writing to the output file directly, all strings are buffered
|
|
* until a whole line is complete.
|
|
*
|
|
* Global references: stgidx (altered)
|
|
* stgbuf (altered)
|
|
* staging (referred to only)
|
|
*/
|
|
SC_FUNC void stgwrite(const char *st)
|
|
{
|
|
int len;
|
|
|
|
CHECK_STGBUFFER(0);
|
|
if (staging) {
|
|
if (stgidx>=2 && stgbuf[stgidx-1]=='\0' && stgbuf[stgidx-2]!='\n')
|
|
stgidx-=1; /* overwrite last '\0' */
|
|
while (*st!='\0') { /* copy to staging buffer */
|
|
CHECK_STGBUFFER(stgidx);
|
|
stgbuf[stgidx++]=*st++;
|
|
} /* while */
|
|
CHECK_STGBUFFER(stgidx);
|
|
stgbuf[stgidx++]='\0';
|
|
} else {
|
|
CHECK_STGBUFFER(strlen(stgbuf)+strlen(st)+1);
|
|
strcat(stgbuf,st);
|
|
len=strlen(stgbuf);
|
|
if (len>0 && stgbuf[len-1]=='\n') {
|
|
filewrite(stgbuf);
|
|
stgbuf[0]='\0';
|
|
} /* if */
|
|
} /* if */
|
|
}
|
|
|
|
/* stgout
|
|
*
|
|
* Writes the staging buffer to the output file via stgstring() (for
|
|
* reversing expressions in the buffer) and stgopt() (for optimizing). It
|
|
* resets "stgidx".
|
|
*
|
|
* Global references: stgidx (altered)
|
|
* stgbuf (referred to only)
|
|
* staging (referred to only)
|
|
*/
|
|
SC_FUNC void stgout(int index)
|
|
{
|
|
if (!staging)
|
|
return;
|
|
stgstring(&stgbuf[index],&stgbuf[stgidx]);
|
|
stgidx=index;
|
|
}
|
|
|
|
/* one entry of the private argument list of stgstring(): the start and the
 * end of the code strings that belong to a single argument
 */
typedef struct {
  char *start;
  char *end;
} argstack;
|
|
|
|
/* stgstring
|
|
*
|
|
* Analyses whether code strings should be output to the file as they appear
|
|
* in the staging buffer or whether portions of it should be re-ordered.
|
|
* Re-ordering takes place in function argument lists; Pawn passes arguments
|
|
* to functions from right to left. When arguments are "named" rather than
|
|
* positional, the order in the source stream is indeterminate.
|
|
* This function calls itself recursively in case it needs to re-order code
|
|
* strings, and it uses a private stack (or list) to mark the start and the
|
|
* end of expressions in their correct (reversed) order.
|
|
* In any case, stgstring() sends a block as large as possible to the
|
|
* optimizer stgopt().
|
|
*
|
|
* In "reorder" mode, each set of code strings must start with the token
|
|
* sEXPRSTART, even the first. If the token sSTARTREORDER is represented
|
|
* by '[', sENDREORDER by ']' and sEXPRSTART by '|' the following applies:
|
|
* '[]...' valid, but useless; no output
|
|
* '[|...] valid, but useless; only one string
|
|
* '[|...|...] valid and useful
|
|
* '[...|...] invalid, first string doesn't start with '|'
|
|
* '[|...|] invalid
|
|
*/
|
|
static void stgstring(char *start,char *end)
|
|
{
|
|
char *ptr;
|
|
int nest,argc,arg;
|
|
argstack *stack;
|
|
|
|
while (start<end) {
|
|
if (*start==sSTARTREORDER) {
|
|
start+=1; /* skip token */
|
|
/* allocate a argstack with sMAXARGS items */
|
|
stack=(argstack *)malloc(sMAXARGS*sizeof(argstack));
|
|
if (stack==NULL)
|
|
error(103); /* insufficient memory */
|
|
nest=1; /* nesting counter */
|
|
argc=0; /* argument counter */
|
|
arg=-1; /* argument index; no valid argument yet */
|
|
do {
|
|
switch (*start) {
|
|
case sSTARTREORDER:
|
|
nest++;
|
|
start++;
|
|
break;
|
|
case sENDREORDER:
|
|
nest--;
|
|
start++;
|
|
break;
|
|
default:
|
|
if ((*start & sEXPRSTART)==sEXPRSTART) {
|
|
if (nest==1) {
|
|
if (arg>=0)
|
|
stack[arg].end=start-1; /* finish previous argument */
|
|
arg=(unsigned char)*start - sEXPRSTART;
|
|
stack[arg].start=start+1;
|
|
if (arg>=argc)
|
|
argc=arg+1;
|
|
} /* if */
|
|
start++;
|
|
} else {
|
|
start+=strlen(start)+1;
|
|
} /* if */
|
|
} /* switch */
|
|
} while (nest); /* enddo */
|
|
if (arg>=0)
|
|
stack[arg].end=start-1; /* finish previous argument */
|
|
while (argc>0) {
|
|
argc--;
|
|
stgstring(stack[argc].start,stack[argc].end);
|
|
} /* while */
|
|
free(stack);
|
|
} else {
|
|
ptr=start;
|
|
while (ptr<end && *ptr!=sSTARTREORDER)
|
|
ptr+=strlen(ptr)+1;
|
|
stgopt(start,ptr);
|
|
start=ptr;
|
|
} /* if */
|
|
} /* while */
|
|
}
|
|
|
|
/* stgdel
|
|
*
|
|
* Scraps code from the staging buffer by resetting "stgidx" to "index".
|
|
*
|
|
* Global references: stgidx (altered)
|
|
* staging (referred to only)
|
|
*/
|
|
SC_FUNC void stgdel(int index,cell code_index)
{
  if (!staging)
    return;
  /* restore both the code index and the buffer position that were passed in */
  code_idx=code_index;
  stgidx=index;
}
|
|
|
|
/* stgget
 *
 * Stores the current staging index in "*index" and the current "code_idx"
 * in "*code_index", but only while staging is active; otherwise both
 * outputs are left untouched. Returns the value of "staging".
 *
 * Global references: stgidx    (referred to only)
 *                    code_idx  (referred to only)
 *                    staging   (referred to only)
 */
SC_FUNC int stgget(int *index,cell *code_index)
{
  if (!staging)
    return staging;
  *code_index=code_idx;
  *index=stgidx;
  return staging;
}
|
|
|
|
/* stgset
|
|
*
|
|
* Sets staging on or off. If it's turned off, the staging buffer must be
|
|
* initialized to an empty string. If it's turned on, the routine makes sure
|
|
* the index ("stgidx") is set to 0 (it should already be 0).
|
|
*
|
|
* Global references: staging (altered)
|
|
* stgidx (altered)
|
|
* stgbuf (contents altered)
|
|
*/
|
|
SC_FUNC void stgset(int onoff)
|
|
{
|
|
staging=onoff;
|
|
if (staging){
|
|
assert(stgidx==0);
|
|
stgidx=0;
|
|
CHECK_STGBUFFER(stgidx);
|
|
/* write any contents that may be put in the buffer by stgwrite()
|
|
* when "staging" was 0
|
|
*/
|
|
if (strlen(stgbuf)>0)
|
|
filewrite(stgbuf);
|
|
} /* if */
|
|
stgbuf[0]='\0';
|
|
}
|
|
|
|
/* phopt_init
|
|
* Initialize all sequence strings of the peephole optimizer. The strings
|
|
* are embedded in the .EXE file in compressed format, here we expand
|
|
* them (and allocate memory for the sequences).
|
|
*/
|
|
static SEQUENCE *sequences = sequences_cmp;
|
|
|
|
SC_FUNC int phopt_init(void)
|
|
{
|
|
return TRUE;
|
|
}
|
|
|
|
SC_FUNC int phopt_cleanup(void)
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
#define MAX_OPT_VARS    4       /* number of "%n" symbols in a pattern */
#define MAX_OPT_CAT     4       /* max. values that are concatenated */
/* MAX_ALIAS is the larger of sNAMEMAX and (PAWN_CELL_SIZE/4)*MAX_OPT_CAT; it
 * sizes the buffers that hold a matched symbol. The expansion is fully
 * parenthesized so that it can be used safely inside any expression.
 */
#if sNAMEMAX > (PAWN_CELL_SIZE/4) * MAX_OPT_CAT
  #define MAX_ALIAS       (sNAMEMAX)
#else
  #define MAX_ALIAS       ((PAWN_CELL_SIZE/4) * MAX_OPT_CAT)
#endif
|
|
|
|
static int matchsequence(const char *start,const char *end,const char *pattern,
|
|
char symbols[MAX_OPT_VARS][MAX_ALIAS+1],
|
|
int *match_length)
|
|
{
|
|
int var,i;
|
|
char str[MAX_ALIAS+1];
|
|
const char *start_org=start;
|
|
cell value;
|
|
char *ptr;
|
|
|
|
*match_length=0;
|
|
for (var=0; var<MAX_OPT_VARS; var++)
|
|
symbols[var][0]='\0';
|
|
|
|
while (*start=='\t' || *start==' ')
|
|
start++;
|
|
while (*pattern) {
|
|
if (start>=end)
|
|
return FALSE;
|
|
switch (*pattern) {
|
|
case '%': /* new "symbol" */
|
|
pattern++;
|
|
assert(isdigit(*pattern));
|
|
var=atoi(pattern) - 1;
|
|
assert(var>=0 && var<MAX_OPT_VARS);
|
|
assert(*start=='-' || alphanum(*start));
|
|
for (i=0; start<end && (*start=='-' || *start=='+' || alphanum(*start)); i++,start++) {
|
|
assert(i<=MAX_ALIAS);
|
|
str[i]=*start;
|
|
} /* for */
|
|
str[i]='\0';
|
|
if (symbols[var][0]!='\0') {
|
|
if (strcmp(symbols[var],str)!=0)
|
|
return FALSE; /* symbols should be identical */
|
|
} else {
|
|
strcpy(symbols[var],str);
|
|
} /* if */
|
|
break;
|
|
case '-':
|
|
value=-strtol(pattern+1,(char **)&pattern,16);
|
|
ptr=itoh((ucell)value);
|
|
while (*ptr!='\0') {
|
|
if (tolower(*start) != tolower(*ptr))
|
|
return FALSE;
|
|
start++;
|
|
ptr++;
|
|
} /* while */
|
|
pattern--; /* there is an increment following at the end of the loop */
|
|
break;
|
|
case ' ':
|
|
if (*start!='\t' && *start!=' ')
|
|
return FALSE;
|
|
while ((start<end && *start=='\t') || *start==' ')
|
|
start++;
|
|
break;
|
|
case '!':
|
|
while ((start<end && *start=='\t') || *start==' ')
|
|
start++; /* skip trailing white space */
|
|
if (*start!='\n')
|
|
return FALSE;
|
|
assert(*(start+1)=='\0');
|
|
start+=2; /* skip '\n' and '\0' */
|
|
if (*(pattern+1)!='\0')
|
|
while ((start<end && *start=='\t') || *start==' ')
|
|
start++; /* skip leading white space of next instruction */
|
|
break;
|
|
default:
|
|
if (tolower(*start) != tolower(*pattern))
|
|
return FALSE;
|
|
start++;
|
|
} /* switch */
|
|
pattern++;
|
|
} /* while */
|
|
|
|
*match_length=(int)(start-start_org);
|
|
return TRUE;
|
|
}
|
|
|
|
static char *replacesequence(const char *pattern,char symbols[MAX_OPT_VARS][MAX_ALIAS+1],int *repl_length)
|
|
{
|
|
const char *lptr;
|
|
int var;
|
|
char *buffer;
|
|
|
|
/* calculate the length of the new buffer
|
|
* this is the length of the pattern plus the length of all symbols (note
|
|
* that the same symbol may occur multiple times in the pattern) plus
|
|
* line endings and startings ('\t' to start a line and '\n\0' to end one)
|
|
*/
|
|
assert(repl_length!=NULL);
|
|
*repl_length=0;
|
|
lptr=pattern;
|
|
while (*lptr) {
|
|
switch (*lptr) {
|
|
case '%':
|
|
lptr++; /* skip '%' */
|
|
assert(isdigit(*lptr));
|
|
var=atoi(lptr) - 1;
|
|
assert(var>=0 && var<MAX_OPT_VARS);
|
|
assert(symbols[var][0]!='\0'); /* variable should be defined */
|
|
*repl_length+=strlen(symbols[var]);
|
|
break;
|
|
case '!':
|
|
*repl_length+=3; /* '\t', '\n' & '\0' */
|
|
break;
|
|
default:
|
|
*repl_length+=1;
|
|
} /* switch */
|
|
lptr++;
|
|
} /* while */
|
|
|
|
/* allocate a buffer to replace the sequence in */
|
|
if ((buffer=(char*)malloc(*repl_length))==NULL)
|
|
return (char*)error(103);
|
|
|
|
/* replace the pattern into this temporary buffer */
|
|
char *ptr=buffer;
|
|
*ptr++='\t'; /* the "replace" patterns do not have tabs */
|
|
while (*pattern) {
|
|
assert((int)(ptr-buffer)<*repl_length);
|
|
switch (*pattern) {
|
|
case '%':
|
|
/* write out the symbol */
|
|
pattern++;
|
|
assert(isdigit(*pattern));
|
|
var=atoi(pattern) - 1;
|
|
assert(var>=0 && var<MAX_OPT_VARS);
|
|
assert(symbols[var][0]!='\0'); /* variable should be defined */
|
|
strcpy(ptr,symbols[var]);
|
|
ptr+=strlen(symbols[var]);
|
|
break;
|
|
case '!':
|
|
/* finish the line, optionally start the next line with an indent */
|
|
*ptr++='\n';
|
|
*ptr++='\0';
|
|
if (*(pattern+1)!='\0')
|
|
*ptr++='\t';
|
|
break;
|
|
default:
|
|
*ptr++=*pattern;
|
|
} /* switch */
|
|
pattern++;
|
|
} /* while */
|
|
|
|
assert((int)(ptr-buffer)==*repl_length);
|
|
return buffer;
|
|
}
|
|
|
|
/* strreplace
 *
 * Replaces the first "sub_length" bytes of "dest" by the "repl_length" bytes
 * at "replace". "dest_length" is the number of bytes at "dest" that take
 * part in the move. When the replacement is shorter, the data is shifted
 * down and the bytes that are freed at the tail are filled with 0xcc; when
 * it is longer, the data is shifted up first.
 */
static void strreplace(char *dest,char *replace,int sub_length,int repl_length,int dest_length)
{
  int shrink=sub_length-repl_length;

  if (shrink>0) {
    /* the text gets shorter: close the gap */
    int remaining=dest_length-shrink;
    memmove(dest,dest+shrink,remaining);
    memset(dest+remaining,0xcc,shrink); /* not needed, but for cleanliness */
  } else if (shrink<0) {
    /* the text gets longer: open up room */
    int grow=-shrink;
    memmove(dest+grow,dest,dest_length);
  } /* if */
  memcpy(dest,replace,repl_length);
}
|
|
|
|
/* stgopt
|
|
*
|
|
* Optimizes the staging buffer by checking for series of instructions that
|
|
* can be coded more compact. The routine expects the lines in the staging
|
|
* buffer to be separated with '\n' and '\0' characters.
|
|
*
|
|
* The longest sequences should probably be checked first.
|
|
*/
|
|
|
|
static void stgopt(char *start,char *end)
|
|
{
|
|
char symbols[MAX_OPT_VARS][MAX_ALIAS+1];
|
|
int seq,match_length,repl_length;
|
|
int matches;
|
|
char *debut=start;
|
|
|
|
assert(sequences!=NULL);
|
|
/* do not match anything if debug-level is maximum */
|
|
if ((sc_debug & sNOOPTIMIZE)==0 && sc_status==statWRITE) {
|
|
do {
|
|
matches=0;
|
|
start=debut;
|
|
while (start<end) {
|
|
seq=0;
|
|
while (sequences[seq].find!=NULL) {
|
|
assert(seq>=0);
|
|
if (matchsequence(start,end,sequences[seq].find,symbols,&match_length)) {
|
|
char *replace=replacesequence(sequences[seq].replace,symbols,&repl_length);
|
|
/* If the replacement is bigger than the original section, we may need
|
|
* to "grow" the staging buffer. This is quite complex, due to the
|
|
* re-ordering of expressions that can also happen in the staging
|
|
* buffer. In addition, it should not happen: the peephole optimizer
|
|
* must replace sequences with *shorter* sequences, not longer ones.
|
|
* So, I simply forbid sequences that are longer than the ones they
|
|
* are meant to replace.
|
|
*/
|
|
assert(match_length>=repl_length);
|
|
if (match_length>=repl_length) {
|
|
strreplace(start,replace,match_length,repl_length,(int)(end-start));
|
|
end-=match_length-repl_length;
|
|
free(replace);
|
|
code_idx-=sequences[seq].savesize;
|
|
seq=0; /* restart search for matches */
|
|
matches++;
|
|
} else {
|
|
/* actually, we should never get here (match_length<repl_length) */
|
|
assert(0);
|
|
seq++;
|
|
} /* if */
|
|
} else {
|
|
seq++;
|
|
} /* if */
|
|
} /* while */
|
|
assert(sequences[seq].find==NULL);
|
|
start += strlen(start) + 1; /* to next string */
|
|
} /* while (start<end) */
|
|
} while (matches>0);
|
|
} /* if ((sc_debug & sNOOPTIMIZE)==0 && sc_status==statWRITE) */
|
|
|
|
for (start=debut; start<end; start+=strlen(start)+1)
|
|
filewrite(start);
|
|
}
|
|
|
|
#undef SCPACK_TABLE
|