Compiler: Use hash table for global name lookups.

Imported from SM: https://bugs.alliedmods.net/show_bug.cgi?id=4496.
This commit is contained in:
Arkshine 2014-08-17 13:23:05 +02:00
parent a876962405
commit f60b00ee71
9 changed files with 298 additions and 23 deletions

View File

@ -75,6 +75,7 @@ binary.sources = [
'libpawnc.c', 'libpawnc.c',
'prefix.c', 'prefix.c',
'memfile.c', 'memfile.c',
'sp_symhash.c',
] ]
if builder.target_platform == 'windows': if builder.target_platform == 'windows':

View File

@ -51,7 +51,7 @@
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile> <ClCompile>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;HAVE_STDINT_H;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary> <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<RuntimeTypeInfo>false</RuntimeTypeInfo> <RuntimeTypeInfo>false</RuntimeTypeInfo>
<PrecompiledHeader> <PrecompiledHeader>
@ -71,7 +71,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile> <ClCompile>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;HAVE_STDINT_H;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild> <MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>StackFrameRuntimeCheck</BasicRuntimeChecks> <BasicRuntimeChecks>StackFrameRuntimeCheck</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
@ -105,11 +105,13 @@
<ClCompile Include="scmemfil.c" /> <ClCompile Include="scmemfil.c" />
<ClCompile Include="scstate.c" /> <ClCompile Include="scstate.c" />
<ClCompile Include="scvars.c" /> <ClCompile Include="scvars.c" />
<ClCompile Include="sp_symhash.c" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="amx.h" /> <ClInclude Include="amx.h" />
<ClInclude Include="memfile.h" /> <ClInclude Include="memfile.h" />
<ClInclude Include="sc.h" /> <ClInclude Include="sc.h" />
<ClInclude Include="sp_symhash.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ResourceCompile Include="libpawnc.rc" /> <ResourceCompile Include="libpawnc.rc" />

View File

@ -60,6 +60,9 @@
<ClCompile Include="scvars.c"> <ClCompile Include="scvars.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="sp_symhash.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="amx.h"> <ClInclude Include="amx.h">
@ -71,6 +74,9 @@
<ClInclude Include="sc.h"> <ClInclude Include="sc.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="sp_symhash.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ResourceCompile Include="libpawnc.rc"> <ResourceCompile Include="libpawnc.rc">

View File

@ -545,7 +545,6 @@ SC_FUNC void delete_symbol(symbol *root,symbol *sym);
SC_FUNC void delete_symbols(symbol *root,int level,int del_labels,int delete_functions); SC_FUNC void delete_symbols(symbol *root,int level,int del_labels,int delete_functions);
SC_FUNC int refer_symbol(symbol *entry,symbol *bywhom); SC_FUNC int refer_symbol(symbol *entry,symbol *bywhom);
SC_FUNC void markusage(symbol *sym,int usage); SC_FUNC void markusage(symbol *sym,int usage);
SC_FUNC uint32_t namehash(const char *name);
SC_FUNC symbol *findglb(const char *name); SC_FUNC symbol *findglb(const char *name);
SC_FUNC symbol *findloc(const char *name); SC_FUNC symbol *findloc(const char *name);
SC_FUNC symbol *findconst(const char *name); SC_FUNC symbol *findconst(const char *name);
@ -741,6 +740,8 @@ SC_FUNC void state_conflict(symbol *root);
/* external variables (defined in scvars.c) */ /* external variables (defined in scvars.c) */
#if !defined SC_SKIP_VDECL #if !defined SC_SKIP_VDECL
typedef struct HashTable HashTable;
SC_VDECL struct HashTable *sp_Globals;
SC_VDECL symbol loctab; /* local symbol table */ SC_VDECL symbol loctab; /* local symbol table */
SC_VDECL symbol glbtab; /* global symbol table */ SC_VDECL symbol glbtab; /* global symbol table */
SC_VDECL cell *litq; /* the literal queue */ SC_VDECL cell *litq; /* the literal queue */

View File

@ -65,6 +65,8 @@
#include <time.h> #include <time.h>
#include "sc.h" #include "sc.h"
#include "sp_symhash.h"
#define VERSION_STR "3.0.3367-amxx" #define VERSION_STR "3.0.3367-amxx"
#define VERSION_INT 0x30A #define VERSION_INT 0x30A
@ -470,6 +472,10 @@ int pc_compile(int argc, char *argv[])
if ((jmpcode=setjmp(errbuf))!=0) if ((jmpcode=setjmp(errbuf))!=0)
goto cleanup; goto cleanup;
sp_Globals = NewHashTable();
if (!sp_Globals)
error(123);
/* allocate memory for fixed tables */ /* allocate memory for fixed tables */
inpfname=(char*)malloc(_MAX_PATH); inpfname=(char*)malloc(_MAX_PATH);
if (inpfname==NULL) if (inpfname==NULL)
@ -745,6 +751,7 @@ cleanup:
delete_symbols(&loctab,0,TRUE,TRUE); /* delete local variables if not yet delete_symbols(&loctab,0,TRUE,TRUE); /* delete local variables if not yet
* done (i.e. on a fatal error) */ * done (i.e. on a fatal error) */
delete_symbols(&glbtab,0,TRUE,TRUE); delete_symbols(&glbtab,0,TRUE,TRUE);
DestroyHashTable(sp_Globals);
delete_consttable(&tagname_tab); delete_consttable(&tagname_tab);
delete_consttable(&libname_tab); delete_consttable(&libname_tab);
delete_consttable(&sc_automaton_tab); delete_consttable(&sc_automaton_tab);
@ -2960,8 +2967,10 @@ static int operatoradjust(int opertok,symbol *sym,char *opername,int resulttag)
refer_symbol(sym,oldsym->refer[i]); refer_symbol(sym,oldsym->refer[i]);
delete_symbol(&glbtab,oldsym); delete_symbol(&glbtab,oldsym);
} /* if */ } /* if */
RemoveFromHashTable(sp_Globals, sym);
strcpy(sym->name,tmpname); strcpy(sym->name,tmpname);
sym->hash=namehash(sym->name);/* calculate new hash */ sym->hash=NameHash(sym->name);/* calculate new hash */
AddToHashTable(sp_Globals, sym);
/* operators should return a value, except the '~' operator */ /* operators should return a value, except the '~' operator */
if (opertok!='~') if (opertok!='~')

View File

@ -29,6 +29,7 @@
#if defined LINUX || defined __FreeBSD__ || defined __OpenBSD__ || defined __APPLE__ #if defined LINUX || defined __FreeBSD__ || defined __OpenBSD__ || defined __APPLE__
#include <sclinux.h> #include <sclinux.h>
#endif #endif
#include "sp_symhash.h"
#if defined FORTIFY #if defined FORTIFY
#include "fortify.h" #include "fortify.h"
@ -2370,6 +2371,7 @@ SC_FUNC int ishex(char c)
static symbol *add_symbol(symbol *root,symbol *entry,int sort) static symbol *add_symbol(symbol *root,symbol *entry,int sort)
{ {
symbol *newsym; symbol *newsym;
int global = root==&glbtab;
if (sort) if (sort)
while (root->next!=NULL && strcmp(entry->name,root->next->name)>0) while (root->next!=NULL && strcmp(entry->name,root->next->name)>0)
@ -2382,6 +2384,8 @@ static symbol *add_symbol(symbol *root,symbol *entry,int sort)
memcpy(newsym,entry,sizeof(symbol)); memcpy(newsym,entry,sizeof(symbol));
newsym->next=root->next; newsym->next=root->next;
root->next=newsym; root->next=newsym;
if (global)
AddToHashTable(sp_Globals, newsym);
return newsym; return newsym;
} }
@ -2426,6 +2430,7 @@ static void free_symbol(symbol *sym)
SC_FUNC void delete_symbol(symbol *root,symbol *sym) SC_FUNC void delete_symbol(symbol *root,symbol *sym)
{ {
symbol *origRoot = root;
/* find the symbol and its predecessor /* find the symbol and its predecessor
* (this function assumes that you will never delete a symbol that is not * (this function assumes that you will never delete a symbol that is not
* in the table pointed at by "root") * in the table pointed at by "root")
@ -2436,6 +2441,9 @@ SC_FUNC void delete_symbol(symbol *root,symbol *sym)
assert(root!=NULL); assert(root!=NULL);
} /* while */ } /* while */
if (origRoot==&glbtab)
RemoveFromHashTable(sp_Globals, sym);
/* unlink it, then free it */ /* unlink it, then free it */
root->next=sym->next; root->next=sym->next;
free_symbol(sym); free_symbol(sym);
@ -2453,6 +2461,7 @@ SC_FUNC int get_actual_compound(symbol *sym)
SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_functions) SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_functions)
{ {
symbol *origRoot=root;
symbol *sym,*parent_sym; symbol *sym,*parent_sym;
constvalue *stateptr; constvalue *stateptr;
int mustdelete=0; int mustdelete=0;
@ -2506,6 +2515,8 @@ SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_
break; break;
} /* switch */ } /* switch */
if (mustdelete) { if (mustdelete) {
if (origRoot == &glbtab)
RemoveFromHashTable(sp_Globals, sym);
root->next=sym->next; root->next=sym->next;
free_symbol(sym); free_symbol(sym);
} else { } else {
@ -2530,24 +2541,10 @@ SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_
} /* if */ } /* if */
} }
/* The purpose of the hash is to reduce the frequency of a "name"
* comparison (which is costly). There is little interest in avoiding
* clusters in similar names, which is why this function is plain simple.
*/
SC_FUNC uint32_t namehash(const char *name)
{
const unsigned char *ptr=(const unsigned char *)name;
int len=strlen(name);
if (len==0)
return 0L;
assert(len<256);
return (len<<24Lu) + (ptr[0]<<16Lu) + (ptr[len-1]<<8Lu) + (ptr[len>>1Lu]);
}
static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int includechildren) static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int includechildren)
{ {
symbol *ptr=root->next; symbol *ptr=root->next;
unsigned long hash=namehash(name); unsigned long hash=NameHash(name);
while (ptr!=NULL) { while (ptr!=NULL) {
if (hash==ptr->hash && strcmp(name,ptr->name)==0 if (hash==ptr->hash && strcmp(name,ptr->name)==0
&& (ptr->parent==NULL || includechildren) && (ptr->parent==NULL || includechildren)
@ -2661,7 +2658,7 @@ SC_FUNC symbol *findconst(const char *name)
sym=find_symbol(&loctab,name,-1,TRUE); /* try local symbols first */ sym=find_symbol(&loctab,name,-1,TRUE); /* try local symbols first */
if (sym==NULL || sym->ident!=iCONSTEXPR) /* not found, or not a constant */ if (sym==NULL || sym->ident!=iCONSTEXPR) /* not found, or not a constant */
sym=find_symbol(&glbtab,name,fcurrent,TRUE); sym=FindInHashTable(sp_Globals,name,fcurrent);
if (sym==NULL || sym->ident!=iCONSTEXPR) if (sym==NULL || sym->ident!=iCONSTEXPR)
return NULL; return NULL;
assert(sym->parent==NULL || (sym->usage & uENUMFIELD)!=0); assert(sym->parent==NULL || (sym->usage & uENUMFIELD)!=0);
@ -2702,7 +2699,7 @@ SC_FUNC symbol *addsym(const char *name,cell addr,int ident,int vclass,int tag,i
/* first fill in the entry */ /* first fill in the entry */
strcpy(entry.name,name); strcpy(entry.name,name);
entry.hash=namehash(name); entry.hash=NameHash(name);
entry.addr=addr; entry.addr=addr;
entry.codeaddr=code_idx; entry.codeaddr=code_idx;
entry.vclass=(char)vclass; entry.vclass=(char)vclass;
@ -2723,8 +2720,7 @@ SC_FUNC symbol *addsym(const char *name,cell addr,int ident,int vclass,int tag,i
/* then insert it in the list */ /* then insert it in the list */
if (vclass==sGLOBAL) if (vclass==sGLOBAL)
return add_symbol(&glbtab,&entry,TRUE); return add_symbol(&glbtab,&entry,TRUE);
else return add_symbol(&loctab, &entry, FALSE);
return add_symbol(&loctab,&entry,FALSE);
} }
SC_FUNC symbol *addvariable(const char *name,cell addr,int ident,int vclass,int tag, SC_FUNC symbol *addvariable(const char *name,cell addr,int ident,int vclass,int tag,

View File

@ -24,6 +24,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> /* for _MAX_PATH */ #include <stdlib.h> /* for _MAX_PATH */
#include "sc.h" #include "sc.h"
#include "sp_symhash.h"
/* global variables /* global variables
* *
@ -96,6 +97,8 @@ SC_VDEFINE FILE *outf = NULL; /* (intermediate) text file written to */
SC_VDEFINE jmp_buf errbuf; SC_VDEFINE jmp_buf errbuf;
SC_VDEFINE HashTable *sp_Globals = NULL;
#if !defined SC_LIGHT #if !defined SC_LIGHT
SC_VDEFINE int sc_makereport=FALSE; /* generate a cross-reference report */ SC_VDEFINE int sc_makereport=FALSE; /* generate a cross-reference report */
#endif #endif

View File

@ -0,0 +1,229 @@
/* vim: set ts=4 sw=4 tw=99 et: */
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "sc.h"
#include "sp_symhash.h"
/*
 * NameHash - compute the 32-bit hash of a NUL-terminated symbol name.
 *
 * The mixing scheme follows the structure of Paul Hsieh's "SuperFastHash":
 * the string is consumed four bytes at a time as two little-endian 16-bit
 * halves, the 1..3 trailing bytes are folded in separately, and a final
 * sequence of shifts/xors/adds spreads the bits of the last input bytes.
 *
 * Parameters:
 *   str  - name to hash; may be NULL.
 *
 * Returns the hash value, or 0 when str is NULL or the empty string.
 *
 * Notes:
 *   - The NULL test is performed *before* strlen() so that it is effective.
 *   - 16-bit halves are always assembled byte by byte. This yields exactly
 *     the value a direct 16-bit load produces on a little-endian machine,
 *     but never performs a misaligned or type-punned memory access.
 */
SC_FUNC uint32_t
NameHash(const char *str)
{
#undef get16bits
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
                      + (uint32_t)(((const uint8_t *)(d))[0]))

    const uint8_t *data = (const uint8_t *)str;
    size_t len;
    uint32_t hash, tmp;
    int rem;

    if (str == NULL)
        return 0;

    len = strlen(str);
    if (len == 0)
        return 0;

    hash = (uint32_t)len;       /* the length seeds the hash */
    rem = (int)(len & 3);       /* bytes left over after the 4-byte rounds */
    len >>= 2;                  /* number of full 4-byte rounds */

    /* Main loop: mix four input bytes per iteration */
    for (; len > 0; len--) {
        hash += get16bits(data);
        tmp = (get16bits(data + 2) << 11) ^ hash;
        hash = (hash << 16) ^ tmp;
        data += 2 * sizeof(uint16_t);
        hash += hash >> 11;
    }

    /* Handle end cases: fold in the remaining 1, 2 or 3 bytes */
    switch (rem) {
    case 3:
        hash += get16bits(data);
        hash ^= hash << 16;
        hash ^= (uint32_t)data[sizeof(uint16_t)] << 18;
        hash += hash >> 11;
        break;
    case 2:
        hash += get16bits(data);
        hash ^= hash << 11;
        hash += hash >> 17;
        break;
    case 1:
        hash += *data;
        hash ^= hash << 10;
        hash += hash >> 1;
        break;
    default:                    /* rem == 0: nothing left to fold in */
        break;
    }

    /* Force "avalanching" of final 127 bits */
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 4;
    hash += hash >> 17;
    hash ^= hash << 25;
    hash += hash >> 6;

    return hash;

#undef get16bits
}
/*
 * NewHashTable - allocate an empty symbol hash table.
 *
 * The table starts out with 32 zero-initialised bucket heads. The bucket
 * count is a power of two so that "hash & bucketmask" selects a bucket.
 *
 * Returns the new table, or NULL if either allocation fails (nothing is
 * leaked in that case). Release the table with DestroyHashTable().
 */
SC_FUNC HashTable *NewHashTable()
{
    enum { INITIAL_BUCKETS = 32 };
    HashTable *table;
    HashEntry **heads;

    table = (HashTable *)malloc(sizeof(HashTable));
    if (table == NULL)
        return NULL;

    heads = (HashEntry **)calloc(INITIAL_BUCKETS, sizeof(HashEntry *));
    if (heads == NULL) {
        free(table);
        return NULL;
    }

    table->buckets = heads;
    table->nbuckets = INITIAL_BUCKETS;
    table->bucketmask = INITIAL_BUCKETS - 1;
    table->nused = 0;
    return table;
}
/*
 * DestroyHashTable - release a table created by NewHashTable().
 *
 * Frees every chain entry, the bucket array and the table itself. The
 * symbols the entries point at are not freed here. Passing NULL is a no-op.
 */
SC_FUNC void
DestroyHashTable(HashTable *ht)
{
    uint32_t slot;
    HashEntry *node;
    HashEntry *following;

    if (ht == NULL)
        return;

    for (slot = 0; slot < ht->nbuckets; slot++) {
        for (node = ht->buckets[slot]; node != NULL; node = following) {
            following = node->next;   /* read the link before freeing the node */
            free(node);
        }
    }

    free(ht->buckets);
    free(ht);
}
/*
 * FindTaggedInHashTable - look a name up, preferring a symbol whose tag
 * equals *cmptag.
 *
 * A symbol is a candidate when
 *   - it has no parent, or it is an iCONSTEXPR symbol, and
 *   - its fnumber is negative or equals the fnumber argument, and
 *   - its name compares equal to the name argument.
 *
 * Parameters:
 *   ht      - table to search (must not be NULL).
 *   name    - name to look for.
 *   fnumber - file number used for the fnumber filter above.
 *   cmptag  - in: the wanted tag; out: see below (must not be NULL).
 *
 * Returns:
 *   - the first candidate whose tag equals *cmptag; *cmptag is set to 1;
 *   - otherwise the first candidate in the chain; *cmptag is set to the
 *     number of candidates counted, which are only counted while *cmptag
 *     is 0 (so the result is 0 when a non-zero tag was requested);
 *   - NULL when there is no candidate; *cmptag is left untouched.
 */
SC_FUNC symbol *
FindTaggedInHashTable(HashTable *ht, const char *name, int fnumber,
                      int *cmptag)
{
    symbol *fallback = NULL;
    int nmatches = 0;
    HashEntry *node;

    node = ht->buckets[NameHash(name) & ht->bucketmask];
    assert(cmptag!=NULL);

    for (; node != NULL; node = node->next) {
        symbol *cand = node->sym;

        /* skip everything that is not a candidate */
        if (cand->parent != NULL && cand->ident != iCONSTEXPR)
            continue;
        if (cand->fnumber >= 0 && cand->fnumber != fnumber)
            continue;
        if (strcmp(cand->name, name) != 0)
            continue;

        /* return closest match or first match; count number of matches */
        if (fallback == NULL)
            fallback = cand;
        if (*cmptag == 0)
            nmatches++;
        if (*cmptag == cand->tag) {
            *cmptag = 1;        /* good match found, set number of matches to 1 */
            return cand;
        }
    }

    if (fallback != NULL)
        *cmptag = nmatches;
    return fallback;
}
/*
 * FindInHashTable - look a name up in the table.
 *
 * Walks the bucket selected by NameHash(name) and returns the first symbol
 * that
 *   - has no parent, or is an iCONSTEXPR symbol, and
 *   - has a negative fnumber or an fnumber equal to the argument, and
 *   - has a name that compares equal to the name argument.
 *
 * Returns NULL when no such symbol is in the bucket.
 */
SC_FUNC symbol *
FindInHashTable(HashTable *ht, const char *name, int fnumber)
{
    HashEntry *node;

    for (node = ht->buckets[NameHash(name) & ht->bucketmask];
         node != NULL;
         node = node->next) {
        symbol *cand = node->sym;

        if (cand->parent != NULL && cand->ident != iCONSTEXPR)
            continue;
        if (cand->fnumber >= 0 && cand->fnumber != fnumber)
            continue;
        if (strcmp(cand->name, name) == 0)
            return cand;
    }

    return NULL;
}
/*
 * ResizeHashTable - double the number of buckets and redistribute entries.
 *
 * The existing HashEntry nodes are relinked into the new bucket array using
 * the hash stored in each symbol; no entry is allocated or freed. If the new
 * bucket array cannot be allocated the table is left exactly as it was.
 */
static void
ResizeHashTable(HashTable *ht)
{
    uint32_t grown_count = ht->nbuckets * 2;
    uint32_t grown_mask = grown_count - 1;
    HashEntry **grown;
    HashEntry *node;
    HashEntry *rest;
    uint32_t slot;
    uint32_t dest;

    grown = (HashEntry **)calloc(grown_count, sizeof(HashEntry*));
    if (grown == NULL)
        return;

    for (slot = 0; slot < ht->nbuckets; slot++) {
        for (node = ht->buckets[slot]; node != NULL; node = rest) {
            rest = node->next;              /* remember the old chain first */
            dest = node->sym->hash & grown_mask;
            node->next = grown[dest];       /* push onto the new chain's head */
            grown[dest] = node;
        }
    }

    free(ht->buckets);
    ht->buckets = grown;
    ht->nbuckets = grown_count;
    ht->bucketmask = grown_mask;
}
/*
 * AddToHashTable - insert a symbol into the table.
 *
 * The bucket is chosen from sym->hash, so that field must already be set.
 * The new entry is appended at the tail of the bucket's chain; while walking
 * there, an assertion checks that the symbol is not already in the chain.
 * After the insertion the table is grown when it holds more entries than it
 * has buckets (and doubling the bucket count is still within range).
 */
SC_FUNC void
AddToHashTable(HashTable *ht, symbol *sym)
{
    HashEntry **link;
    HashEntry *node;

    /* advance to the terminating NULL link of the chain */
    link = &ht->buckets[sym->hash & ht->bucketmask];
    for (; *link != NULL; link = &(*link)->next) {
        assert((*link)->sym != sym);
    }

    node = (HashEntry *)malloc(sizeof(HashEntry));
    /* NOTE(review): the entry is dereferenced unconditionally below, so this
     * relies on error(163) not returning -- confirm against error(). */
    if (node == NULL)
        error(163);
    node->sym = sym;
    node->next = NULL;
    *link = node;

    ht->nused++;
    if (ht->nused > ht->nbuckets && ht->nbuckets <= INT_MAX / 2)
        ResizeHashTable(ht);
}
/*
 * RemoveFromHashTable - unlink and free the entry that refers to sym.
 *
 * The bucket is chosen from sym->hash, so the call must be made while that
 * field still holds the value the symbol was inserted with. Only the chain
 * entry is freed, never the symbol. Removing a symbol that is not in the
 * table trips an assertion.
 */
SC_FUNC void
RemoveFromHashTable(HashTable *ht, symbol *sym)
{
    HashEntry **link = &ht->buckets[sym->hash & ht->bucketmask];

    for (; *link != NULL; link = &(*link)->next) {
        HashEntry *node = *link;

        if (node->sym != sym)
            continue;

        *link = node->next;     /* bypass the node in its chain */
        free(node);
        ht->nused--;
        return;
    }

    assert(0);
}

View File

@ -0,0 +1,28 @@
/* vim: set ts=4 sw=4 tw=99 et: */
/*
 * Chained hash table that maps symbol names to "symbol" structures.
 *
 * This header uses SC_FUNC, symbol, uint32_t and the HashTable typedef
 * without declaring them itself; sp_symhash.c includes "sc.h" before this
 * file to provide them.
 */
#ifndef _INCLUDE_SPCOMP_SYMHASH_H_
#define _INCLUDE_SPCOMP_SYMHASH_H_
/* Hash of a NUL-terminated name; the empty string hashes to 0. */
SC_FUNC uint32_t NameHash(const char *str);
/* One node of a bucket's singly linked chain. */
typedef struct HashEntry {
/* the symbol this entry refers to (the entry does not own it) */
symbol *sym;
/* next entry in the same bucket, or NULL at the end of the chain */
struct HashEntry *next;
} HashEntry;
/* The table itself; create with NewHashTable(), free with DestroyHashTable(). */
struct HashTable {
/* number of elements in "buckets" */
uint32_t nbuckets;
/* number of entries currently stored in the table */
uint32_t nused;
/* nbuckets - 1; a hash is AND-ed with this to select a bucket */
uint32_t bucketmask;
/* array of chain heads, one per bucket */
HashEntry **buckets;
};
/* Allocates an empty table; returns NULL when memory cannot be allocated. */
SC_FUNC HashTable *NewHashTable();
/* Frees all entries, the bucket array and the table; NULL is accepted.
 * The symbols referred to by the entries are not freed. */
SC_FUNC void DestroyHashTable(HashTable *ht);
/* Inserts sym into the bucket selected by sym->hash; may grow the table. */
SC_FUNC void AddToHashTable(HashTable *ht, symbol *sym);
/* Removes the entry for sym from the bucket selected by sym->hash;
 * asserts when sym is not found there. */
SC_FUNC void RemoveFromHashTable(HashTable *ht, symbol *sym);
/* Returns the first symbol named "name" that has no parent or is an
 * iCONSTEXPR, and whose fnumber is negative or equal to "fnumber";
 * NULL when there is none. */
SC_FUNC symbol *FindInHashTable(HashTable *ht, const char *name, int fnumber);
/* Like FindInHashTable(), but a symbol whose tag equals *cmptag is preferred:
 * on such a match *cmptag is set to 1. Otherwise the first matching symbol is
 * returned and *cmptag receives the match count (counted only when *cmptag
 * was 0). cmptag must not be NULL. */
SC_FUNC symbol *FindTaggedInHashTable(HashTable *ht, const char *name, int fnumber,
int *cmptag);
#endif /* _INCLUDE_SPCOMP_SYMHASH_H_ */