From f60b00ee711ded2872e7dde385e71cba116e8a5c Mon Sep 17 00:00:00 2001 From: Arkshine Date: Sun, 17 Aug 2014 13:23:05 +0200 Subject: [PATCH] Compiler: Use hash table for global name lookups. Imported from SM: https://bugs.alliedmods.net/show_bug.cgi?id=4496. --- compiler/libpc300/AMBuilder | 1 + compiler/libpc300/libpc300.vcxproj | 6 +- compiler/libpc300/libpc300.vcxproj.filters | 6 + compiler/libpc300/sc.h | 3 +- compiler/libpc300/sc1.c | 11 +- compiler/libpc300/sc2.c | 34 ++- compiler/libpc300/scvars.c | 3 + compiler/libpc300/sp_symhash.c | 229 +++++++++++++++++++++ compiler/libpc300/sp_symhash.h | 28 +++ 9 files changed, 298 insertions(+), 23 deletions(-) create mode 100644 compiler/libpc300/sp_symhash.c create mode 100644 compiler/libpc300/sp_symhash.h diff --git a/compiler/libpc300/AMBuilder b/compiler/libpc300/AMBuilder index 69c0a028..a2eacea6 100644 --- a/compiler/libpc300/AMBuilder +++ b/compiler/libpc300/AMBuilder @@ -75,6 +75,7 @@ binary.sources = [ 'libpawnc.c', 'prefix.c', 'memfile.c', + 'sp_symhash.c', ] if builder.target_platform == 'windows': diff --git a/compiler/libpc300/libpc300.vcxproj b/compiler/libpc300/libpc300.vcxproj index 126cde7a..5264d198 100644 --- a/compiler/libpc300/libpc300.vcxproj +++ b/compiler/libpc300/libpc300.vcxproj @@ -51,7 +51,7 @@ - WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions) + WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;HAVE_STDINT_H;%(PreprocessorDefinitions) MultiThreaded false @@ -71,7 +71,7 @@ Disabled - WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions) + WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBPC300_EXPORTS;PAWNC_DLL;PAWN_CELL_SIZE=32;NO_MAIN;_CRT_SECURE_NO_DEPRECATE;HAVE_STDINT_H;%(PreprocessorDefinitions) true StackFrameRuntimeCheck MultiThreadedDebug @@ -105,11 +105,13 @@ + + diff --git 
a/compiler/libpc300/libpc300.vcxproj.filters b/compiler/libpc300/libpc300.vcxproj.filters index 058ebb71..8d9c17a0 100644 --- a/compiler/libpc300/libpc300.vcxproj.filters +++ b/compiler/libpc300/libpc300.vcxproj.filters @@ -60,6 +60,9 @@ Source Files + + Source Files + @@ -71,6 +74,9 @@ Header Files + + Header Files + diff --git a/compiler/libpc300/sc.h b/compiler/libpc300/sc.h index eaf3783a..9eb47438 100755 --- a/compiler/libpc300/sc.h +++ b/compiler/libpc300/sc.h @@ -545,7 +545,6 @@ SC_FUNC void delete_symbol(symbol *root,symbol *sym); SC_FUNC void delete_symbols(symbol *root,int level,int del_labels,int delete_functions); SC_FUNC int refer_symbol(symbol *entry,symbol *bywhom); SC_FUNC void markusage(symbol *sym,int usage); -SC_FUNC uint32_t namehash(const char *name); SC_FUNC symbol *findglb(const char *name); SC_FUNC symbol *findloc(const char *name); SC_FUNC symbol *findconst(const char *name); @@ -741,6 +740,8 @@ SC_FUNC void state_conflict(symbol *root); /* external variables (defined in scvars.c) */ #if !defined SC_SKIP_VDECL +typedef struct HashTable HashTable; +SC_VDECL struct HashTable *sp_Globals; SC_VDECL symbol loctab; /* local symbol table */ SC_VDECL symbol glbtab; /* global symbol table */ SC_VDECL cell *litq; /* the literal queue */ diff --git a/compiler/libpc300/sc1.c b/compiler/libpc300/sc1.c index 8fa709fe..ff17d875 100755 --- a/compiler/libpc300/sc1.c +++ b/compiler/libpc300/sc1.c @@ -65,6 +65,8 @@ #include #include "sc.h" +#include "sp_symhash.h" + #define VERSION_STR "3.0.3367-amxx" #define VERSION_INT 0x30A @@ -470,6 +472,10 @@ int pc_compile(int argc, char *argv[]) if ((jmpcode=setjmp(errbuf))!=0) goto cleanup; + sp_Globals = NewHashTable(); + if (!sp_Globals) + error(123); + /* allocate memory for fixed tables */ inpfname=(char*)malloc(_MAX_PATH); if (inpfname==NULL) @@ -745,6 +751,7 @@ cleanup: delete_symbols(&loctab,0,TRUE,TRUE); /* delete local variables if not yet * done (i.e. 
on a fatal error) */ delete_symbols(&glbtab,0,TRUE,TRUE); + DestroyHashTable(sp_Globals); delete_consttable(&tagname_tab); delete_consttable(&libname_tab); delete_consttable(&sc_automaton_tab); @@ -2960,8 +2967,10 @@ static int operatoradjust(int opertok,symbol *sym,char *opername,int resulttag) refer_symbol(sym,oldsym->refer[i]); delete_symbol(&glbtab,oldsym); } /* if */ + RemoveFromHashTable(sp_Globals, sym); strcpy(sym->name,tmpname); - sym->hash=namehash(sym->name);/* calculate new hash */ + sym->hash=NameHash(sym->name);/* calculate new hash */ + AddToHashTable(sp_Globals, sym); /* operators should return a value, except the '~' operator */ if (opertok!='~') diff --git a/compiler/libpc300/sc2.c b/compiler/libpc300/sc2.c index 7e715645..360d60a7 100755 --- a/compiler/libpc300/sc2.c +++ b/compiler/libpc300/sc2.c @@ -29,6 +29,7 @@ #if defined LINUX || defined __FreeBSD__ || defined __OpenBSD__ || defined __APPLE__ #include #endif +#include "sp_symhash.h" #if defined FORTIFY #include "fortify.h" @@ -2370,6 +2371,7 @@ SC_FUNC int ishex(char c) static symbol *add_symbol(symbol *root,symbol *entry,int sort) { symbol *newsym; + int global = root==&glbtab; if (sort) while (root->next!=NULL && strcmp(entry->name,root->next->name)>0) @@ -2382,6 +2384,8 @@ static symbol *add_symbol(symbol *root,symbol *entry,int sort) memcpy(newsym,entry,sizeof(symbol)); newsym->next=root->next; root->next=newsym; + if (global) + AddToHashTable(sp_Globals, newsym); return newsym; } @@ -2426,6 +2430,7 @@ static void free_symbol(symbol *sym) SC_FUNC void delete_symbol(symbol *root,symbol *sym) { + symbol *origRoot = root; /* find the symbol and its predecessor * (this function assumes that you will never delete a symbol that is not * in the table pointed at by "root") @@ -2436,6 +2441,9 @@ SC_FUNC void delete_symbol(symbol *root,symbol *sym) assert(root!=NULL); } /* while */ + if (origRoot==&glbtab) + RemoveFromHashTable(sp_Globals, sym); + /* unlink it, then free it */ 
root->next=sym->next; free_symbol(sym); @@ -2453,6 +2461,7 @@ SC_FUNC int get_actual_compound(symbol *sym) SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_functions) { + symbol *origRoot=root; symbol *sym,*parent_sym; constvalue *stateptr; int mustdelete=0; @@ -2506,6 +2515,8 @@ SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_ break; } /* switch */ if (mustdelete) { + if (origRoot == &glbtab) + RemoveFromHashTable(sp_Globals, sym); root->next=sym->next; free_symbol(sym); } else { @@ -2530,24 +2541,10 @@ SC_FUNC void delete_symbols(symbol *root,int level,int delete_labels,int delete_ } /* if */ } -/* The purpose of the hash is to reduce the frequency of a "name" - * comparison (which is costly). There is little interest in avoiding - * clusters in similar names, which is why this function is plain simple. - */ -SC_FUNC uint32_t namehash(const char *name) -{ - const unsigned char *ptr=(const unsigned char *)name; - int len=strlen(name); - if (len==0) - return 0L; - assert(len<256); - return (len<<24Lu) + (ptr[0]<<16Lu) + (ptr[len-1]<<8Lu) + (ptr[len>>1Lu]); -} - static symbol *find_symbol(const symbol *root,const char *name,int fnumber,int includechildren) { symbol *ptr=root->next; - unsigned long hash=namehash(name); + unsigned long hash=NameHash(name); while (ptr!=NULL) { if (hash==ptr->hash && strcmp(name,ptr->name)==0 && (ptr->parent==NULL || includechildren) @@ -2661,7 +2658,7 @@ SC_FUNC symbol *findconst(const char *name) sym=find_symbol(&loctab,name,-1,TRUE); /* try local symbols first */ if (sym==NULL || sym->ident!=iCONSTEXPR) /* not found, or not a constant */ - sym=find_symbol(&glbtab,name,fcurrent,TRUE); + sym=FindInHashTable(sp_Globals,name,fcurrent); if (sym==NULL || sym->ident!=iCONSTEXPR) return NULL; assert(sym->parent==NULL || (sym->usage & uENUMFIELD)!=0); @@ -2702,7 +2699,7 @@ SC_FUNC symbol *addsym(const char *name,cell addr,int ident,int vclass,int tag,i /* first fill in the entry 
/* Load two consecutive bytes as a little-endian 16-bit value.
 *
 * The bytes are read one at a time instead of through a `const uint16_t *`
 * cast: name strings carry no alignment guarantee, and reading char data
 * through a uint16_t lvalue breaks the aliasing rules. On little-endian
 * targets (the only ones where the cast was used) the result is identical.
 */
static uint32_t
read_le16(const uint8_t *p)
{
    return ((uint32_t)p[1] << 8) + (uint32_t)p[0];
}

/* NameHash -- 32-bit hash of a NUL-terminated name.
 *
 * The string is consumed four bytes per round (two 16-bit loads); the one to
 * three trailing bytes are folded in separately, and a closing sequence of
 * shift/xor/add steps mixes the accumulated bits.
 *
 *  str      name to hash; may be NULL
 *  returns  the hash, or 0 for NULL and for the empty string
 */
SC_FUNC uint32_t
NameHash(const char *str)
{
    const uint8_t *data = (const uint8_t *)str;
    size_t len;
    uint32_t hash, tmp;
    unsigned rem;

    /* test the pointer before strlen() touches it */
    if (str == NULL)
        return 0;
    len = strlen(str);
    if (len == 0)
        return 0;

    hash = (uint32_t)len;           /* the length seeds the hash */
    rem = (unsigned)(len & 3);      /* bytes left over after the 4-byte rounds */
    len >>= 2;                      /* number of 4-byte rounds */

    /* Main loop */
    for (; len > 0; len--) {
        hash += read_le16(data);
        tmp = (read_le16(data + 2) << 11) ^ hash;
        hash = (hash << 16) ^ tmp;
        data += 2 * sizeof(uint16_t);
        hash += hash >> 11;
    }

    /* Handle end cases */
    switch (rem) {
    case 3:
        hash += read_le16(data);
        hash ^= hash << 16;
        hash ^= (uint32_t)data[sizeof(uint16_t)] << 18;
        hash += hash >> 11;
        break;
    case 2:
        hash += read_le16(data);
        hash ^= hash << 11;
        hash += hash >> 17;
        break;
    case 1:
        hash += *data;
        hash ^= hash << 10;
        hash += hash >> 1;
        break;
    default:
        break;                      /* length was a multiple of four */
    }

    /* Final mixing ("avalanche") so that every input bit affects the result */
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 4;
    hash += hash >> 17;
    hash ^= hash << 25;
    hash += hash >> 6;

    return hash;
}
+ *cmptag=1; /* good match found, set number of matches to 1 */ + return sym; + } + } + he = he->next; + } + + if (firstmatch!=NULL) + *cmptag=count; + return firstmatch; +} + +SC_FUNC symbol * +FindInHashTable(HashTable *ht, const char *name, int fnumber) +{ + uint32_t hash = NameHash(name); + uint32_t bucket = hash & ht->bucketmask; + HashEntry *he = ht->buckets[bucket]; + + while (he != NULL) { + symbol *sym = he->sym; + if ((sym->parent==NULL || sym->ident==iCONSTEXPR) && + (sym->fnumber<0 || sym->fnumber==fnumber) && + (strcmp(sym->name, name) == 0)) + { + return sym; + } + he = he->next; + } + + return NULL; +} + +static void +ResizeHashTable(HashTable *ht) +{ + uint32_t i; + uint32_t xnbuckets = ht->nbuckets * 2; + uint32_t xbucketmask = xnbuckets - 1; + HashEntry **xbuckets = (HashEntry **)calloc(xnbuckets, sizeof(HashEntry*)); + if (!xbuckets) + return; + + for (i = 0; i < ht->nbuckets; i++) { + HashEntry *he = ht->buckets[i]; + while (he != NULL) { + HashEntry *next = he->next; + uint32_t bucket = he->sym->hash & xbucketmask; + he->next = xbuckets[bucket]; + xbuckets[bucket] = he; + he = next; + } + } + free(ht->buckets); + ht->buckets = xbuckets; + ht->nbuckets = xnbuckets; + ht->bucketmask = xbucketmask; +} + +SC_FUNC void +AddToHashTable(HashTable *ht, symbol *sym) +{ + uint32_t bucket = sym->hash & ht->bucketmask; + HashEntry **hep, *he; + + hep = &ht->buckets[bucket]; + while (*hep) { + assert((*hep)->sym != sym); + hep = &(*hep)->next; + } + + he = (HashEntry *)malloc(sizeof(HashEntry)); + if (!he) + error(163); + he->sym = sym; + he->next = NULL; + *hep = he; + ht->nused++; + + if (ht->nused > ht->nbuckets && ht->nbuckets <= INT_MAX / 2) + ResizeHashTable(ht); +} + +SC_FUNC void +RemoveFromHashTable(HashTable *ht, symbol *sym) +{ + uint32_t bucket = sym->hash & ht->bucketmask; + HashEntry **hep = &ht->buckets[bucket]; + HashEntry *he = *hep; + + while (he != NULL) { + if (he->sym == sym) { + *hep = he->next; + free(he); + ht->nused--; + return; + } 
+ hep = &he->next; + he = *hep; + } + + assert(0); +} + diff --git a/compiler/libpc300/sp_symhash.h b/compiler/libpc300/sp_symhash.h new file mode 100644 index 00000000..beb0f1e5 --- /dev/null +++ b/compiler/libpc300/sp_symhash.h @@ -0,0 +1,28 @@ +/* vim: set ts=4 sw=4 tw=99 et: */ +#ifndef _INCLUDE_SPCOMP_SYMHASH_H_ +#define _INCLUDE_SPCOMP_SYMHASH_H_ + +SC_FUNC uint32_t NameHash(const char *str); + +typedef struct HashEntry { + symbol *sym; + struct HashEntry *next; +} HashEntry; + +struct HashTable { + uint32_t nbuckets; + uint32_t nused; + uint32_t bucketmask; + HashEntry **buckets; +}; + +SC_FUNC HashTable *NewHashTable(); +SC_FUNC void DestroyHashTable(HashTable *ht); +SC_FUNC void AddToHashTable(HashTable *ht, symbol *sym); +SC_FUNC void RemoveFromHashTable(HashTable *ht, symbol *sym); +SC_FUNC symbol *FindInHashTable(HashTable *ht, const char *name, int fnumber); +SC_FUNC symbol *FindTaggedInHashTable(HashTable *ht, const char *name, int fnumber, + int *cmptag); + +#endif /* _INCLUDE_SPCOMP_SYMHASH_H_ */ +