/* compress.c -- Byte Pair Encoding compression */ /* Copyright 1996 Philip Gage */ /* This program appeared in the September 1997 issue of * C/C++ Users Journal. The original source code may still * be found at the web site of the magazine (www.cuj.com). * * It has been modified by me (Thiadmer Riemersma) to * compress only a section of the input file and to store * the compressed output along with the input as "C" strings. * * Compiling instructions: * Borland C++ 16-bit (large memory model is required): * bcc -ml scpack.c * * Watcom C/C++ 32-bit: * wcl386 scpack.c * * GNU C (Linux), 32-bit: * gcc scpack.c -o scpack */ /* NOTE(review): in this copy the file is collapsed onto a few physical lines, the header names after the include directives below are missing, and several spans of code are absent (see the notes further down). Restore these from the upstream source before building. */ #include #include #include #include #include #if UINT_MAX > 0xFFFFU #define MAXSIZE 1024*1024L #else #define MAXSIZE UINT_MAX /* Input file buffer size */ #endif #define HASHSIZE 8192 /* Hash table size, power of 2 */ #define THRESHOLD 3 /* Increase for speed, min 3 */ #define START_TOKEN "#ifdef SCPACK" /* start reading the buffer here */ #define NAME_TOKEN "#define SCPACK_TABLE" #define SEP_TOKEN "#define SCPACK_SEPARATOR" #define TERM_TOKEN "#define SCPACK_TERMINATOR" #define TEMPFILE "~SCPACK.TMP" static char tablename[32+1] = "scpack_table"; static char separator[16]=","; static char terminator[16]=""; /* compress: byte pair encoding of the data in buffer. Allocates three work arrays of HASHSIZE bytes (left, right, count) and terminates the program with exit status 1 if an allocation fails or if the 7-bit ASCII check fails. It then clears pairtable and, pass by pass, picks the pair with the highest count; when that count reaches THRESHOLD the pair is stored in pairtable[code-128], with code starting at 128. NOTE(review): the replacement loop, the end of the do loop and the return statement are not visible in this copy, so the return value (main stores it as the new buffer size) and the exact loop bounds must be confirmed against the upstream source. */ int compress(unsigned char *buffer, unsigned buffersize, unsigned char pairtable[128][2]) { unsigned char *left, *right, *count; unsigned char a, b, bestcount; unsigned i, j, index, bestindex, code=128; /* Dynamically allocate buffers and check for errors */ left = (unsigned char *)malloc(HASHSIZE); right = (unsigned char *)malloc(HASHSIZE); count = (unsigned char *)malloc(HASHSIZE); if (left==NULL || right==NULL || count==NULL) { printf("Error allocating memory\n"); exit(1); } /* Check for errors */ /* NOTE(review): the loop header below is truncated in this copy; presumably it scans buffer for bytes above 127, as the error message suggests -- confirm */ for (i=0; i 127) { printf("This program works only on text files (7-bit ASCII)\n"); exit(1); } memset(pairtable, 0, 128*2*sizeof(char)); /* start with an empty pair table */ do { /* Replace frequent pairs with bytes 128..255 */ /* Enter counts of all byte pairs into hash table */ 
memset(count,0,HASHSIZE); /* the counters are cleared at the start of every pass */ /* NOTE(review): the next loop header is incomplete in this copy; the code that fills left, right and count and the start of the search for the highest count are not visible */ for (i=0; i bestcount) { bestcount = count[i]; bestindex = i; } } /* Compress if enough occurrences of pair */ if (bestcount >= THRESHOLD) { /* Add pair to table using code as index */ a = pairtable[code-128][0] = left[bestindex]; b = pairtable[code-128][1] = right[bestindex]; /* Replace all pair occurrences with unused byte */ /* NOTE(review): a large span is missing after the start of the next loop header: the rest of compress and everything up to the middle of an output-writing routine. The definitions of readbuffer, strmatch and writefile, and the declarations of bufptr, output, input, str and needseparator used below, are not visible here. The condition that follows is only the tail of a longer expression. */ for (i=0, j=0; i= 128 || *bufptr == '"' || *bufptr == '\\') fprintf(output, "\\%03o", *bufptr); else fprintf(output, "%c", *bufptr); /* bytes selected by the condition above are written as a backslash plus three octal digits, all other bytes are written unchanged */ bufptr++; } /* while */ fprintf(output, "\""); needseparator = 1; bufptr++; /* skip '\0' */ } /* while */ fprintf(output, "%s\n",terminator); bufptr++; /* skip the input file until the #endif section */ while (fgets(str,sizeof str,input)!=NULL) { if (strmatch(str,"#endif",NULL)) { fprintf(output,"%s",str); /* the matching line itself is copied to the output */ break; /* done */ } /* if */ } /* while */ } /* while - !feof(input) */ } /* usage: prints the command line synopsis and terminates the program with exit status 1. NOTE(review): main requires an input file name in argv[1], but the message shows no placeholder for it; the text looks truncated -- confirm against the upstream source. */ static void usage(void) { printf("Usage: scpack [output file]\n"); exit(1); } /* main: expects one or two arguments. Opens argv[1] for reading and either argv[2] or, when only one argument is given, TEMPFILE for writing; any failure to open a file prints a message and ends the program through usage. It then allocates a buffer of MAXSIZE bytes, fills it with readbuffer, and if the reported size is non-zero compresses it and writes the result with writefile. Returns 0, or 1 when the buffer allocation fails. */ int main(int argc, char **argv) { FILE *in, *out; unsigned char *buffer; unsigned buffersize, orgbuffersize; unsigned char pairtable[128][2]; if (argc < 2 || argc > 3) usage(); if ((in=fopen(argv[1],"rt"))==NULL) { printf("SCPACK: error opening input %s\n",argv[1]); usage(); } /* if */ if (argc == 2) { if ((out=fopen(TEMPFILE,"wt"))==NULL) { printf("SCPACK: error opening temporary file %s\n",TEMPFILE); usage(); } /* if */ } else { if ((out=fopen(argv[2],"wt"))==NULL) { printf("SCPACK: error opening output file %s\n",argv[2]); usage(); } /* if */ } /* if */ buffer = (unsigned char *)malloc(MAXSIZE); if (buffer == NULL) { printf("SCPACK: error allocating memory\n"); return 1; } /* if */ /* 1. read the buffer * 2. compress the buffer * 3. 
copy the file, insert the compressed buffer */ buffersize = readbuffer(in, buffer); orgbuffersize = buffersize; /* keep the uncompressed size for the ratio report */ if (buffersize > 0) { buffersize = compress(buffer, buffersize, pairtable); writefile(in, out, buffer, buffersize, pairtable); /* NOTE(review): the two sizes are unsigned but are printed with %d */ printf("SCPACK: compression ratio: %ld%% (%d -> %d)\n", 100L-(100L*buffersize)/orgbuffersize, orgbuffersize, buffersize); } else { printf("SCPACK: no SCPACK section found, nothing to do\n"); } /* if */ fclose(out); fclose(in); /* let the new file replace the old file */ /* a size of zero means nothing was written that is worth keeping, so the output (temporary or named) is deleted instead */ if (buffersize == 0) { if (argc == 2) remove(TEMPFILE); else remove(argv[2]); } else if (argc == 2) { /* NOTE(review): the results of remove and rename are not checked; if rename fails the original input file has already been deleted. The buffer allocated above is also never freed before returning. */ remove(argv[1]); rename(TEMPFILE,argv[1]); } /* if */ return 0; }