ae75f9d4e5
- 755 -> 644
444 lines
14 KiB
C
444 lines
14 KiB
C
/* decomp16: decompress 16bit compressed files on a 16bit Intel processor
|
|
*
|
|
* Version 1.3 of 25 Mar 92.
|
|
*
|
|
* This was written by John N. White on 6/30/91 and is Public Domain.
|
|
* Patched to run under news by Will Rose, Feb 92.
|
|
* J N White's (earlier) patches added by Will Rose, 20 Feb 92.
|
|
* Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
|
|
* Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
|
|
*
|
|
* decomp16 can use as as little as 512 bytes of stack; since it forks
|
|
* four additional copies, it's probably worth using minimum stack rather
|
|
* than the 8192 byte Minix default. To reduce memory still further,
|
|
* change BUFSZ below to 256; it is currently set to 1024 for speed. The
|
|
* minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
|
|
*
|
|
* This program acts as a filter:
|
|
* decomp16 < compressed_file > decompressed_file
|
|
* The arguments -0 to -4 run only the corresponding pass.
|
|
* Thus:
|
|
* decomp16 -4 < compressed_file > 3;
|
|
* decomp16 -3 < 3 > 2;
|
|
* decomp16 -2 < 2 > 1;
|
|
* decomp16 -1 < 1 > 0;
|
|
* decomp16 -0 < 0 > decompressed_file
|
|
* will also work, as will connecting the passes by explicit pipes if
|
|
* there is enough memory to do so. File name arguments can also be
|
|
* given directly on the command line.
|
|
*
|
|
* Compress uses a modified LZW compression algorithm. A compressed file
|
|
* is a set of indices into a dictionary of strings. The number of bits
|
|
* used to store each index depends on the number of entries currently
|
|
* in the dictionary. If there are between 257 and 512 entries, 9 bits
|
|
* are used. With 513 entries, 10 bits are used, etc. The initial dictionary
|
|
* consists of 0-255 (which are the corresponding chars) and 256 (which
|
|
* is a special CLEAR code). As each index in the compressed file is read,
|
|
* a new entry is added to the dictionary consisting of the current string
|
|
* with the first char of the next string appended. When the dictionary
|
|
* is full, no further entries are added. If a CLEAR code is received,
|
|
* the dictionary will be completely reset. The first two bytes of the
|
|
* compressed file are a magic number, and the third byte indicates the
|
|
* maximum number of bits, and whether the CLEAR code is used (older versions
|
|
* of compress didn't have CLEAR).
|
|
*
|
|
* This program works by forking four more copies of itself. The five
|
|
* programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
|
|
* to supply its input, which in turn forks and reads from copy 2, etc.
|
|
* This sequence is used so that when the program exits, all writes
|
|
* are completed and a program that has exec'd uncompress (such as news)
|
|
* can immediately use the uncompressed data when the wait() call returns.
|
|
*
|
|
* If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
|
|
* program will run as that copy, reading from stdin and writing to stdout.
|
|
* This allows decompressing with very limited RAM because only one of the
|
|
* five passes is in memory at a time.
|
|
*
|
|
* The compressed data is a series of string indices (and a header at
|
|
* the beginning and an occasional CLEAR code). As these indices flow
|
|
* through the pipes, each program decodes the ones it can. The result
|
|
* of each decoding will be indices that the following programs can handle.
|
|
*
|
|
* Each of the 65536 strings in the dictionary is an earlier string with
|
|
* some character added to the end (except for the the 256 predefined
|
|
* single char strings). When new entries are made to the dictionary,
|
|
* the string index part will just be the last index to pass through.
|
|
* But the char part is the first char of the next string, which isn't
|
|
* known yet. So the string can be stored as a pair of indices. When
|
|
* this string is specified, it is converted to this pair of indices,
|
|
* which are flagged so that the first will be decoded in full while
|
|
* the second will be decoded to its first char. The dictionary takes
|
|
* 256k to store (64k strings of 2 indices of 2 bytes each). This is
|
|
* too big for a 64k data segment, so it is divided into 5 equal parts.
|
|
* Copy 4 of the program maintains the high part and copy 0 holds the
|
|
* low part.
|
|
*/
|
|
|
|
#include <sys/types.h>
|
|
#include <fcntl.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
|
|
#define BUFSZ 1024 /* size of i/o buffers */
|
|
#define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */
|
|
#define DICTSZ (unsigned)13056 /* # of local dictionary entries */
|
|
#define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */
|
|
#define FALSE 0
|
|
#define TRUE ~FALSE
|
|
|
|
int fdin, fdout, fderr; /* input, output, and error file descriptors */
|
|
int ibufstart, obufind, ibufend;/* i/o buffer indices */
|
|
int ipbufind = BUFSZ_2; /* pipe buffer indices */
|
|
int opbufind = 0;
|
|
int pnum = -1; /* ID of this copy */
|
|
unsigned short ipbuf[BUFSZ_2]; /* for buffering input */
|
|
unsigned short opbuf[BUFSZ_2]; /* for buffering output */
|
|
unsigned char *ibuf = (unsigned char *) ipbuf;
|
|
unsigned char *obuf = (unsigned char *) opbuf;
|
|
|
|
unsigned short dindex[DICTSZ]; /* dictionary: index to substring */
|
|
unsigned short dchar[DICTSZ]; /* dictionary: last char of string */
|
|
unsigned iindex, tindex, tindex2; /* holds index being processed */
|
|
unsigned base; /* where in global dict local dict starts */
|
|
unsigned tbase;
|
|
unsigned locend; /* where in global dict local dict ends */
|
|
unsigned curend = 256; /* current end of global dict */
|
|
unsigned maxend; /* max end of global dict */
|
|
int dcharp; /* ptr to dchar that needs next index entry */
|
|
int curbits; /* number of bits for getbits() to read */
|
|
int maxbits; /* limit on number of bits */
|
|
int clearflg; /* if set, allow CLEAR */
|
|
int inmod; /* mod 8 for getbits() */
|
|
|
|
_PROTOTYPE(int main, (int argc, char **argv));
|
|
_PROTOTYPE(void ffork, (void));
|
|
_PROTOTYPE(void die, (char *s));
|
|
_PROTOTYPE(void myputc, (unsigned c));
|
|
_PROTOTYPE(unsigned mygetc, (void));
|
|
_PROTOTYPE(void getbits, (void));
|
|
_PROTOTYPE(void getpipe, (void));
|
|
_PROTOTYPE(void putpipe, (unsigned u, int flag));
|
|
|
|
int main(argc, argv)
|
|
int argc;
|
|
char **argv;
|
|
{
|
|
char c, *cp;
|
|
int j, k, fdtmp;
|
|
unsigned int len;
|
|
|
|
/* Find the program name */
|
|
j = 0;
|
|
while (argv[0][j] != '\0') j++;
|
|
len = (unsigned int) j;
|
|
while (j--)
|
|
if (argv[0][j] == '/') break;
|
|
if (argv[0][j] == '/') j++;
|
|
cp = argv[0] + j;
|
|
len -= j;
|
|
|
|
/* Sort out the flags */
|
|
for (k = 1; k < argc; k++) {
|
|
if (argv[k][0] == '-') {
|
|
c = argv[k][1];
|
|
switch (c) {
|
|
case '0': /* pass numbers */
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4': pnum = c - '0'; break;
|
|
case 'd': /* used by news */
|
|
break;
|
|
default:
|
|
(void) write(1, "Usage: ", 7);
|
|
(void) write(1, cp, len);
|
|
(void) write(1, " [-#] [in] [out]\n", 17);
|
|
exit(0);
|
|
break;
|
|
}
|
|
|
|
/* Once it's checked, lose it anyway */
|
|
for (j = k; j < argc; j++) argv[j] = argv[j + 1];
|
|
argc--;
|
|
k--;
|
|
}
|
|
}
|
|
|
|
/* Default i/o settings */
|
|
fdin = 0;
|
|
fdout = 1;
|
|
fderr = 2;
|
|
|
|
/* Try to open specific files and connect them to stdin/stdout */
|
|
if (argc > 1) {
|
|
if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
|
|
(void) close(0);
|
|
if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
|
|
(void) close(fdtmp);
|
|
}
|
|
if (argc > 2) {
|
|
(void) unlink(argv[2]);
|
|
if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
|
|
(void) close(1);
|
|
if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
|
|
(void) close(fdtmp);
|
|
}
|
|
|
|
/* Sort out type of compression */
|
|
if (pnum == -1 || pnum == 4) {/* if this is pass 4 */
|
|
/* Check header of compressed file */
|
|
if (mygetc() != 0x1F || mygetc() != 0x9D) /* check magic number */
|
|
die("not a compressed file\n");
|
|
iindex = mygetc(); /* get compression style */
|
|
} else
|
|
getpipe(); /* get compression style */
|
|
|
|
maxbits = iindex & 0x1F;
|
|
clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
|
|
if (maxbits < 9 || maxbits > 16) /* check for valid maxbits */
|
|
die("can't decompress\n");
|
|
if (pnum != -1 && pnum != 0)
|
|
putpipe(iindex, 0); /* pass style to next copy */
|
|
|
|
/* Fork off an ancestor if necessary - ffork() increments pnum */
|
|
if (pnum == -1) {
|
|
pnum = 0;
|
|
if (pnum == 0) ffork();
|
|
if (pnum == 1) ffork();
|
|
if (pnum == 2) ffork();
|
|
if (pnum == 3) ffork();
|
|
}
|
|
|
|
/* Preliminary inits. Note: end/maxend/curend are highest, not
|
|
* highest + 1 */
|
|
base = DICTSZ * pnum + 256;
|
|
locend = base + DICTSZ - 1;
|
|
maxend = (1 << maxbits) - 1;
|
|
if (maxend > locend) maxend = locend;
|
|
|
|
while (TRUE) {
|
|
curend = 255 + (clearflg ? 1 : 0); /* init dictionary */
|
|
dcharp = DICTSZ; /* flag for none needed */
|
|
curbits = 9; /* init curbits (for copy 0) */
|
|
while (TRUE) { /* for each index in input */
|
|
if (pnum == 4) {/* get index using getbits() */
|
|
if (curbits < maxbits && (1 << curbits) <= curend) {
|
|
/* Curbits needs to be increased */
|
|
/* Due to uglyness in compress, these
|
|
* indices in the compressed file are
|
|
* wasted */
|
|
while (inmod) getbits();
|
|
curbits++;
|
|
}
|
|
getbits();
|
|
} else
|
|
getpipe(); /* get next index */
|
|
|
|
if (iindex == 256 && clearflg) {
|
|
if (pnum > 0) putpipe(iindex, 0);
|
|
/* Due to uglyness in compress, these indices
|
|
* in the compressed file are wasted */
|
|
while (inmod) getbits();
|
|
break;
|
|
}
|
|
tindex = iindex;
|
|
/* Convert the index part, ignoring spawned chars */
|
|
while (tindex >= base) tindex = dindex[tindex - base];
|
|
/* Pass on the index */
|
|
putpipe(tindex, 0);
|
|
/* Save the char of the last added entry, if any */
|
|
if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
|
|
if (curend < maxend && ++curend > (base - 1))
|
|
dindex[dcharp = (curend - base)] = iindex;
|
|
|
|
/* Do spawned chars. They are naturally produced in
|
|
* the wrong order. To get them in the right order
|
|
* without using memory, a series of passes,
|
|
* progressively less deep, are used */
|
|
tbase = base;
|
|
while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
|
|
while ((tindex2 = dindex[tindex - base]) >= tbase)
|
|
tindex = tindex2; /* scan to desired char */
|
|
putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
|
|
tbase = tindex + 1;
|
|
if (tbase == 0) break; /* it's a wrap */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* F f o r k
|
|
*
|
|
* Fork off the previous pass - the parent reads from the child.
|
|
*/
|
|
void ffork()
|
|
{
|
|
int j, pfd[2];
|
|
|
|
if (pipe(pfd) == -1) die("pipe() error\n");
|
|
if ((j = fork()) == -1) die("fork() error\n");
|
|
if (j == 0) { /* this is the child */
|
|
if (close(1) == -1) die("close(1) error\n");
|
|
if (dup(pfd[1]) != 1) die("dup(1) error\n");
|
|
(void) close(pfd[0]);
|
|
pnum++;
|
|
} else { /* this is the parent */
|
|
if (close(0) == -1) die("close(0) error\n");
|
|
if (dup(pfd[0]) != 0) die("dup(0) error\n");
|
|
(void) close(pfd[1]);
|
|
}
|
|
}
|
|
|
|
|
|
/* D i e
|
|
*
|
|
* If s is a message, write it to stderr. Flush buffers if needed. Then exit.
|
|
*/
|
|
void die(s)
|
|
char *s;
|
|
{
|
|
/* Flush stdout buffer if needed */
|
|
if (obufind != 0) {
|
|
if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
|
|
s = "bad stdout write\n";
|
|
obufind = 0;
|
|
}
|
|
|
|
/* Flush pipe if needed */
|
|
do
|
|
putpipe(EOF_INDEX, 0);
|
|
while (opbufind);
|
|
/* Write any error message */
|
|
if (s != (char *) NULL) {
|
|
while (*s) (void) write(fderr, s++, 1);
|
|
}
|
|
exit((s == (char *) NULL) ? 0 : 1);
|
|
}
|
|
|
|
|
|
/* M p u t c
|
|
*
|
|
* Put a char to stdout.
|
|
*/
|
|
void myputc(c)
|
|
unsigned c;
|
|
{
|
|
obuf[obufind++] = c;
|
|
if (obufind >= BUFSZ) { /* if stdout buffer full */
|
|
if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ) /* flush to stdout */
|
|
die("bad stdout write\n");
|
|
obufind = 0;
|
|
}
|
|
}
|
|
|
|
|
|
/* M y g e t c
|
|
*
|
|
* Get a char from stdin. If EOF, then die() and exit.
|
|
*/
|
|
unsigned mygetc()
|
|
{
|
|
if (ibufstart >= ibufend) { /* if stdin buffer empty */
|
|
if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
|
|
die((char *) NULL); /* if EOF, do normal exit */
|
|
ibufstart = 0;
|
|
}
|
|
return(ibuf[ibufstart++] & 0xff);
|
|
}
|
|
|
|
|
|
/* G e t b i t s
|
|
*
|
|
* Put curbits bits into index from stdin. Note: only copy 4 uses this.
|
|
* The bits within a byte are in the correct order. But when the bits
|
|
* cross a byte boundry, the lowest bits will be in the higher part of
|
|
* the current byte, and the higher bits will be in the lower part of
|
|
* the next byte.
|
|
*/
|
|
void getbits()
|
|
{
|
|
int have;
|
|
static unsigned curbyte; /* byte having bits extracted from it */
|
|
static int left; /* how many bits are left in curbyte */
|
|
|
|
inmod = (inmod + 1) & 7; /* count input mod 8 */
|
|
iindex = curbyte;
|
|
have = left;
|
|
if (curbits - have > 8) {
|
|
iindex |= mygetc() << have;
|
|
have += 8;
|
|
}
|
|
iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
|
|
curbyte >>= curbits - have;
|
|
left = 8 - (curbits - have);
|
|
}
|
|
|
|
|
|
/* G e t p i p e
|
|
*
|
|
* Get an index from the pipeline. If flagged firstonly, handle it here.
|
|
*/
|
|
void getpipe()
|
|
{
|
|
static short flags;
|
|
static int n = 0; /* number of flags in flags */
|
|
|
|
while (TRUE) { /* while index with firstonly flag set */
|
|
if (n <= 0) {
|
|
if (ipbufind >= BUFSZ_2) { /* if pipe input buffer
|
|
* empty */
|
|
if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
|
|
die("bad pipe read\n");
|
|
ipbufind = 0;
|
|
}
|
|
flags = ipbuf[ipbufind++];
|
|
n = 15;
|
|
}
|
|
iindex = ipbuf[ipbufind++];
|
|
if (iindex > curend)
|
|
die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
|
|
flags <<= 1;
|
|
n--;
|
|
/* Assume flags < 0 if highest remaining flag is set */
|
|
if (flags < 0) { /* if firstonly flag for index is not set */
|
|
while (iindex >= base) iindex = dindex[iindex - base];
|
|
putpipe(iindex, 1);
|
|
} else
|
|
return; /* return with valid non-firstonly index */
|
|
}
|
|
}
|
|
|
|
|
|
/* P u t p i p e
|
|
*
|
|
* put an index into the pipeline.
|
|
*/
|
|
void putpipe(u, flag)
|
|
unsigned u;
|
|
int flag;
|
|
{
|
|
static unsigned short flags, *flagp;
|
|
static int n = 0; /* number of flags in flags */
|
|
|
|
if (pnum == 0) { /* if we should write to stdout */
|
|
myputc(u); /* index will be the char value */
|
|
return;
|
|
}
|
|
if (n == 0) { /* if we need to reserve a flag entry */
|
|
flags = 0;
|
|
flagp = opbuf + opbufind;
|
|
opbufind++;
|
|
}
|
|
opbuf[opbufind++] = u; /* add index to buffer */
|
|
flags = (flags << 1) | flag; /* add firstonly flag */
|
|
if (++n >= 15) { /* if block of 15 indices */
|
|
n = 0;
|
|
*flagp = flags; /* insert flags entry */
|
|
if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */
|
|
opbufind = 0;
|
|
if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
|
|
die("bad pipe write\n");
|
|
}
|
|
}
|
|
}
|