/* vi: set sw=4 ts=4: */
/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).

   Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
   which also acknowledges contributions by Mike Burrows, David Wheeler,
   Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
   Robert Sedgewick, and Jon L. Bentley.

   This code is licensed under the LGPLv2:
   LGPL (http://www.gnu.org/copyleft/lgpl.html)
*/

/*
   Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).

   More efficient reading of huffman codes, a streamlined read_bunzip()
   function, and various other tweaks. In (limited) tests, approximately
   20% faster than bzcat on x86 and about 10% faster on arm.

   Note that about 2/3 of the time is spent in read_bunzip() reversing
   the Burrows-Wheeler transformation. Much of that time is delay
   resulting from cache misses.

   I would ask that anyone benefiting from this work, especially those
   using it in commercial products, consider making a donation to my local
   non-profit hospice organization (see www.hospiceacadiana.com) in the
   name of the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.

   Manuel
*/

/* Modified by Macoy Madson:
   - Give no errors with a C++ compiler (all -fpermissive errors)
   - Add header file
*/

#include <setjmp.h>

/* Constants for huffman coding */
#define MAX_GROUPS			6
#define GROUP_SIZE			50		/* 64 would have been more efficient */
#define MAX_HUFCODE_BITS	20		/* Longest huffman code allowed */
#define MAX_SYMBOLS			258		/* 256 literals + RUNA + RUNB */
#define SYMBOL_RUNA			0
#define SYMBOL_RUNB			1

/* Status return values.  RETVAL_OK is zero; every other status code is
   negative, so a caller can test for "not OK" with a single sign check. */
#define RETVAL_OK						0
#define RETVAL_LAST_BLOCK				(-1)
#define RETVAL_NOT_BZIP_DATA			(-2)
#define RETVAL_UNEXPECTED_INPUT_EOF		(-3)
#define RETVAL_UNEXPECTED_OUTPUT_EOF	(-4)
#define RETVAL_DATA_ERROR				(-5)
#define RETVAL_OUT_OF_MEMORY			(-6)
#define RETVAL_OBSOLETE_INPUT			(-7)

/* Other housekeeping constants */
#define IOBUF_SIZE			4096

/* This is what we know about each huffman coding group */
|
||
|
struct group_data {
|
||
|
/* We have an extra slot at the end of limit[] for a sentinal value. */
|
||
|
int limit[MAX_HUFCODE_BITS+1],base[MAX_HUFCODE_BITS],permute[MAX_SYMBOLS];
|
||
|
int minLen, maxLen;
|
||
|
};
|
||
|
|
||
|
/* Structure holding all the housekeeping data, including IO buffers and
|
||
|
memory that persists between calls to bunzip */
|
||
|
typedef struct {
|
||
|
/* State for interrupting output loop */
|
||
|
int writeCopies,writePos,writeRunCountdown,writeCount,writeCurrent;
|
||
|
/* I/O tracking data (file handles, buffers, positions, etc.) */
|
||
|
int in_fd,out_fd,inbufCount,inbufPos /*,outbufPos*/;
|
||
|
unsigned char *inbuf /*,*outbuf*/;
|
||
|
unsigned int inbufBitCount, inbufBits;
|
||
|
/* The CRC values stored in the block header and calculated from the data */
|
||
|
unsigned int crc32Table[256],headerCRC, totalCRC, writeCRC;
|
||
|
/* Intermediate buffer and its size (in bytes) */
|
||
|
unsigned int *dbuf, dbufSize;
|
||
|
/* These things are a bit too big to go on the stack */
|
||
|
unsigned char selectors[32768]; /* nSelectors=15 bits */
|
||
|
struct group_data groups[MAX_GROUPS]; /* huffman coding tables */
|
||
|
/* For I/O error handling */
|
||
|
jmp_buf jmpbuf;
|
||
|
} bunzip_data;
|
||
|
|
||
|
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
|
||
|
a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
|
||
|
ignored, and data is read from file handle into temporary buffer. */
|
||
|
int start_bunzip(bunzip_data **bdp, int in_fd, char *inbuf, int len);
|
||
|
|
||
|
/* Undo burrows-wheeler transform on intermediate buffer to produce output.
|
||
|
If start_bunzip was initialized with out_fd=-1, then up to len bytes of
|
||
|
data are written to outbuf. Return value is number of bytes written or
|
||
|
error (all errors are negative numbers). If out_fd!=-1, outbuf and len
|
||
|
are ignored, data is written to out_fd and return is RETVAL_OK or error.
|
||
|
*/
|
||
|
int read_bunzip(bunzip_data *bd, char *outbuf, int len);
|