├── LICENSE
├── Makefile
├── README.md
├── README_bzip2
├── blocksort.c
├── bzip2.c
├── bzip2recover.c
├── bzlib.c
├── bzlib.h
├── bzlib_private.h
├── compress.c
├── crctable.c
├── decompress.c
├── dlltest.c
├── gen.c
├── gpuBWTSort.cu
├── huffman.c
├── mk251.c
├── randtable.c
├── run.sh
├── spewG.c
└── unzcrash.c


/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | --------------------------------------------------------------------------
 3 | 
 4 | This program, "bzip2", the associated library "libbzip2", and all
 5 | documentation, are copyright (C) 1996-2010 Julian R Seward.  All
 6 | rights reserved.
 7 | 
 8 | Redistribution and use in source and binary forms, with or without
 9 | modification, are permitted provided that the following conditions
10 | are met:
11 | 
12 | 1. Redistributions of source code must retain the above copyright
13 |    notice, this list of conditions and the following disclaimer.
14 | 
15 | 2. The origin of this software must not be misrepresented; you must 
16 |    not claim that you wrote the original software.  If you use this 
17 |    software in a product, an acknowledgment in the product 
18 |    documentation would be appreciated but is not required.
19 | 
20 | 3. Altered source versions must be plainly marked as such, and must
21 |    not be misrepresented as being the original software.
22 | 
23 | 4. The name of the author may not be used to endorse or promote 
24 |    products derived from this software without specific prior written 
25 |    permission.
26 | 
27 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
28 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 | ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
31 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
33 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
35 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 | 
39 | Julian Seward, jseward@bzip.org
40 | bzip2/libbzip2 version 1.0.6 of 6 September 2010
41 | 
42 | --------------------------------------------------------------------------
43 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # ------------------------------------------------------------------
  2 | # This file is part of bzip2/libbzip2, a program and library for
  3 | # lossless, block-sorting data compression.
  4 | #
  5 | # bzip2/libbzip2 version 1.0.6 of 6 September 2010
  6 | # Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
  7 | #
  8 | # Please read the WARNING, DISCLAIMER and PATENTS sections in the 
  9 | # README file.
 10 | #
 11 | # This program is released under the terms of the license contained
 12 | # in the file LICENSE.
 13 | # ------------------------------------------------------------------
 14 | 
 15 | SHELL=/bin/sh
 16 | 
 17 | # Toolchain knobs; override on the command line to assist in cross-compiling.
 18 | CC=g++
 19 | AR=ar
 20 | RANLIB=ranlib
 21 | CUDA_INSTALL_PATH=/usr/local/cuda
 22 | LDFLAGS=-L$(CUDA_INSTALL_PATH)/lib64 -lcudart
 23 | NVCC=nvcc
 24 | NVCCFLAGS=-lrt -lm -O2
 25 | # LDFLAGS follows CUDA_INSTALL_PATH, so 'make CUDA_INSTALL_PATH=/opt/cuda' also relocates the CUDA runtime library path.
 26 | BIGFILES=-D_FILE_OFFSET_BITS=64
 27 | #CFLAGS= -I. -I/usr/local/cuda/include/thrust_1_3 -I$(CUDA_INSTALL_PATH)/include -Wall -Winline -O2 -lrt -g $(BIGFILES) -fopenmp -lpthread
 28 | CFLAGS= -I. -Wall -Winline -O2 -lrt -g $(BIGFILES) -fopenmp -lpthread
 29 | 
 30 | # Where you want it installed when you do 'make install'
 31 | PREFIX=/usr/local
 32 | 
 33 | OBJS= huffman.o    \
 34 |       crctable.o   \
 35 |       randtable.o  \
 36 |       compress.o   \
 37 |       decompress.o \
 38 |       blocksort.o \
 39 |       gpuBWTSort.o \
 40 |       bzlib.o
 41 | .PHONY: all check test install clean distclean dist manual  # command names, never files to be built
 42 | all: libbz2.a bzip2 bzip2recover test
 43 | 
 44 | bzip2: libbz2.a bzip2.o
 45 | 	$(CC) $(CFLAGS) -o $@ bzip2.o -L. -lbz2 $(LDFLAGS)
 46 | # bzip2recover is linked from its own object only; it does not use libbz2.a.
 47 | bzip2recover: bzip2recover.o
 48 | 	$(CC) $(CFLAGS) -o $@ bzip2recover.o $(LDFLAGS)
 49 | # The archive is deleted first because 'ar cq' appends members without replacing existing ones.
 50 | libbz2.a: $(OBJS)
 51 | 	rm -f $@
 52 | 	$(AR) cq $@ $(OBJS)
 53 | 	@if ( test -f $(RANLIB) -o -f /usr/bin/ranlib -o \
 54 | 		-f /bin/ranlib -o -f /usr/ccs/bin/ranlib ) ; then \
 55 | 		echo $(RANLIB) $@ ; \
 56 | 		$(RANLIB) $@ ; \
 57 | 	fi
 58 | # 'check' is an alias for 'test'. The upstream self-tests are commented out below; the recipe only prints a notice.
 59 | check: test
 60 | test: bzip2
 61 | #	@cat words1
 62 | #	./bzip2 -1  < sample1.ref > sample1.rb2
 63 | #	./bzip2 -2  < sample2.ref > sample2.rb2
 64 | #	./bzip2 -3  < sample3.ref > sample3.rb2
 65 | #	./bzip2 -d  < sample1.bz2 > sample1.tst
 66 | #	./bzip2 -d  < sample2.bz2 > sample2.tst
 67 | #	./bzip2 -ds < sample3.bz2 > sample3.tst
 68 | #	cmp sample1.bz2 sample1.rb2 
 69 | #	cmp sample2.bz2 sample2.rb2
 70 | #	cmp sample3.bz2 sample3.rb2
 71 | #	cmp sample1.tst sample1.ref
 72 | #	cmp sample2.tst sample2.ref
 73 | #	cmp sample3.tst sample3.ref
 74 | #	@cat words3
 75 | 	echo "Skipped Test, since code under development"
 76 | # Install programs, library, header and man pages under $(PREFIX). NOTE(review): bzip2.1, bzgrep, bzmore, bzdiff and their .1 pages are not in this repository's file listing -- confirm they exist before running.
 77 | install: bzip2 bzip2recover
 78 | 	if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
 79 | 	if ( test ! -d $(PREFIX)/lib ) ; then mkdir -p $(PREFIX)/lib ; fi
 80 | 	if ( test ! -d $(PREFIX)/man ) ; then mkdir -p $(PREFIX)/man ; fi
 81 | 	if ( test ! -d $(PREFIX)/man/man1 ) ; then mkdir -p $(PREFIX)/man/man1 ; fi
 82 | 	if ( test ! -d $(PREFIX)/include ) ; then mkdir -p $(PREFIX)/include ; fi
 83 | 	cp -f bzip2 $(PREFIX)/bin/bzip2
 84 | 	cp -f bzip2 $(PREFIX)/bin/bunzip2
 85 | 	cp -f bzip2 $(PREFIX)/bin/bzcat
 86 | 	cp -f bzip2recover $(PREFIX)/bin/bzip2recover
 87 | 	chmod a+x $(PREFIX)/bin/bzip2
 88 | 	chmod a+x $(PREFIX)/bin/bunzip2
 89 | 	chmod a+x $(PREFIX)/bin/bzcat
 90 | 	chmod a+x $(PREFIX)/bin/bzip2recover
 91 | 	cp -f bzip2.1 $(PREFIX)/man/man1
 92 | 	chmod a+r $(PREFIX)/man/man1/bzip2.1
 93 | 	cp -f bzlib.h $(PREFIX)/include
 94 | 	chmod a+r $(PREFIX)/include/bzlib.h
 95 | 	cp -f libbz2.a $(PREFIX)/lib
 96 | 	chmod a+r $(PREFIX)/lib/libbz2.a
 97 | 	cp -f bzgrep $(PREFIX)/bin/bzgrep
 98 | 	ln -s -f $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzegrep
 99 | 	ln -s -f $(PREFIX)/bin/bzgrep $(PREFIX)/bin/bzfgrep
100 | 	chmod a+x $(PREFIX)/bin/bzgrep
101 | 	cp -f bzmore $(PREFIX)/bin/bzmore
102 | 	ln -s -f $(PREFIX)/bin/bzmore $(PREFIX)/bin/bzless
103 | 	chmod a+x $(PREFIX)/bin/bzmore
104 | 	cp -f bzdiff $(PREFIX)/bin/bzdiff
105 | 	ln -s -f $(PREFIX)/bin/bzdiff $(PREFIX)/bin/bzcmp
106 | 	chmod a+x $(PREFIX)/bin/bzdiff
107 | 	cp -f bzgrep.1 bzmore.1 bzdiff.1 $(PREFIX)/man/man1
108 | 	chmod a+r $(PREFIX)/man/man1/bzgrep.1
109 | 	chmod a+r $(PREFIX)/man/man1/bzmore.1
110 | 	chmod a+r $(PREFIX)/man/man1/bzdiff.1
111 | 	echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzegrep.1
112 | 	echo ".so man1/bzgrep.1" > $(PREFIX)/man/man1/bzfgrep.1
113 | 	echo ".so man1/bzmore.1" > $(PREFIX)/man/man1/bzless.1
114 | 	echo ".so man1/bzdiff.1" > $(PREFIX)/man/man1/bzcmp.1
115 | # Remove the object files, the static library and both executables from the current directory.
116 | clean: 
117 | 	rm -f *.o libbz2.a bzip2 bzip2recover
118 | # Explicit compile rule per object; gpuBWTSort.o alone is built with $(NVCC). Disabled lines start at column 0 so make ignores them instead of passing them to the shell.
119 | blocksort.o: blocksort.c
120 | #	@cat words0
121 | 	$(CC) $(CFLAGS) -c blocksort.c
122 | huffman.o: huffman.c
123 | 	$(CC) $(CFLAGS) -c huffman.c
124 | crctable.o: crctable.c
125 | 	$(CC) $(CFLAGS) -c crctable.c
126 | randtable.o: randtable.c
127 | 	$(CC) $(CFLAGS) -c randtable.c
128 | compress.o: compress.c
129 | 	$(CC) $(CFLAGS) -c compress.c
130 | decompress.o: decompress.c
131 | 	$(CC) $(CFLAGS) -c decompress.c
132 | bzlib.o: bzlib.c
133 | 	$(CC) $(CFLAGS) -c bzlib.c
134 | bzip2.o: bzip2.c 
135 | 	$(CC) $(CFLAGS) -c bzip2.c
136 | bzip2recover.o: bzip2recover.c
137 | 	$(CC) $(CFLAGS) -c bzip2recover.c
138 | gpuBWTSort.o: gpuBWTSort.cu
139 | 	$(NVCC) $(NVCCFLAGS) -c gpuBWTSort.cu -o gpuBWTSort.o
140 | 
141 | # distclean: everything 'clean' removes, plus the generated manual in its three output formats.
142 | distclean: clean
143 | 	rm -f manual.ps manual.html manual.pdf
144 | # dist: archive the tree as $(DISTNAME).tar and gzip it; the '.' symlink makes every archived path start with $(DISTNAME)/.
145 | DISTNAME=bzip2-1.0.6
146 | dist: check manual
147 | 	rm -f $(DISTNAME)
148 | 	ln -s -f . $(DISTNAME)
149 | 	tar cvf $(DISTNAME).tar \
150 | 	   $(DISTNAME)/blocksort.c \
151 | 	   $(DISTNAME)/huffman.c \
152 | 	   $(DISTNAME)/crctable.c \
153 | 	   $(DISTNAME)/randtable.c \
154 | 	   $(DISTNAME)/compress.c \
155 | 	   $(DISTNAME)/decompress.c \
156 | 	   $(DISTNAME)/bzlib.c \
157 | 	   $(DISTNAME)/bzip2.c \
158 | 	   $(DISTNAME)/bzip2recover.c \
159 | 	   $(DISTNAME)/bzlib.h \
160 | 	   $(DISTNAME)/bzlib_private.h \
161 | 	   $(DISTNAME)/Makefile \
162 | 	   $(DISTNAME)/LICENSE \
163 | 	   $(DISTNAME)/bzip2.1 \
164 | 	   $(DISTNAME)/bzip2.1.preformatted \
165 | 	   $(DISTNAME)/bzip2.txt \
166 | 	   $(DISTNAME)/words0 \
167 | 	   $(DISTNAME)/words1 \
168 | 	   $(DISTNAME)/words2 \
169 | 	   $(DISTNAME)/words3 \
170 | 	   $(DISTNAME)/sample1.ref \
171 | 	   $(DISTNAME)/sample2.ref \
172 | 	   $(DISTNAME)/sample3.ref \
173 | 	   $(DISTNAME)/sample1.bz2 \
174 | 	   $(DISTNAME)/sample2.bz2 \
175 | 	   $(DISTNAME)/sample3.bz2 \
176 | 	   $(DISTNAME)/dlltest.c \
177 | 	   $(DISTNAME)/manual.html \
178 | 	   $(DISTNAME)/manual.pdf \
179 | 	   $(DISTNAME)/manual.ps \
180 | 	   $(DISTNAME)/README \
181 | 	   $(DISTNAME)/README.COMPILATION.PROBLEMS \
182 | 	   $(DISTNAME)/README.XML.STUFF \
183 | 	   $(DISTNAME)/CHANGES \
184 | 	   $(DISTNAME)/libbz2.def \
185 | 	   $(DISTNAME)/libbz2.dsp \
186 | 	   $(DISTNAME)/dlltest.dsp \
187 | 	   $(DISTNAME)/makefile.msc \
188 | 	   $(DISTNAME)/unzcrash.c \
189 | 	   $(DISTNAME)/spewG.c \
190 | 	   $(DISTNAME)/mk251.c \
191 | 	   $(DISTNAME)/bzdiff \
192 | 	   $(DISTNAME)/bzdiff.1 \
193 | 	   $(DISTNAME)/bzmore \
194 | 	   $(DISTNAME)/bzmore.1 \
195 | 	   $(DISTNAME)/bzgrep \
196 | 	   $(DISTNAME)/bzgrep.1 \
197 | 	   $(DISTNAME)/Makefile-libbz2_so \
198 | 	   $(DISTNAME)/bz-common.xsl \
199 | 	   $(DISTNAME)/bz-fo.xsl \
200 | 	   $(DISTNAME)/bz-html.xsl \
201 | 	   $(DISTNAME)/bzip.css \
202 | 	   $(DISTNAME)/entities.xml \
203 | 	   $(DISTNAME)/manual.xml \
204 | 	   $(DISTNAME)/format.pl \
205 | 	   $(DISTNAME)/xmlproc.sh
206 | 	gzip -v $(DISTNAME).tar
207 | # NOTE(review): many files listed for 'dist' (e.g. bzip2.1, words0, sample1.ref, CHANGES, xmlproc.sh) are not in this repository's file listing -- confirm before use.
208 | # For rebuilding the manual from sources on my SuSE 9.1 box
209 | 
210 | MANUAL_SRCS= 	bz-common.xsl bz-fo.xsl bz-html.xsl bzip.css \
211 | 		entities.xml manual.xml 
212 | 
213 | manual: manual.html manual.ps manual.pdf
214 | 
215 | manual.ps: $(MANUAL_SRCS)
216 | 	./xmlproc.sh -ps manual.xml
217 | 
218 | manual.pdf: $(MANUAL_SRCS)
219 | 	./xmlproc.sh -pdf manual.xml
220 | 
221 | manual.html: $(MANUAL_SRCS)
222 | 	./xmlproc.sh -html manual.xml
223 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 |   !!! THIS IS *NOT* THE ORIGINAL BZIP2 SOURCE CODE !!!
 3 | 
 4 | !!! THIS BZIP2 HAS BEEN MODIFIED FOR EXECUTION ON GPU !!!
 5 | 
 6 | This Bzip2 source has been modified to support All-Core Burrows Wheeler Compression as 
 7 | described in the paper: 
 8 | 
 9 | Aditya Deshpande and P J Narayanan, "[Fast Burrows Wheeler Compression Using All-Cores](http://ardeshp2.web.engr.illinois.edu/papers/Aditya2015Ashes.pdf)", IPDPSW 2015. 
10 | 
11 | The code is built on Seward's popular open-source Bzip2 file compressor. The 
12 | partial sorts of the Burrows Wheeler Transform step are performed on the GPU, while the merge,
13 | MTF and Huffman steps are performed on the CPU in an overlapped manner. The idle
14 | CPU cores use the state-of-the-art BWC by Seward and inter-block parallelism to
15 | process other blocks. The code was written by 
16 | 
17 | Aditya Deshpande at Center for Visual Information Technology, International Institute of 
18 | Information Technology, Gachibowli, Hyderabad.
19 | 
20 | Datasets are available on the CVIT Resources page (http://cvit.iiit.ac.in/research/resources) 
21 | and can also be downloaded from the following publicly available URLs:
22 | 
23 | 1. Silesia Data Corpus ( URL: http://sun.aei.polsl.pl/sdeor/index.php?page=silesia )
24 | 2. Enwiki-latest-abstract10.xml ( URL: http://dumps.wikimedia.org/enwiki/latest/ )
25 | 3. linux-2.6.11.tar ( URL: http://www.kernel.org/pub/linux/kernel/v2.6/ )
26 | 4. enwik8 (URL: http://cs.fit.edu/~mmahoney/compression/textdata.html)
27 | 
28 | The details of this All-Core BWC software are as follows. 
29 | 
30 | Run `make` to create the executable 'bzip2' in the current directory. 
31 | 
32 | ```
33 | make
34 | ```
35 | 
36 | To remove the built bzip2 executables, library and object files from the current directory: 
37 | 
38 | ```
39 | make clean
40 | ```
41 | 
42 | To run on a test file, which in this case is README_bzip2:
43 | 
44 | ```
45 | bash run.sh -c 9 0 README_bzip2
46 | ```
47 |  
48 | To use this code, it would be better to use the run.sh script. 
49 | 
50 | ```
51 | bash run.sh -c <blocksize100K> <additional cpu threads> <filename>
52 | 
53 | -c stands for check by doing decompression of compressed file and diff with the original file
54 | 
55 | <blocksize100K> : blocksize in multiples of 100K, 9 will stand for 900K, 45 for ~4.5MB, 90 for ~9MB etc.
56 | 
57 | <additional cpu threads>: set to 0 if only CPU+GPU thread is to be run, 
58 | 			  set to 'k' if k additional CPU threads are to be run with 1 CPU+GPU thread
59 | ```
60 | 
61 | 
62 | Important Notice
63 | 
64 | ```
65 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
66 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
67 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 | ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
69 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
71 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
73 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
74 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
75 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
76 | ```
77 | 
78 | If you use this code, please cite
79 | 
80 | ```
81 | @INPROCEEDINGS{DeshpandeFastBurrowsWheeler15, 
82 |   author={Aditya Deshpande and P. J. Narayanan}, 
83 |   booktitle={2015 IEEE International Parallel and Distributed Processing Symposium Workshop}, 
84 |   title={Fast Burrows Wheeler Compression Using All-Cores}, 
85 |   year={2015}, 
86 |   pages={628-636}, 
87 |   month={May}}
88 | ```
89 | 


--------------------------------------------------------------------------------
/README_bzip2:
--------------------------------------------------------------------------------
  1 | 
  2 | This is the README for bzip2/libbzip2.
  3 | This version is fully compatible with the previous public releases.
  4 | 
  5 | ------------------------------------------------------------------
  6 | This file is part of bzip2/libbzip2, a program and library for
  7 | lossless, block-sorting data compression.
  8 | 
  9 | bzip2/libbzip2 version 1.0.6 of 6 September 2010
 10 | Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 11 | 
 12 | Please read the WARNING, DISCLAIMER and PATENTS sections in this file.
 13 | 
 14 | This program is released under the terms of the license contained
 15 | in the file LICENSE.
 16 | ------------------------------------------------------------------
 17 | 
 18 | Complete documentation is available in Postscript form (manual.ps),
 19 | PDF (manual.pdf) or html (manual.html).  A plain-text version of the
 20 | manual page is available as bzip2.txt.
 21 | 
 22 | 
 23 | HOW TO BUILD -- UNIX
 24 | 
 25 | Type 'make'.  This builds the library libbz2.a and then the programs
 26 | bzip2 and bzip2recover.  Six self-tests are run.  If the self-tests
 27 | complete ok, carry on to installation:
 28 | 
 29 | To install in /usr/local/bin, /usr/local/lib, /usr/local/man and
 30 | /usr/local/include, type
 31 | 
 32 |    make install
 33 | 
 34 | To install somewhere else, eg, /xxx/yyy/{bin,lib,man,include}, type
 35 | 
 36 |    make install PREFIX=/xxx/yyy
 37 | 
 38 | If you are (justifiably) paranoid and want to see what 'make install'
 39 | is going to do, you can first do
 40 | 
 41 |    make -n install                      or
 42 |    make -n install PREFIX=/xxx/yyy      respectively.
 43 | 
 44 | The -n instructs make to show the commands it would execute, but not
 45 | actually execute them.
 46 | 
 47 | 
 48 | HOW TO BUILD -- UNIX, shared library libbz2.so.
 49 | 
 50 | Do 'make -f Makefile-libbz2_so'.  This Makefile seems to work for
 51 | Linux-ELF (RedHat 7.2 on an x86 box), with gcc.  I make no claims
 52 | that it works for any other platform, though I suspect it probably
 53 | will work for most platforms employing both ELF and gcc.
 54 | 
 55 | bzip2-shared, a client of the shared library, is also built, but not
 56 | self-tested.  So I suggest you also build using the normal Makefile,
 57 | since that conducts a self-test.  A second reason to prefer the
 58 | version statically linked to the library is that, on x86 platforms,
 59 | building shared objects makes a valuable register (%ebx) unavailable
 60 | to gcc, resulting in a slowdown of 10%-20%, at least for bzip2.
 61 | 
 62 | Important note for people upgrading .so's from 0.9.0/0.9.5 to version
 63 | 1.0.X.  All the functions in the library have been renamed, from (eg)
 64 | bzCompress to BZ2_bzCompress, to avoid namespace pollution.
 65 | Unfortunately this means that the libbz2.so created by
 66 | Makefile-libbz2_so will not work with any program which used an older
 67 | version of the library.  I do encourage library clients to make the
 68 | effort to upgrade to use version 1.0, since it is both faster and more
 69 | robust than previous versions.
 70 | 
 71 | 
 72 | HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc.
 73 | 
 74 | It's difficult for me to support compilation on all these platforms.
 75 | My approach is to collect binaries for these platforms, and put them
 76 | on the master web site (http://www.bzip.org).  Look there.  However
 77 | (FWIW), bzip2-1.0.X is very standard ANSI C and should compile
 78 | unmodified with MS Visual C.  If you have difficulties building, you
 79 | might want to read README.COMPILATION.PROBLEMS.
 80 | 
 81 | At least using MS Visual C++ 6, you can build from the unmodified
 82 | sources by issuing, in a command shell: 
 83 | 
 84 |    nmake -f makefile.msc
 85 | 
 86 | (you may need to first run the MSVC-provided script VCVARS32.BAT
 87 |  so as to set up paths to the MSVC tools correctly).
 88 | 
 89 | 
 90 | VALIDATION
 91 | 
 92 | Correct operation, in the sense that a compressed file can always be
 93 | decompressed to reproduce the original, is obviously of paramount
 94 | importance.  To validate bzip2, I used a modified version of Mark
 95 | Nelson's churn program.  Churn is an automated test driver which
 96 | recursively traverses a directory structure, using bzip2 to compress
 97 | and then decompress each file it encounters, and checking that the
 98 | decompressed data is the same as the original.
 99 | 
100 | 
101 | 
102 | Please read and be aware of the following:
103 | 
104 | WARNING:
105 | 
106 |    This program and library (attempts to) compress data by 
107 |    performing several non-trivial transformations on it.  
108 |    Unless you are 100% familiar with *all* the algorithms 
109 |    contained herein, and with the consequences of modifying them, 
110 |    you should NOT meddle with the compression or decompression 
111 |    machinery.  Incorrect changes can and very likely *will* 
112 |    lead to disastrous loss of data.
113 | 
114 | 
115 | DISCLAIMER:
116 | 
117 |    I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
118 |    USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED.
119 | 
120 |    Every compression of a file implies an assumption that the
121 |    compressed file can be decompressed to reproduce the original.
122 |    Great efforts in design, coding and testing have been made to
123 |    ensure that this program works correctly.  However, the complexity
124 |    of the algorithms, and, in particular, the presence of various
125 |    special cases in the code which occur with very low but non-zero
126 |    probability make it impossible to rule out the possibility of bugs
127 |    remaining in the program.  DO NOT COMPRESS ANY DATA WITH THIS
128 |    PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER
129 |    SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
130 | 
131 |    That is not to say this program is inherently unreliable.  
132 |    Indeed, I very much hope the opposite is true.  bzip2/libbzip2 
133 |    has been carefully constructed and extensively tested.
134 | 
135 | 
136 | PATENTS:
137 | 
138 |    To the best of my knowledge, bzip2/libbzip2 does not use any 
139 |    patented algorithms.  However, I do not have the resources 
140 |    to carry out a patent search.  Therefore I cannot give any 
141 |    guarantee of the above statement.
142 | 
143 | 
144 | 
145 | WHAT'S NEW IN 0.9.0 (as compared to 0.1pl2) ?
146 | 
147 |    * Approx 10% faster compression, 30% faster decompression
148 |    * -t (test mode) is a lot quicker
149 |    * Can decompress concatenated compressed files
150 |    * Programming interface, so programs can directly read/write .bz2 files
151 |    * Less restrictive (BSD-style) licensing
152 |    * Flag handling more compatible with GNU gzip
153 |    * Much more documentation, i.e., a proper user manual
154 |    * Hopefully, improved portability (at least of the library)
155 | 
156 | WHAT'S NEW IN 0.9.5 ?
157 | 
158 |    * Compression speed is much less sensitive to the input
159 |      data than in previous versions.  Specifically, the very
160 |      slow performance caused by repetitive data is fixed.
161 |    * Many small improvements in file and flag handling.
162 |    * A Y2K statement.
163 | 
164 | WHAT'S NEW IN 1.0.0 ?
165 | 
166 |    See the CHANGES file.
167 | 
168 | WHAT'S NEW IN 1.0.2 ?
169 | 
170 |    See the CHANGES file.
171 | 
172 | WHAT'S NEW IN 1.0.3 ?
173 | 
174 |    See the CHANGES file.
175 | 
176 | WHAT'S NEW IN 1.0.4 ?
177 | 
178 |    See the CHANGES file.
179 | 
180 | WHAT'S NEW IN 1.0.5 ?
181 | 
182 |    See the CHANGES file.
183 | 
184 | WHAT'S NEW IN 1.0.6 ?
185 | 
186 |    See the CHANGES file.
187 | 
188 | 
189 | I hope you find bzip2 useful.  Feel free to contact me at
190 |    jseward@bzip.org
191 | if you have any suggestions or queries.  Many people mailed me with
192 | comments, suggestions and patches after the releases of bzip-0.15,
193 | bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1,
194 | 1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this
195 | feedback.  I thank you for your comments.
196 | 
197 | bzip2's "home" is http://www.bzip.org/
198 | 
199 | Julian Seward
200 | jseward@bzip.org
201 | Cambridge, UK.
202 | 
203 | 18     July 1996 (version 0.15)
204 | 25   August 1996 (version 0.21)
205 |  7   August 1997 (bzip2, version 0.1)
206 | 29   August 1997 (bzip2, version 0.1pl2)
207 | 23   August 1998 (bzip2, version 0.9.0)
208 |  8     June 1999 (bzip2, version 0.9.5)
209 |  4     Sept 1999 (bzip2, version 0.9.5d)
210 |  5      May 2000 (bzip2, version 1.0pre8)
211 | 30 December 2001 (bzip2, version 1.0.2pre1)
212 | 15 February 2005 (bzip2, version 1.0.3)
213 | 20 December 2006 (bzip2, version 1.0.4)
214 | 10 December 2007 (bzip2, version 1.0.5)
215 |  6     Sept 2010 (bzip2, version 1.0.6)
216 | 


--------------------------------------------------------------------------------
/blocksort.c:
--------------------------------------------------------------------------------
   1 | 
   2 | /*-------------------------------------------------------------*/
   3 | /*--- Block sorting machinery                               ---*/
   4 | /*---                                           blocksort.c ---*/
   5 | /*-------------------------------------------------------------*/
   6 | 
   7 | /* ------------------------------------------------------------------
   8 |    This file is part of bzip2/libbzip2, a program and library for
   9 |    lossless, block-sorting data compression.
  10 | 
  11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
  12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
  13 | 
  14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
  15 |    README file.
  16 | 
  17 |    This program is released under the terms of the license contained
  18 |    in the file LICENSE.
  19 |    ------------------------------------------------------------------ */
  20 | 
  21 | 
  22 | #include "bzlib_private.h"
  23 | 
  24 | /*---------------------------------------------*/
  25 | /*--- Fallback O(N log(N)^2) sorting        ---*/
  26 | /*--- algorithm, for repetitive blocks      ---*/
  27 | /*---------------------------------------------*/
  28 | 
  29 | /*---------------------------------------------*/
  30 | static 
  31 | __inline__
  32 | void fallbackSimpleSort ( UInt32* fmap, 
  33 |                           UInt32* eclass, 
  34 |                           Int32   lo, 
  35 |                           Int32   hi )
  36 | {
  37 |    /* Insertion-sort fmap[lo..hi] into ascending order of eclass[fmap[.]].
  38 |       Two passes are made: one with a stride of 4, then a final one with
  39 |       a stride of 1.  A pass whose stride exceeds hi-lo does nothing, so
  40 |       the stride-4 pass needs no separate range-size guard. */
  41 |    Int32  stride, src, dst, held;
  42 |    UInt32 heldKey;
  43 | 
  44 |    if (lo == hi) return;
  45 | 
  46 |    for ( stride = 4; stride >= 1; stride -= 3 ) {
  47 |       /* walk right-to-left, sinking each entry into the sorted tail */
  48 |       for ( src = hi - stride; src >= lo; src-- ) {
  49 |          held    = fmap[src];
  50 |          heldKey = eclass[held];
  51 |          dst     = src + stride;
  52 |          while ( dst <= hi && heldKey > eclass[fmap[dst]] ) {
  53 |             fmap[dst - stride] = fmap[dst];
  54 |             dst += stride;
  55 |          }
  56 |          fmap[dst - stride] = held;
  57 |       }
  58 |    }
  59 | }
  60 | 
  61 | 
  62 | /*---------------------------------------------*/
  63 | #define fswap(zz1, zz2) /* exchange two lvalues via an Int32 temporary */ \
  64 |    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
  65 | /* fvswap: exchange the zzn-entry runs of fmap that start at zzp1 and zzp2 */
  66 | #define fvswap(zzp1, zzp2, zzn)       \
  67 | {                                     \
  68 |    Int32 yyp1 = (zzp1);               \
  69 |    Int32 yyp2 = (zzp2);               \
  70 |    Int32 yyn  = (zzn);                \
  71 |    while (yyn > 0) {                  \
  72 |       fswap(fmap[yyp1], fmap[yyp2]);  \
  73 |       yyp1++; yyp2++; yyn--;          \
  74 |    }                                  \
  75 | }
  76 | 
  77 | /* fully parenthesised so the expansion stays a single operand inside larger expressions */
  78 | #define fmin(a,b) (((a) < (b)) ? (a) : (b))
  79 | /* fpush/fpop: explicit (lo,hi) range stack; they use the caller's stackLo, stackHi and sp */
  80 | #define fpush(lz,hz) { stackLo[sp] = lz; \
  81 |                        stackHi[sp] = hz; \
  82 |                        sp++; }
  83 | 
  84 | #define fpop(lz,hz) { sp--;              \
  85 |                       lz = stackLo[sp];  \
  86 |                       hz = stackHi[sp]; }
  87 | /* ranges shorter than SMALL_THRESH go to fallbackSimpleSort; STACK_SIZE sizes the range stack */
  88 | #define FALLBACK_QSORT_SMALL_THRESH 10
  89 | #define FALLBACK_QSORT_STACK_SIZE   100
  90 | 
  91 | /* Sort fmap[loSt..hiSt] by eclass[fmap[i]]: 3-way-partition quicksort driven by an explicit range stack. */
  92 | static
  93 | void fallbackQSort3 ( UInt32* fmap, 
  94 |                       UInt32* eclass,
  95 |                       Int32   loSt, 
  96 |                       Int32   hiSt )
  97 | {
  98 |    Int32 unLo, unHi, ltLo, gtHi, n, m;
  99 |    Int32 sp, lo, hi;
 100 |    UInt32 med, r, r3;
 101 |    Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
 102 |    Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
 103 |    /* r is the state of the small pseudo-random sequence used to choose pivots */
 104 |    r = 0;
 105 | 
 106 |    sp = 0;
 107 |    fpush ( loSt, hiSt );
 108 | 
 109 |    while (sp > 0) {
 110 |       /* each iteration pops one range and may push two, so keep headroom on the stack */
 111 |       AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
 112 | 
 113 |       fpop ( lo, hi );
 114 |       if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
 115 |          fallbackSimpleSort ( fmap, eclass, lo, hi );
 116 |          continue;
 117 |       }
 118 | 
 119 |       /* Random partitioning.  Median of 3 sometimes fails to
 120 |          avoid bad cases.  Median of 9 seems to help but 
 121 |          looks rather expensive.  This too seems to work but
 122 |          is cheaper.  Guidance for the magic constants 
 123 |          7621 and 32768 is taken from Sedgewick's algorithms
 124 |          book, chapter 35.
 125 |       */
 126 |       r = ((r * 7621) + 1) % 32768;
 127 |       r3 = r % 3;
 128 |       if (r3 == 0) med = eclass[fmap[lo]]; else
 129 |       if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
 130 |                    med = eclass[fmap[hi]];
 131 |       /* unLo..unHi is the unexamined region; pivot-equal entries are parked at the ends via ltLo and gtHi */
 132 |       unLo = ltLo = lo;
 133 |       unHi = gtHi = hi;
 134 | 
 135 |       while (1) {
 136 |          while (1) {
 137 |             if (unLo > unHi) break;
 138 |             n = (Int32)eclass[fmap[unLo]] - (Int32)med;
 139 |             if (n == 0) { 
 140 |                fswap(fmap[unLo], fmap[ltLo]); 
 141 |                ltLo++; unLo++; 
 142 |                continue; 
 143 |             };
 144 |             if (n > 0) break;
 145 |             unLo++;
 146 |          }
 147 |          while (1) {
 148 |             if (unLo > unHi) break;
 149 |             n = (Int32)eclass[fmap[unHi]] - (Int32)med;
 150 |             if (n == 0) { 
 151 |                fswap(fmap[unHi], fmap[gtHi]); 
 152 |                gtHi--; unHi--; 
 153 |                continue; 
 154 |             };
 155 |             if (n < 0) break;
 156 |             unHi--;
 157 |          }
 158 |          if (unLo > unHi) break;
 159 |          fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
 160 |       }
 161 | 
 162 |       AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
 163 |       /* the pivot-equal runs cover the whole range: nothing is left to sort here */
 164 |       if (gtHi < ltLo) continue;
 165 |       /* move the pivot-equal runs from the two ends into the middle */
 166 |       n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
 167 |       m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
 168 |       /* [lo..n] now holds the keys below the pivot, [m..hi] the keys above it */
 169 |       n = lo + unLo - ltLo - 1;
 170 |       m = hi - (gtHi - unHi) + 1;
 171 |       /* push the larger side first so the smaller side is popped and sorted next */
 172 |       if (n - lo > hi - m) {
 173 |          fpush ( lo, n );
 174 |          fpush ( m, hi );
 175 |       } else {
 176 |          fpush ( m, hi );
 177 |          fpush ( lo, n );
 178 |       }
 179 |    }
 180 | }
 181 | 
 182 | #undef fmin
 183 | #undef fpush
 184 | #undef fpop
 185 | #undef fswap
 186 | #undef fvswap
 187 | #undef FALLBACK_QSORT_SMALL_THRESH
 188 | #undef FALLBACK_QSORT_STACK_SIZE
 189 | 
 190 | 
 191 | /*---------------------------------------------*/
 192 | /* Pre:
 193 |       nblock > 0
 194 |       eclass exists for [0 .. nblock-1]
 195 |       ((UChar*)eclass) [0 .. nblock-1] holds block
 196 |       ptr exists for [0 .. nblock-1]
 197 | 
 198 |    Post:
 199 |       ((UChar*)eclass) [0 .. nblock-1] holds block
 200 |       All other areas of eclass destroyed
 201 |       fmap [0 .. nblock-1] holds sorted order
 202 |       bhtab [ 0 .. 2+(nblock/32) ] destroyed
 203 | */
 204 | /* Bit-vector helpers over bhtab (32 bits per word); masks are built from an unsigned 1 so bit 31 is well defined. */
 205 | #define       SET_BH(zz)  bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
 206 | #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
 207 | #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
 208 | #define      WORD_BH(zz)  bhtab[(zz) >> 5]
 209 | #define UNALIGNED_BH(zz)  ((zz) & 0x01f)
 210 | 
/*--
   Fallback sorter.  Starts from a one-byte counting sort of the
   block positions and then repeatedly refines the resulting
   buckets, doubling the comparison distance H on every pass,
   until every bucket is a singleton or H exceeds nblock.

      fmap    receives the sorted order of positions
      eclass  on entry its first nblock bytes hold the block; it is
              reused as a per-position bucket number and the block
              bytes are rebuilt at the end
      bhtab   bit vector of "bucket header" flags, one per fmap slot
      nblock  number of bytes in the block
      verb    verbosity; progress is printed when verb >= 4
--*/
static
void fallbackSort ( UInt32* fmap, 
                    UInt32* eclass, 
                    UInt32* bhtab,
                    Int32   nblock,
                    Int32   verb )
{
   Int32 ftab[257];      /* byte counts, then cumulative bucket bounds */
   Int32 ftabCopy[256];  /* untouched byte counts, used to rebuild the block */
   Int32 H, i, j, k, l, r, cc, cc1;
   Int32 nNotDone;
   Int32 nBhtab;
   UChar* eclass8 = (UChar*)eclass;  /* byte view of eclass: the block itself */

   /*--
      Initial 1-char radix sort to generate
      initial fmap and initial BH bits.
   --*/
   if (verb >= 4)
      VPrintf0 ( "        bucket sorting ...\n" );
   for (i = 0; i < 257;    i++) ftab[i] = 0;
   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];

   /*-- drop each position into its byte's bucket; afterwards
        ftab[c] is the first fmap slot of bucket c --*/
   for (i = 0; i < nblock; i++) {
      j = eclass8[i];
      k = ftab[j] - 1;
      ftab[j] = k;
      fmap[k] = i;
   }

   /*-- clear the header bits, then flag the start of every byte bucket --*/
   nBhtab = 2 + (nblock / 32);
   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
   for (i = 0; i < 256; i++) SET_BH(ftab[i]);

   /*--
      Inductively refine the buckets.  Kind-of an
      "exponential radix sort" (!), inspired by the
      Manber-Myers suffix array construction algorithm.
   --*/

   /*-- set sentinel bits for block-end detection --*/
   /*-- (alternating set/clear bits past nblock stop both the
        "skip set bits" and the "skip clear bits" scans below) --*/
   for (i = 0; i < 32; i++) { 
      SET_BH(nblock + 2*i);
      CLEAR_BH(nblock + 2*i + 1);
   }

   /*-- the log(N) loop --*/
   H = 1;
   while (1) {

      if (verb >= 4) 
         VPrintf1 ( "        depth %6d has ", H );

      /*-- j tracks the start slot of the bucket containing slot i;
           it is recorded as the class of the position H places
           before fmap[i] (wrapping round the block) --*/
      j = 0;
      for (i = 0; i < nblock; i++) {
         if (ISSET_BH(i)) j = i;
         k = fmap[i] - H; if (k < 0) k += nblock;
         eclass[k] = j;
      }

      nNotDone = 0;
      r = -1;
      while (1) {

         /*-- find the next non-singleton bucket --*/
         /*-- skip a run of set bits: bit by bit up to a word
              boundary, then whole all-ones words, then bit by bit --*/
         k = r + 1;
         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
         if (ISSET_BH(k)) {
            while (WORD_BH(k) == 0xffffffff) k += 32;
            while (ISSET_BH(k)) k++;
         }
         l = k - 1;
         if (l >= nblock) break;
         /*-- now skip the run of clear bits that follows, same scheme --*/
         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
         if (!ISSET_BH(k)) {
            while (WORD_BH(k) == 0x00000000) k += 32;
            while (!ISSET_BH(k)) k++;
         }
         r = k - 1;
         if (r >= nblock) break;

         /*-- now [l, r] bracket current bucket --*/
         if (r > l) {
            nNotDone += (r - l + 1);
            /* NOTE(review): presumably orders fmap[l..r] by
               eclass[fmap[.]]; the callee's body is not visible here */
            fallbackQSort3 ( fmap, eclass, l, r );

            /*-- scan bucket and generate header bits-- */
            /*-- (a new header wherever the class value changes) --*/
            cc = -1;
            for (i = l; i <= r; i++) {
               cc1 = eclass[fmap[i]];
               if (cc != cc1) { SET_BH(i); cc = cc1; };
            }
         }
      }

      if (verb >= 4) 
         VPrintf1 ( "%6d unresolved strings\n", nNotDone );

      /*-- double the distance; stop once it exceeds the block
           or no multi-element bucket was seen in this pass --*/
      H *= 2;
      if (H > nblock || nNotDone == 0) break;
   }

   /*-- 
      Reconstruct the original block in
      eclass8 [0 .. nblock-1], since the
      previous phase destroyed it.
   --*/
   if (verb >= 4)
      VPrintf0 ( "        reconstructing block ...\n" );
   /*-- walk the sorted order, handing out byte values in ascending
        order, ftabCopy[j] copies of byte j each --*/
   j = 0;
   for (i = 0; i < nblock; i++) {
      while (ftabCopy[j] == 0) j++;
      ftabCopy[j]--;
      eclass8[fmap[i]] = (UChar)j;
   }
   AssertH ( j < 256, 1005 );
}
 330 | 
 331 | #undef       SET_BH
 332 | #undef     CLEAR_BH
 333 | #undef     ISSET_BH
 334 | #undef      WORD_BH
 335 | #undef UNALIGNED_BH
 336 | 
 337 | 
 338 | /*---------------------------------------------*/
 339 | /*--- The main, O(N^2 log(N)) sorting       ---*/
 340 | /*--- algorithm.  Faster for "normal"       ---*/
 341 | /*--- non-repetitive blocks.                ---*/
 342 | /*---------------------------------------------*/
 343 | 
 344 | /*---------------------------------------------*/
 345 | static
 346 | __inline__
 347 | Bool mainGtU ( UInt32  i1, 
 348 |                UInt32  i2,
 349 |                UChar*  block, 
 350 |                UInt16* quadrant,
 351 |                UInt32  nblock,
 352 |                Int32*  budget )
 353 | {
 354 |    Int32  k;
 355 |    UChar  c1, c2;
 356 |    UInt16 s1, s2;
 357 | 
 358 |    AssertD ( i1 != i2, "mainGtU" );
 359 |    /* 1 */
 360 |    c1 = block[i1]; c2 = block[i2];
 361 |    if (c1 != c2) return (c1 > c2);
 362 |    i1++; i2++;
 363 |    /* 2 */
 364 |    c1 = block[i1]; c2 = block[i2];
 365 |    if (c1 != c2) return (c1 > c2);
 366 |    i1++; i2++;
 367 |    /* 3 */
 368 |    c1 = block[i1]; c2 = block[i2];
 369 |    if (c1 != c2) return (c1 > c2);
 370 |    i1++; i2++;
 371 |    /* 4 */
 372 |    c1 = block[i1]; c2 = block[i2];
 373 |    if (c1 != c2) return (c1 > c2);
 374 |    i1++; i2++;
 375 |    /* 5 */
 376 |    c1 = block[i1]; c2 = block[i2];
 377 |    if (c1 != c2) return (c1 > c2);
 378 |    i1++; i2++;
 379 |    /* 6 */
 380 |    c1 = block[i1]; c2 = block[i2];
 381 |    if (c1 != c2) return (c1 > c2);
 382 |    i1++; i2++;
 383 |    /* 7 */
 384 |    c1 = block[i1]; c2 = block[i2];
 385 |    if (c1 != c2) return (c1 > c2);
 386 |    i1++; i2++;
 387 |    /* 8 */
 388 |    c1 = block[i1]; c2 = block[i2];
 389 |    if (c1 != c2) return (c1 > c2);
 390 |    i1++; i2++;
 391 |    /* 9 */
 392 |    c1 = block[i1]; c2 = block[i2];
 393 |    if (c1 != c2) return (c1 > c2);
 394 |    i1++; i2++;
 395 |    /* 10 */
 396 |    c1 = block[i1]; c2 = block[i2];
 397 |    if (c1 != c2) return (c1 > c2);
 398 |    i1++; i2++;
 399 |    /* 11 */
 400 |    c1 = block[i1]; c2 = block[i2];
 401 |    if (c1 != c2) return (c1 > c2);
 402 |    i1++; i2++;
 403 |    /* 12 */
 404 |    c1 = block[i1]; c2 = block[i2];
 405 |    if (c1 != c2) return (c1 > c2);
 406 |    i1++; i2++;
 407 | 
 408 |    k = nblock + 8;
 409 | 
 410 |    do {
 411 |       /* 1 */
 412 |       c1 = block[i1]; c2 = block[i2];
 413 |       if (c1 != c2) return (c1 > c2);
 414 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 415 |       if (s1 != s2) return (s1 > s2);
 416 |       i1++; i2++;
 417 |       /* 2 */
 418 |       c1 = block[i1]; c2 = block[i2];
 419 |       if (c1 != c2) return (c1 > c2);
 420 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 421 |       if (s1 != s2) return (s1 > s2);
 422 |       i1++; i2++;
 423 |       /* 3 */
 424 |       c1 = block[i1]; c2 = block[i2];
 425 |       if (c1 != c2) return (c1 > c2);
 426 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 427 |       if (s1 != s2) return (s1 > s2);
 428 |       i1++; i2++;
 429 |       /* 4 */
 430 |       c1 = block[i1]; c2 = block[i2];
 431 |       if (c1 != c2) return (c1 > c2);
 432 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 433 |       if (s1 != s2) return (s1 > s2);
 434 |       i1++; i2++;
 435 |       /* 5 */
 436 |       c1 = block[i1]; c2 = block[i2];
 437 |       if (c1 != c2) return (c1 > c2);
 438 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 439 |       if (s1 != s2) return (s1 > s2);
 440 |       i1++; i2++;
 441 |       /* 6 */
 442 |       c1 = block[i1]; c2 = block[i2];
 443 |       if (c1 != c2) return (c1 > c2);
 444 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 445 |       if (s1 != s2) return (s1 > s2);
 446 |       i1++; i2++;
 447 |       /* 7 */
 448 |       c1 = block[i1]; c2 = block[i2];
 449 |       if (c1 != c2) return (c1 > c2);
 450 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 451 |       if (s1 != s2) return (s1 > s2);
 452 |       i1++; i2++;
 453 |       /* 8 */
 454 |       c1 = block[i1]; c2 = block[i2];
 455 |       if (c1 != c2) return (c1 > c2);
 456 |       s1 = quadrant[i1]; s2 = quadrant[i2];
 457 |       if (s1 != s2) return (s1 > s2);
 458 |       i1++; i2++;
 459 | 
 460 |       if (i1 >= nblock) i1 -= nblock;
 461 |       if (i2 >= nblock) i2 -= nblock;
 462 | 
 463 |       k -= 8;
 464 |       (*budget)--;
 465 |    }
 466 |       while (k >= 0);
 467 | 
 468 |    return False;
 469 | }
 470 | 
 471 | 
 472 | /*---------------------------------------------*/
 473 | /*--
 474 |    Knuth's increments seem to work better
 475 |    than Incerpi-Sedgewick here.  Possibly
 476 |    because the number of elems to sort is
 477 |    usually small, typically <= 20.
 478 | --*/
 479 | static
 480 | Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
 481 |                    9841, 29524, 88573, 265720,
 482 |                    797161, 2391484 };
 483 | 
 484 | static
 485 | void mainSimpleSort ( UInt32* ptr,
 486 |                       UChar*  block,
 487 |                       UInt16* quadrant,
 488 |                       Int32   nblock,
 489 |                       Int32   lo, 
 490 |                       Int32   hi, 
 491 |                       Int32   d,
 492 |                       Int32*  budget )
 493 | {
 494 |    Int32 i, j, h, bigN, hp;
 495 |    UInt32 v;
 496 | 
 497 |    bigN = hi - lo + 1;
 498 |    if (bigN < 2) return;
 499 | 
 500 |    hp = 0;
 501 |    while (incs[hp] < bigN) hp++;
 502 |    hp--;
 503 | 
 504 |    for (; hp >= 0; hp--) {
 505 |       h = incs[hp];
 506 | 
 507 |       i = lo + h;
 508 |       while (True) {
 509 | 
 510 |          /*-- copy 1 --*/
 511 |          if (i > hi) break;
 512 |          v = ptr[i];
 513 |          j = i;
 514 |          while ( mainGtU ( 
 515 |                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
 516 |                  ) ) {
 517 |             ptr[j] = ptr[j-h];
 518 |             j = j - h;
 519 |             if (j <= (lo + h - 1)) break;
 520 |          }
 521 |          ptr[j] = v;
 522 |          i++;
 523 | 
 524 |          /*-- copy 2 --*/
 525 |          if (i > hi) break;
 526 |          v = ptr[i];
 527 |          j = i;
 528 |          while ( mainGtU ( 
 529 |                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
 530 |                  ) ) {
 531 |             ptr[j] = ptr[j-h];
 532 |             j = j - h;
 533 |             if (j <= (lo + h - 1)) break;
 534 |          }
 535 |          ptr[j] = v;
 536 |          i++;
 537 | 
 538 |          /*-- copy 3 --*/
 539 |          if (i > hi) break;
 540 |          v = ptr[i];
 541 |          j = i;
 542 |          while ( mainGtU ( 
 543 |                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
 544 |                  ) ) {
 545 |             ptr[j] = ptr[j-h];
 546 |             j = j - h;
 547 |             if (j <= (lo + h - 1)) break;
 548 |          }
 549 |          ptr[j] = v;
 550 |          i++;
 551 | 
 552 |          if (*budget < 0) return;
 553 |       }
 554 |    }
 555 | }
 556 | 
 557 | 
 558 | /*---------------------------------------------*/
 559 | /*--
 560 |    The following is an implementation of
 561 |    an elegant 3-way quicksort for strings,
 562 |    described in a paper "Fast Algorithms for
 563 |    Sorting and Searching Strings", by Robert
 564 |    Sedgewick and Jon L. Bentley.
 565 | --*/
 566 | 
 567 | #define mswap(zz1, zz2) \
 568 |    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
 569 | 
 570 | #define mvswap(zzp1, zzp2, zzn)       \
 571 | {                                     \
 572 |    Int32 yyp1 = (zzp1);               \
 573 |    Int32 yyp2 = (zzp2);               \
 574 |    Int32 yyn  = (zzn);                \
 575 |    while (yyn > 0) {                  \
 576 |       mswap(ptr[yyp1], ptr[yyp2]);    \
 577 |       yyp1++; yyp2++; yyn--;          \
 578 |    }                                  \
 579 | }
 580 | 
 581 | static 
 582 | __inline__
 583 | UChar mmed3 ( UChar a, UChar b, UChar c )
 584 | {
 585 |    UChar t;
 586 |    if (a > b) { t = a; a = b; b = t; };
 587 |    if (b > c) { 
 588 |       b = c;
 589 |       if (a > b) b = a;
 590 |    }
 591 |    return b;
 592 | }
 593 | 
 594 | #define mmin(a,b) ((a) < (b)) ? (a) : (b)
 595 | 
 596 | #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
 597 |                           stackHi[sp] = hz; \
 598 |                           stackD [sp] = dz; \
 599 |                           sp++; }
 600 | 
 601 | #define mpop(lz,hz,dz) { sp--;             \
 602 |                          lz = stackLo[sp]; \
 603 |                          hz = stackHi[sp]; \
 604 |                          dz = stackD [sp]; }
 605 | 
 606 | 
 607 | #define mnextsize(az) (nextHi[az]-nextLo[az])
 608 | 
 609 | #define mnextswap(az,bz)                                        \
 610 |    { Int32 tz;                                                  \
 611 |      tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
 612 |      tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
 613 |      tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
 614 | 
 615 | 
 616 | #define MAIN_QSORT_SMALL_THRESH 20
 617 | #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
 618 | #define MAIN_QSORT_STACK_SIZE 100
 619 | 
/*--
   Three-way quicksort of ptr[loSt .. hiSt], keyed on the byte found
   dSt places after each pointed-to position.  Recursion is replaced
   by an explicit stack of (lo, hi, depth) work items.  Small or deep
   ranges are handed to mainSimpleSort.  Returns early, leaving the
   range only partly sorted, once *budget has gone negative.
--*/
static
void mainQSort3 ( UInt32* ptr,
                  UChar*  block,
                  UInt16* quadrant,
                  Int32   nblock,
                  Int32   loSt, 
                  Int32   hiSt, 
                  Int32   dSt,
                  Int32*  budget )
{
   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
   Int32 sp, lo, hi, d;

   /*-- explicit work stack --*/
   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
   Int32 stackD [MAIN_QSORT_STACK_SIZE];

   /*-- the three sub-ranges produced by one partitioning step --*/
   Int32 nextLo[3];
   Int32 nextHi[3];
   Int32 nextD [3];

   sp = 0;
   mpush ( loSt, hiSt, dSt );

   while (sp > 0) {

      /*-- one pop followed by at most three pushes: keep headroom --*/
      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );

      mpop ( lo, hi, d );
      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
          d > MAIN_QSORT_DEPTH_THRESH) {
         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
         if (*budget < 0) return;
         continue;
      }

      /*-- pivot: median of the key bytes at the first, last and
           middle entries of the range --*/
      med = (Int32) 
            mmed3 ( block[ptr[ lo         ]+d],
                    block[ptr[ hi         ]+d],
                    block[ptr[ (lo+hi)>>1 ]+d] );

      unLo = ltLo = lo;
      unHi = gtHi = hi;

      /*-- partition: entries equal to the pivot are parked at the two
           ends ([lo, ltLo) and (gtHi, hi]); smaller ones collect on
           the left of the unexamined zone and larger ones on the right --*/
      while (True) {
         while (True) {
            if (unLo > unHi) break;
            n = ((Int32)block[ptr[unLo]+d]) - med;
            if (n == 0) { 
               mswap(ptr[unLo], ptr[ltLo]); 
               ltLo++; unLo++; continue; 
            };
            if (n >  0) break;
            unLo++;
         }
         while (True) {
            if (unLo > unHi) break;
            n = ((Int32)block[ptr[unHi]+d]) - med;
            if (n == 0) { 
               mswap(ptr[unHi], ptr[gtHi]); 
               gtHi--; unHi--; continue; 
            };
            if (n <  0) break;
            unHi--;
         }
         if (unLo > unHi) break;
         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
      }

      AssertD ( unHi == unLo-1, "mainQSort3(2)" );

      /*-- every key equalled the pivot: redo the range one byte deeper --*/
      if (gtHi < ltLo) {
         mpush(lo, hi, d+1 );
         continue;
      }

      /*-- move the parked equal-key entries from the ends to the middle --*/
      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);

      /*-- n = last index of the "less" part, m = first of the "greater" --*/
      n = lo + unLo - ltLo - 1;
      m = hi - (gtHi - unHi) + 1;

      /*-- less and greater parts keep depth d; the equal part advances --*/
      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;

      /*-- order the three parts largest first; pushing them in that
           order means the smallest one is popped next --*/
      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);

      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );

      mpush (nextLo[0], nextHi[0], nextD[0]);
      mpush (nextLo[1], nextHi[1], nextD[1]);
      mpush (nextLo[2], nextHi[2], nextD[2]);
   }
}
 718 | 
 719 | #undef mswap
 720 | #undef mvswap
 721 | #undef mpush
 722 | #undef mpop
 723 | #undef mmin
 724 | #undef mnextsize
 725 | #undef mnextswap
 726 | #undef MAIN_QSORT_SMALL_THRESH
 727 | #undef MAIN_QSORT_DEPTH_THRESH
 728 | #undef MAIN_QSORT_STACK_SIZE
 729 | 
 730 | 
 731 | /*---------------------------------------------*/
 732 | /* Pre:
 733 |       nblock > N_OVERSHOOT
 734 |       block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
 735 |       ((UChar*)block32) [0 .. nblock-1] holds block
 736 |       ptr exists for [0 .. nblock-1]
 737 | 
 738 |    Post:
 739 |       ((UChar*)block32) [0 .. nblock-1] holds block
 740 |       All other areas of block32 destroyed
 741 |       ftab [0 .. 65536 ] destroyed
 742 |       ptr [0 .. nblock-1] holds sorted order
 743 |       if (*budget < 0), sorting was abandoned
 744 | */
 745 | 
 746 | #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
 747 | //#define SETMASK (1 << 21)
 748 | #define SETMASK (1 << 31) // changed so that larger block size in sorting can be supported. 
 749 | #define CLEARMASK (~(SETMASK))
 750 | 
/*--
   Main block sorter.  Radix-sorts the block positions on their first
   two bytes into 65536 "small buckets" (grouped into 256 "big
   buckets" by first byte), then completes the big buckets one at a
   time, smallest first: unsorted small buckets are quicksorted with
   mainQSort3, and the sorted order of other small buckets is derived
   by scanning the finished big bucket.  Returns early whenever
   *budget has gone negative after a mainQSort3 call.
--*/
static
void mainSort ( UInt32* ptr, 
                UChar*  block,
                UInt16* quadrant, 
                UInt32* ftab,
                Int32   nblock,
                Int32   verb,
                Int32*  budget )
{
   Int32  i, j, k, ss, sb;
   Int32  runningOrder[256];  /* big buckets in processing order */
   Bool   bigDone[256];       /* True once a big bucket is complete */
   Int32  copyStart[256];     /* Step 2 fill cursors, moving upwards */
   Int32  copyEnd  [256];     /* Step 2 fill cursors, moving downwards */
   UChar  c1;
   Int32  numQSorted;         /* entries passed to mainQSort3; verbose output only */
   UInt16 s;
   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );

   /*-- set up the 2-byte frequency table --*/
   for (i = 65536; i >= 0; i--) ftab[i] = 0;

   /*-- walk the block backwards; j always holds
        (block[i] << 8) | block[i+1], with block[0] following the
        last byte.  quadrant[] is zeroed in the same pass. --*/
   j = block[0] << 8;
   i = nblock-1;
   for (; i >= 3; i -= 4) {
      quadrant[i] = 0;
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
      ftab[j]++;
      quadrant[i-1] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
      ftab[j]++;
      quadrant[i-2] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
      ftab[j]++;
      quadrant[i-3] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
      ftab[j]++;
   }
   for (; i >= 0; i--) {
      quadrant[i] = 0;
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
      ftab[j]++;
   }

   /*-- (emphasises close relationship of block & quadrant) --*/
   /*-- copy the first BZ_N_OVERSHOOT bytes past the end of the block --*/
   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
      block   [nblock+i] = block[i];
      quadrant[nblock+i] = 0;
   }

   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );

   /*-- Complete the initial radix sort --*/
   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];

   /*-- second backwards pass: drop each position into its 2-byte
        bucket, decrementing the bucket's end marker as we go --*/
   s = block[0] << 8;
   i = nblock-1;
   for (; i >= 3; i -= 4) {
      s = (s >> 8) | (block[i] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i;
      s = (s >> 8) | (block[i-1] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-1;
      s = (s >> 8) | (block[i-2] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-2;
      s = (s >> 8) | (block[i-3] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-3;
   }
   for (; i >= 0; i--) {
      s = (s >> 8) | (block[i] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i;
   }

   /*--
      Now ftab contains the first loc of every small bucket.
      Calculate the running order, from smallest to largest
      big bucket.
   --*/
   for (i = 0; i <= 255; i++) {
      bigDone     [i] = False;
      runningOrder[i] = i;
   }

   /*-- Shell sort of runningOrder[] by big-bucket size (BIGFREQ) --*/
   {
      Int32 vv;
      Int32 h = 1;
      do h = 3 * h + 1; while (h <= 256);
      do {
         h = h / 3;
         for (i = h; i <= 255; i++) {
            vv = runningOrder[i];
            j = i;
            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
               runningOrder[j] = runningOrder[j-h];
               j = j - h;
               if (j <= (h - 1)) goto zero;
            }
            zero:
            runningOrder[j] = vv;
         }
      } while (h != 1);
   }

   /*--
      The main sorting loop.
   --*/

   numQSorted = 0;

   for (i = 0; i <= 255; i++) {

      /*--
         Process big buckets, starting with the least full.
         Basically this is a 3-step process in which we call
         mainQSort3 to sort the small buckets [ss, j], but
         also make a big effort to avoid the calls if we can.
      --*/
      ss = runningOrder[i];

      /*--
         Step 1:
         Complete the big bucket [ss] by quicksorting
         any unsorted small buckets [ss, j], for j != ss.  
         Hopefully previous pointer-scanning phases have already
         completed many of the small buckets [ss, j], so
         we don't have to sort them at all.
      --*/
      for (j = 0; j <= 255; j++) {
         if (j != ss) {
            sb = (ss << 8) + j;
            /*-- SETMASK in ftab[sb] means "already sorted" --*/
            if ( ! (ftab[sb] & SETMASK) ) {
               Int32 lo = ftab[sb]   & CLEARMASK;
               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
               if (hi > lo) {
                  if (verb >= 4)
                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
                                "done %d   this %d\n",
                                ss, j, numQSorted, hi - lo + 1 );
                  mainQSort3 ( 
                     ptr, block, quadrant, nblock, 
                     lo, hi, BZ_N_RADIX, budget 
                  );   
                  numQSorted += (hi - lo + 1);
                  if (*budget < 0) return;
               }
            }
            ftab[sb] |= SETMASK;
         }
      }

      AssertH ( !bigDone[ss], 1006 );

      /*--
         Step 2:
         Now scan this big bucket [ss] so as to synthesise the
         sorted order for small buckets [t, ss] for all t,
         including, magically, the bucket [ss,ss] too.
         This will avoid doing Real Work in subsequent Step 1's.
      --*/
      {
         for (j = 0; j <= 255; j++) {
            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
         }
         /*-- forward scan over the part of big bucket ss that lies
              below small bucket [ss,ss]: the predecessor k of each
              position is appended to the front of bucket [block[k], ss] --*/
         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
            k = ptr[j]-1; if (k < 0) k += nblock;
            c1 = block[k];
            if (!bigDone[c1])
               ptr[ copyStart[c1]++ ] = k;
         }
         /*-- matching backward scan over the part above [ss,ss],
              filling each bucket from its far end --*/
         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
            k = ptr[j]-1; if (k < 0) k += nblock;
            c1 = block[k];
            if (!bigDone[c1]) 
               ptr[ copyEnd[c1]-- ] = k;
         }
      }


      /*-- the two fill cursors for [ss,ss] must have met exactly --*/
      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
                || 
                // Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
                //   Necessity for this case is demonstrated by compressing 
                //   a sequence of approximately 48.5 million of character 
                //   251; 1.0.0/1.0.1 will then die here. 
                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
                1007 ) 

      /*-- every small bucket [j, ss] is now sorted --*/
      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;

      /*--
         Step 3:
         The [ss] big bucket is now done.  Record this fact,
         and update the quadrant descriptors.  Remember to
         update quadrants in the overshoot area too, if
         necessary.  The "if (i < 255)" test merely skips
         this updating for the last bucket processed, since
         updating for the last bucket is pointless.

         The quadrant array provides a way to incrementally
         cache sort orderings, as they appear, so as to 
         make subsequent comparisons in mainGtU() complete
         faster.  For repetitive blocks this makes a big
         difference (but not big enough to be able to avoid
         the fallback sorting mechanism, exponential radix sort).

         The precise meaning is: at all times:

            for 0 <= i < nblock and 0 <= j <= nblock

            if block[i] != block[j], 

               then the relative values of quadrant[i] and 
                    quadrant[j] are meaningless.

               else {
                  if quadrant[i] < quadrant[j]
                     then the string starting at i lexicographically
                     precedes the string starting at j

                  else if quadrant[i] > quadrant[j]
                     then the string starting at j lexicographically
                     precedes the string starting at i

                  else
                     the relative ordering of the strings starting
                     at i and j has not yet been determined.
               }
      --*/
      bigDone[ss] = True;

      if (i < 255) {
         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
         Int32 shifts   = 0;

         /*-- scale ranks down until the largest fits in a UInt16 --*/
         while ((bbSize >> shifts) > 65534) shifts++;

         for (j = bbSize-1; j >= 0; j--) {
            Int32 a2update     = ptr[bbStart + j];
            UInt16 qVal        = (UInt16)(j >> shifts);
            quadrant[a2update] = qVal;
            /*-- mirror into the overshoot copy past the block end --*/
            if (a2update < BZ_N_OVERSHOOT)
               quadrant[a2update + nblock] = qVal;
         }
         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
      }

   }

   if (verb >= 4)
      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
                 nblock, numQSorted, nblock - numQSorted );
}
1014 | 
1015 | #undef BIGFREQ
1016 | #undef SETMASK
1017 | #undef CLEARMASK
1018 | 
1019 | 
1020 | /*---------------------------------------------*/
1021 | /* Pre:
1022 |       nblock > 0
1023 |       arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1024 |       ((UChar*)arr2)  [0 .. nblock-1] holds block
1025 |       arr1 exists for [0 .. nblock-1]
1026 | 
1027 |    Post:
1028 |       ((UChar*)arr2) [0 .. nblock-1] holds block
1029 |       All other areas of block destroyed
1030 |       ftab [ 0 .. 65536 ] destroyed
1031 |       arr1 [0 .. nblock-1] holds sorted order
1032 | */
1033 | void BZ2_blockSort ( EState* s )
1034 | {
1035 |    UInt32* ptr    = s->ptr; 
1036 |    UChar*  block  = s->block;
1037 |    UInt32* ftab   = s->ftab;
1038 |    Int32   nblock = s->nblock;
1039 |    Int32   verb   = s->verbosity;
1040 |    Int32   wfact  = s->workFactor;
1041 |    UInt16* quadrant;
1042 |    Int32   budget;
1043 |    Int32   budgetInit;
1044 |    Int32   i;
1045 | 
1046 |    if (nblock < 10000) {
1047 |       fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1048 |    } else {
1049 |       /* Calculate the location for quadrant, remembering to get
1050 |          the alignment right.  Assumes that &(block[0]) is at least
1051 |          2-byte aligned -- this should be ok since block is really
1052 |          the first section of arr2.
1053 |       */
1054 |       i = nblock+BZ_N_OVERSHOOT;
1055 |       if (i & 1) i++;
1056 |       quadrant = (UInt16*)(&(block[i]));
1057 | 
1058 |       /* (wfact-1) / 3 puts the default-factor-30
1059 |          transition point at very roughly the same place as 
1060 |          with v0.1 and v0.9.0.  
1061 |          Not that it particularly matters any more, since the
1062 |          resulting compressed stream is now the same regardless
1063 |          of whether or not we use the main sort or fallback sort.
1064 |       */
1065 |       if (wfact < 1  ) wfact = 1;
1066 |       if (wfact > 100) wfact = 100;
1067 |       budgetInit = nblock * ((wfact-1) / 3);
1068 |       budget = budgetInit;
1069 | 
1070 |       mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1071 |       if (verb >= 3) 
1072 |          VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1073 |                     budgetInit - budget,
1074 |                     nblock, 
1075 |                     (float)(budgetInit - budget) /
1076 |                     (float)(nblock==0 ? 1 : nblock) ); 
1077 |       if (budget < 0) {
1078 |          if (verb >= 2) 
1079 |             VPrintf0 ( "    too repetitive; using fallback"
1080 |                        " sorting algorithm\n" );
1081 |          fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1082 |       }
1083 |    }
1084 | 
1085 |    s->origPtr = -1;
1086 |    for (i = 0; i < s->nblock; i++)
1087 |       if (ptr[i] == 0)
1088 |          { s->origPtr = i; break; };
1089 | 
1090 |    AssertH( s->origPtr != -1, 1003 );
1091 | }
1092 | 
1093 | 
1094 | /*-------------------------------------------------------------*/
1095 | /*--- end                                       blocksort.c ---*/
1096 | /*-------------------------------------------------------------*/
1097 | 


--------------------------------------------------------------------------------
/bzip2recover.c:
--------------------------------------------------------------------------------
  1 | /*-----------------------------------------------------------*/
  2 | /*--- Block recoverer program for bzip2                   ---*/
  3 | /*---                                      bzip2recover.c ---*/
  4 | /*-----------------------------------------------------------*/
  5 | 
  6 | /* ------------------------------------------------------------------
  7 |    This file is part of bzip2/libbzip2, a program and library for
  8 |    lossless, block-sorting data compression.
  9 | 
 10 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 11 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 12 | 
 13 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 14 |    README file.
 15 | 
 16 |    This program is released under the terms of the license contained
 17 |    in the file LICENSE.
 18 |    ------------------------------------------------------------------ */
 19 | 
 20 | /* This program is a complete hack and should be rewritten properly.
 21 | 	 It isn't very complicated. */
 22 | 
 23 | #include <stdio.h>
 24 | #include <errno.h>
 25 | #include <stdlib.h>
 26 | #include <string.h>
 27 | 
 28 | 
 29 | /* This program records bit locations in the file to be recovered.
 30 |    That means that if 64-bit ints are not supported, we will not
 31 |    be able to recover .bz2 files over 512MB (2^32 bits) long.
 32 |    On GNU supported platforms, we take advantage of the 64-bit
 33 |    int support to circumvent this problem.  Ditto MSVC.
 34 | 
 35 |    This change occurred in version 1.0.2; all prior versions have
 36 |    the 512MB limitation.
 37 | */
 38 | #ifdef __GNUC__
 39 |    typedef  unsigned long long int  MaybeUInt64;
 40 | #  define MaybeUInt64_FMT "%Lu"
 41 | #else
 42 | #ifdef _MSC_VER
 43 |    typedef  unsigned __int64  MaybeUInt64;
 44 | #  define MaybeUInt64_FMT "%I64u"
 45 | #else
 46 |    typedef  unsigned int   MaybeUInt64;
 47 | #  define MaybeUInt64_FMT "%u"
 48 | #endif
 49 | #endif
 50 | 
 51 | typedef  unsigned int   UInt32;
 52 | typedef  int            Int32;
 53 | typedef  unsigned char  UChar;
 54 | typedef  char           Char;
 55 | typedef  unsigned char  Bool;
 56 | #define True    ((Bool)1)
 57 | #define False   ((Bool)0)
 58 | 
 59 | 
 60 | #define BZ_MAX_FILENAME 2000
 61 | 
 62 | Char inFileName[BZ_MAX_FILENAME];
 63 | Char outFileName[BZ_MAX_FILENAME];
 64 | Char progName[BZ_MAX_FILENAME];
 65 | 
 66 | MaybeUInt64 bytesOut = 0;
 67 | MaybeUInt64 bytesIn  = 0;
 68 | 
 69 | 
 70 | /*---------------------------------------------------*/
 71 | /*--- Header bytes                                ---*/
 72 | /*---------------------------------------------------*/
 73 | 
 74 | #define BZ_HDR_B 0x42                         /* 'B' */
 75 | #define BZ_HDR_Z 0x5a                         /* 'Z' */
 76 | #define BZ_HDR_h 0x68                         /* 'h' */
 77 | #define BZ_HDR_0 0x30                         /* '0' */
 78 |  
 79 | 
 80 | /*---------------------------------------------------*/
 81 | /*--- I/O errors                                  ---*/
 82 | /*---------------------------------------------------*/
 83 | 
 84 | /*---------------------------------------------*/
 85 | static void readError ( void )
 86 | {
 87 |    fprintf ( stderr,
 88 |              "%s: I/O error reading `%s', possible reason follows.\n",
 89 |             progName, inFileName );
 90 |    perror ( progName );
 91 |    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
 92 |              progName );
 93 |    exit ( 1 );
 94 | }
 95 | 
 96 | 
 97 | /*---------------------------------------------*/
 98 | static void writeError ( void )
 99 | {
100 |    fprintf ( stderr,
101 |              "%s: I/O error reading `%s', possible reason follows.\n",
102 |             progName, inFileName );
103 |    perror ( progName );
104 |    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105 |              progName );
106 |    exit ( 1 );
107 | }
108 | 
109 | 
110 | /*---------------------------------------------*/
111 | static void mallocFail ( Int32 n )
112 | {
113 |    fprintf ( stderr,
114 |              "%s: malloc failed on request for %d bytes.\n",
115 |             progName, n );
116 |    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
117 |              progName );
118 |    exit ( 1 );
119 | }
120 | 
121 | 
122 | /*---------------------------------------------*/
123 | static void tooManyBlocks ( Int32 max_handled_blocks )
124 | {
125 |    fprintf ( stderr,
126 |              "%s: `%s' appears to contain more than %d blocks\n",
127 |             progName, inFileName, max_handled_blocks );
128 |    fprintf ( stderr,
129 |              "%s: and cannot be handled.  To fix, increase\n",
130 |              progName );
131 |    fprintf ( stderr, 
132 |              "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
133 |              progName );
134 |    exit ( 1 );
135 | }
136 | 
137 | 
138 | 
139 | /*---------------------------------------------------*/
140 | /*--- Bit stream I/O                              ---*/
141 | /*---------------------------------------------------*/
142 | 
143 | typedef
144 |    struct {
145 |       FILE*  handle;
146 |       Int32  buffer;
147 |       Int32  buffLive;
148 |       Char   mode;
149 |    }
150 |    BitStream;
151 | 
152 | 
153 | /*---------------------------------------------*/
154 | static BitStream* bsOpenReadStream ( FILE* stream )
155 | {
156 |    BitStream *bs = (BitStream *)malloc ( sizeof(BitStream) );
157 |    if (bs == NULL) mallocFail ( sizeof(BitStream) );
158 |    bs->handle = stream;
159 |    bs->buffer = 0;
160 |    bs->buffLive = 0;
161 |    bs->mode = 'r';
162 |    return bs;
163 | }
164 | 
165 | 
166 | /*---------------------------------------------*/
167 | static BitStream* bsOpenWriteStream ( FILE* stream )
168 | {
169 |    BitStream *bs = (BitStream *)malloc ( sizeof(BitStream) );
170 |    if (bs == NULL) mallocFail ( sizeof(BitStream) );
171 |    bs->handle = stream;
172 |    bs->buffer = 0;
173 |    bs->buffLive = 0;
174 |    bs->mode = 'w';
175 |    return bs;
176 | }
177 | 
178 | 
179 | /*---------------------------------------------*/
180 | static void bsPutBit ( BitStream* bs, Int32 bit )
181 | {
182 |    if (bs->buffLive == 8) {
183 |       Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
184 |       if (retVal == EOF) writeError();
185 |       bytesOut++;
186 |       bs->buffLive = 1;
187 |       bs->buffer = bit & 0x1;
188 |    } else {
189 |       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
190 |       bs->buffLive++;
191 |    };
192 | }
193 | 
194 | 
195 | /*---------------------------------------------*/
196 | /*--
197 |    Returns 0 or 1, or 2 to indicate EOF.
198 | --*/
199 | static Int32 bsGetBit ( BitStream* bs )
200 | {
201 |    if (bs->buffLive > 0) {
202 |       bs->buffLive --;
203 |       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204 |    } else {
205 |       Int32 retVal = getc ( bs->handle );
206 |       if ( retVal == EOF ) {
207 |          if (errno != 0) readError();
208 |          return 2;
209 |       }
210 |       bs->buffLive = 7;
211 |       bs->buffer = retVal;
212 |       return ( ((bs->buffer) >> 7) & 0x1 );
213 |    }
214 | }
215 | 
216 | 
217 | /*---------------------------------------------*/
218 | static void bsClose ( BitStream* bs )
219 | {
220 |    Int32 retVal;
221 | 
222 |    if ( bs->mode == 'w' ) {
223 |       while ( bs->buffLive < 8 ) {
224 |          bs->buffLive++;
225 |          bs->buffer <<= 1;
226 |       };
227 |       retVal = putc ( (UChar) (bs->buffer), bs->handle );
228 |       if (retVal == EOF) writeError();
229 |       bytesOut++;
230 |       retVal = fflush ( bs->handle );
231 |       if (retVal == EOF) writeError();
232 |    }
233 |    retVal = fclose ( bs->handle );
234 |    if (retVal == EOF) {
235 |       if (bs->mode == 'w') writeError(); else readError();
236 |    }
237 |    free ( bs );
238 | }
239 | 
240 | 
241 | /*---------------------------------------------*/
242 | static void bsPutUChar ( BitStream* bs, UChar c )
243 | {
244 |    Int32 i;
245 |    for (i = 7; i >= 0; i--)
246 |       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
247 | }
248 | 
249 | 
250 | /*---------------------------------------------*/
251 | static void bsPutUInt32 ( BitStream* bs, UInt32 c )
252 | {
253 |    Int32 i;
254 | 
255 |    for (i = 31; i >= 0; i--)
256 |       bsPutBit ( bs, (c >> i) & 0x1 );
257 | }
258 | 
259 | 
260 | /*---------------------------------------------*/
261 | static Bool endsInBz2 ( Char* name )
262 | {
263 |    Int32 n = strlen ( name );
264 |    if (n <= 4) return False;
265 |    return
266 |       (name[n-4] == '.' &&
267 |        name[n-3] == 'b' &&
268 |        name[n-2] == 'z' &&
269 |        name[n-1] == '2');
270 | }
271 | 
272 | 
273 | /*---------------------------------------------------*/
274 | /*---                                             ---*/
275 | /*---------------------------------------------------*/
276 | 
/* Directory separator used to locate the basename of the input path.
   This logic isn't really right when it comes to Cygwin. */
#if defined(_WIN32)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit magic numbers searched for in the input, each split into its
   upper 16 bits (HI) and lower 32 bits (LO): the marker that opens a
   block, and the marker that ends the stream. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Size of the block-position tables.  Increase if necessary.  However, a
   .bz2 file with > 50000 blocks would have an uncompressed size of at
   least 40GB, so the chances are low you'll need to up this. */
#define BZ_MAX_HANDLED_BLOCKS 50000
295 | 
296 | MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297 | MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
298 | MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299 | MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
300 | 
301 | Int32 main ( Int32 argc, Char** argv )
302 | {
303 |    FILE*       inFile;
304 |    FILE*       outFile;
305 |    BitStream*  bsIn, *bsWr;
306 |    Int32       b, wrBlock, currBlock, rbCtr;
307 |    MaybeUInt64 bitsRead;
308 | 
309 |    UInt32      buffHi, buffLo, blockCRC;
310 |    Char*       p;
311 | 
312 |    strcpy ( progName, argv[0] );
313 |    inFileName[0] = outFileName[0] = 0;
314 | 
315 |    fprintf ( stderr, 
316 |              "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
317 | 
318 |    if (argc != 2) {
319 |       fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
320 |                         progName, progName );
321 |       switch (sizeof(MaybeUInt64)) {
322 |          case 8:
323 |             fprintf(stderr, 
324 |                     "\trestrictions on size of recovered file: None\n");
325 |             break;
326 |          case 4:
327 |             fprintf(stderr, 
328 |                     "\trestrictions on size of recovered file: 512 MB\n");
329 |             fprintf(stderr, 
330 |                     "\tto circumvent, recompile with MaybeUInt64 as an\n"
331 |                     "\tunsigned 64-bit int.\n");
332 |             break;
333 |          default:
334 |             fprintf(stderr, 
335 |                     "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
336 |                     "configuration error.\n");
337 |             break;
338 |       }
339 |       exit(1);
340 |    }
341 | 
342 |    if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
343 |       fprintf ( stderr, 
344 |                 "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
345 |                 progName, (int)strlen(argv[1]) );
346 |       exit(1);
347 |    }
348 | 
349 |    strcpy ( inFileName, argv[1] );
350 | 
351 |    inFile = fopen ( inFileName, "rb" );
352 |    if (inFile == NULL) {
353 |       fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
354 |       exit(1);
355 |    }
356 | 
357 |    bsIn = bsOpenReadStream ( inFile );
358 |    fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
359 | 
360 |    bitsRead = 0;
361 |    buffHi = buffLo = 0;
362 |    currBlock = 0;
363 |    bStart[currBlock] = 0;
364 | 
365 |    rbCtr = 0;
366 | 
367 |    while (True) {
368 |       b = bsGetBit ( bsIn );
369 |       bitsRead++;
370 |       if (b == 2) {
371 |          if (bitsRead >= bStart[currBlock] &&
372 |             (bitsRead - bStart[currBlock]) >= 40) {
373 |             bEnd[currBlock] = bitsRead-1;
374 |             if (currBlock > 0)
375 |                fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT 
376 |                                  " to " MaybeUInt64_FMT " (incomplete)\n",
377 |                          currBlock,  bStart[currBlock], bEnd[currBlock] );
378 |          } else
379 |             currBlock--;
380 |          break;
381 |       }
382 |       buffHi = (buffHi << 1) | (buffLo >> 31);
383 |       buffLo = (buffLo << 1) | (b & 1);
384 |       if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI 
385 |              && buffLo == BLOCK_HEADER_LO)
386 |            || 
387 |            ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI 
388 |              && buffLo == BLOCK_ENDMARK_LO)
389 |          ) {
390 |          if (bitsRead > 49) {
391 |             bEnd[currBlock] = bitsRead-49;
392 |          } else {
393 |             bEnd[currBlock] = 0;
394 |          }
395 |          if (currBlock > 0 &&
396 | 	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
397 |             fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT 
398 |                               " to " MaybeUInt64_FMT "\n",
399 |                       rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
400 |             rbStart[rbCtr] = bStart[currBlock];
401 |             rbEnd[rbCtr] = bEnd[currBlock];
402 |             rbCtr++;
403 |          }
404 |          if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
405 |             tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
406 |          currBlock++;
407 | 
408 |          bStart[currBlock] = bitsRead;
409 |       }
410 |    }
411 | 
412 |    bsClose ( bsIn );
413 | 
414 |    /*-- identified blocks run from 1 to rbCtr inclusive. --*/
415 | 
416 |    if (rbCtr < 1) {
417 |       fprintf ( stderr,
418 |                 "%s: sorry, I couldn't find any block boundaries.\n",
419 |                 progName );
420 |       exit(1);
421 |    };
422 | 
423 |    fprintf ( stderr, "%s: splitting into blocks\n", progName );
424 | 
425 |    inFile = fopen ( inFileName, "rb" );
426 |    if (inFile == NULL) {
427 |       fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
428 |       exit(1);
429 |    }
430 |    bsIn = bsOpenReadStream ( inFile );
431 | 
432 |    /*-- placate gcc's dataflow analyser --*/
433 |    blockCRC = 0; bsWr = 0;
434 | 
435 |    bitsRead = 0;
436 |    outFile = NULL;
437 |    wrBlock = 0;
438 |    while (True) {
439 |       b = bsGetBit(bsIn);
440 |       if (b == 2) break;
441 |       buffHi = (buffHi << 1) | (buffLo >> 31);
442 |       buffLo = (buffLo << 1) | (b & 1);
443 |       if (bitsRead == 47+rbStart[wrBlock]) 
444 |          blockCRC = (buffHi << 16) | (buffLo >> 16);
445 | 
446 |       if (outFile != NULL && bitsRead >= rbStart[wrBlock]
447 |                           && bitsRead <= rbEnd[wrBlock]) {
448 |          bsPutBit ( bsWr, b );
449 |       }
450 | 
451 |       bitsRead++;
452 | 
453 |       if (bitsRead == rbEnd[wrBlock]+1) {
454 |          if (outFile != NULL) {
455 |             bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
456 |             bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
457 |             bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
458 |             bsPutUInt32 ( bsWr, blockCRC );
459 |             bsClose ( bsWr );
460 |          }
461 |          if (wrBlock >= rbCtr) break;
462 |          wrBlock++;
463 |       } else
464 |       if (bitsRead == rbStart[wrBlock]) {
465 |          /* Create the output file name, correctly handling leading paths. 
466 |             (31.10.2001 by Sergey E. Kusikov) */
467 |          Char* split;
468 |          Int32 ofs, k;
469 |          for (k = 0; k < BZ_MAX_FILENAME; k++) 
470 |             outFileName[k] = 0;
471 |          strcpy (outFileName, inFileName);
472 |          split = strrchr (outFileName, BZ_SPLIT_SYM);
473 |          if (split == NULL) {
474 |             split = outFileName;
475 |          } else {
476 |             ++split;
477 | 	 }
478 | 	 /* Now split points to the start of the basename. */
479 |          ofs  = split - outFileName;
480 |          sprintf (split, "rec%5d", wrBlock+1);
481 |          for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
482 |          strcat (outFileName, inFileName + ofs);
483 | 
484 |          if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
485 | 
486 |          fprintf ( stderr, "   writing block %d to `%s' ...\n",
487 |                            wrBlock+1, outFileName );
488 | 
489 |          outFile = fopen ( outFileName, "wb" );
490 |          if (outFile == NULL) {
491 |             fprintf ( stderr, "%s: can't write `%s'\n",
492 |                       progName, outFileName );
493 |             exit(1);
494 |          }
495 |          bsWr = bsOpenWriteStream ( outFile );
496 |          bsPutUChar ( bsWr, BZ_HDR_B );    
497 |          bsPutUChar ( bsWr, BZ_HDR_Z );    
498 |          bsPutUChar ( bsWr, BZ_HDR_h );    
499 |          bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
500 |          bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
501 |          bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
502 |          bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
503 |       }
504 |    }
505 | 
506 |    fprintf ( stderr, "%s: finished\n", progName );
507 |    return 0;
508 | }
509 | 
510 | 
511 | 
512 | /*-----------------------------------------------------------*/
513 | /*--- end                                  bzip2recover.c ---*/
514 | /*-----------------------------------------------------------*/
515 | 


--------------------------------------------------------------------------------
/bzlib.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /*-------------------------------------------------------------*/
  3 | /*--- Public header file for the library.                   ---*/
  4 | /*---                                               bzlib.h ---*/
  5 | /*-------------------------------------------------------------*/
  6 | 
  7 | /* ------------------------------------------------------------------
  8 |    This file is part of bzip2/libbzip2, a program and library for
  9 |    lossless, block-sorting data compression.
 10 | 
 11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 13 | 
 14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 15 |    README file.
 16 | 
 17 |    This program is released under the terms of the license contained
 18 |    in the file LICENSE.
 19 |    ------------------------------------------------------------------ */
 20 | 
 21 | 
 22 | #ifndef _BZLIB_H
 23 | #define _BZLIB_H
 24 | 
 25 | #ifdef __cplusplus
 26 | extern "C" {
 27 | #endif
 28 | 
 29 | #define BZ_RUN               0
 30 | #define BZ_FLUSH             1
 31 | #define BZ_FINISH            2
 32 | 
 33 | #define BZ_OK                0
 34 | #define BZ_RUN_OK            1
 35 | #define BZ_FLUSH_OK          2
 36 | #define BZ_FINISH_OK         3
 37 | #define BZ_STREAM_END        4
 38 | #define BZ_MAX_STATE_COUNT 500
 39 | #define BZ_SEQUENCE_ERROR    (-1)
 40 | #define BZ_PARAM_ERROR       (-2)
 41 | #define BZ_MEM_ERROR         (-3)
 42 | #define BZ_DATA_ERROR        (-4)
 43 | #define BZ_DATA_ERROR_MAGIC  (-5)
 44 | #define BZ_IO_ERROR          (-6)
 45 | #define BZ_UNEXPECTED_EOF    (-7)
 46 | #define BZ_OUTBUFF_FULL      (-8)
 47 | #define BZ_CONFIG_ERROR      (-9)
 48 | //#define PRINT_DEBUG
 49 | #define TIME_DEBUG
 50 | 
 51 | typedef 
 52 |    struct {
 53 |       char *next_in;
 54 |       unsigned int avail_in;
 55 |       unsigned int total_in_lo32;
 56 |       unsigned int total_in_hi32;
 57 | 
 58 |       char *next_out;
 59 |       unsigned int avail_out;
 60 |       unsigned int total_out_lo32;
 61 |       unsigned int total_out_hi32;
 62 | 
 63 |       void *state[BZ_MAX_STATE_COUNT];
 64 |       unsigned int state_fill_count;
 65 |       int block_sort_count;
 66 |       FILE *handle;
 67 | 
 68 |       void *(*bzalloc)(void *,int,int);
 69 |       void (*bzfree)(void *,void *);
 70 |       void *opaque;
 71 |    } 
 72 |    bz_stream;
 73 | 
 74 | 
 75 | #ifndef BZ_IMPORT
 76 | #define BZ_EXPORT
 77 | #endif
 78 | 
 79 | #ifndef BZ_NO_STDIO
 80 | /* Need a definitition for FILE */
 81 | #include <stdio.h>
 82 | #endif
 83 | 
 84 | #ifdef _WIN32
 85 | #   include <windows.h>
 86 | #   ifdef small
 87 |       /* windows.h define small to char */
 88 | #      undef small
 89 | #   endif
 90 | #   ifdef BZ_EXPORT
 91 | #   define BZ_API(func) WINAPI func
 92 | #   define BZ_EXTERN extern
 93 | #   else
 94 |    /* import windows dll dynamically */
 95 | #   define BZ_API(func) (WINAPI * func)
 96 | #   define BZ_EXTERN
 97 | #   endif
 98 | #else
 99 | #   define BZ_API(func) func
100 | #   define BZ_EXTERN extern
101 | #endif
102 | 
103 | 
104 | /*-- Core (low-level) library functions --*/
105 | 
106 | BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( 
107 |       bz_stream* strm, 
108 |       int        blockSize100k, 
109 |       int        verbosity, 
110 |       int        workFactor,
111 |       int        numThreads
112 |    );
113 | 
114 | BZ_EXTERN int BZ_API(BZ2_bzCompress) ( 
115 |       bz_stream* strm, 
116 |       int action 
117 |    );
118 | 
119 | BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( 
120 |       bz_stream* strm 
121 |    );
122 | 
123 | BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( 
124 |       bz_stream *strm, 
125 |       int       verbosity, 
126 |       int       small
127 |    );
128 | 
129 | BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( 
130 |       bz_stream* strm 
131 |    );
132 | 
133 | BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( 
134 |       bz_stream *strm 
135 |    );
136 | 
137 | void transfer_state_information (
138 |       bz_stream *strm
139 |    );
140 | 
141 | int allocate_new_block_in_stream ( 
142 |       bz_stream *strm 
143 |    );
144 | 
145 | /*-- High(er) level library functions --*/
146 | 
147 | #ifndef BZ_NO_STDIO
148 | #define BZ_MAX_UNUSED 5000
149 | 
150 | typedef void BZFILE;
151 | 
152 | BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( 
153 |       int*  bzerror,   
154 |       FILE* f, 
155 |       int   verbosity, 
156 |       int   small,
157 |       void* unused,    
158 |       int   nUnused 
159 |    );
160 | 
161 | BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( 
162 |       int*    bzerror, 
163 |       BZFILE* b 
164 |    );
165 | 
166 | BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( 
167 |       int*    bzerror, 
168 |       BZFILE* b, 
169 |       void**  unused,  
170 |       int*    nUnused 
171 |    );
172 | 
173 | BZ_EXTERN int BZ_API(BZ2_bzRead) ( 
174 |       int*    bzerror, 
175 |       BZFILE* b, 
176 |       void*   buf, 
177 |       int     len 
178 |    );
179 | 
180 | BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( 
181 |       int*  bzerror,      
182 |       FILE* f, 
183 |       int   blockSize100k, 
184 |       int   verbosity, 
185 |       int   workFactor,
186 |       int   numThreads
187 |    );
188 | 
189 | BZ_EXTERN void BZ_API(BZ2_bzWrite) ( 
190 |       int*    bzerror, 
191 |       BZFILE* b, 
192 |       void*   buf, 
193 |       int     len 
194 |    );
195 | 
196 | BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( 
197 |       int*          bzerror, 
198 |       BZFILE*       b, 
199 |       int           abandon, 
200 |       unsigned int* nbytes_in, 
201 |       unsigned int* nbytes_out 
202 |    );
203 | 
204 | BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( 
205 |       int*          bzerror, 
206 |       BZFILE*       b, 
207 |       int           abandon, 
208 |       unsigned int* nbytes_in_lo32, 
209 |       unsigned int* nbytes_in_hi32, 
210 |       unsigned int* nbytes_out_lo32, 
211 |       unsigned int* nbytes_out_hi32
212 |    );
213 | #endif
214 | 
215 | 
216 | /*-- Utility functions --*/
217 | 
218 | BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( 
219 |       char*         dest, 
220 |       unsigned int* destLen,
221 |       char*         source, 
222 |       unsigned int  sourceLen,
223 |       int           blockSize100k, 
224 |       int           verbosity, 
225 |       int           workFactor 
226 |    );
227 | 
228 | BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( 
229 |       char*         dest, 
230 |       unsigned int* destLen,
231 |       char*         source, 
232 |       unsigned int  sourceLen,
233 |       int           small, 
234 |       int           verbosity 
235 |    );
236 | 
237 | 
238 | /*--
239 |    Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
240 |    to support better zlib compatibility.
241 |    This code is not _officially_ part of libbzip2 (yet);
242 |    I haven't tested it, documented it, or considered the
243 |    threading-safeness of it.
244 |    If this code breaks, please contact both Yoshioka and me.
245 | --*/
246 | 
247 | BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
248 |       void
249 |    );
250 | 
251 | #ifndef BZ_NO_STDIO
252 | BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
253 |       const char *path,
254 |       const char *mode
255 |    );
256 | 
257 | BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
258 |       int        fd,
259 |       const char *mode
260 |    );
261 |          
262 | BZ_EXTERN int BZ_API(BZ2_bzread) (
263 |       BZFILE* b, 
264 |       void* buf, 
265 |       int len 
266 |    );
267 | 
268 | BZ_EXTERN int BZ_API(BZ2_bzwrite) (
269 |       BZFILE* b, 
270 |       void*   buf, 
271 |       int     len 
272 |    );
273 | 
274 | BZ_EXTERN int BZ_API(BZ2_bzflush) (
275 |       BZFILE* b
276 |    );
277 | 
278 | BZ_EXTERN void BZ_API(BZ2_bzclose) (
279 |       BZFILE* b
280 |    );
281 | 
282 | BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
283 |       BZFILE *b, 
284 |       int    *errnum
285 |    );
286 | #endif
287 | 
288 | #ifdef __cplusplus
289 | }
290 | #endif
291 | 
292 | #endif
293 | 
294 | /*-------------------------------------------------------------*/
295 | /*--- end                                           bzlib.h ---*/
296 | /*-------------------------------------------------------------*/
297 | 


--------------------------------------------------------------------------------
/bzlib_private.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /*-------------------------------------------------------------*/
  3 | /*--- Private header file for the library.                  ---*/
  4 | /*---                                       bzlib_private.h ---*/
  5 | /*-------------------------------------------------------------*/
  6 | 
  7 | /* ------------------------------------------------------------------
  8 |    This file is part of bzip2/libbzip2, a program and library for
  9 |    lossless, block-sorting data compression.
 10 | 
 11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 13 | 
 14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 15 |    README file.
 16 | 
 17 |    This program is released under the terms of the license contained
 18 |    in the file LICENSE.
 19 |    ------------------------------------------------------------------ */
 20 | 
 21 | 
 22 | #ifndef _BZLIB_PRIVATE_H
 23 | #define _BZLIB_PRIVATE_H
 24 | 
 25 | #include <stdlib.h>
 26 | #include<time.h>
 27 | 
 28 | #ifndef BZ_NO_STDIO
 29 | #include <stdio.h>
 30 | #include <ctype.h>
 31 | #include <string.h>
 32 | #endif
 33 | 
 34 | #include "bzlib.h"
 35 | 
 36 | 
 37 | 
 38 | /*-- General stuff. --*/
 39 | 
 40 | #define BZ_VERSION  "1.0.6, 6-Sept-2010"
 41 | 
 42 | typedef char            Char;
 43 | typedef unsigned char   Bool;
 44 | typedef unsigned char   UChar;
 45 | typedef int             Int32;
 46 | typedef unsigned int    UInt32;
 47 | typedef short           Int16;
 48 | typedef unsigned short  UInt16;
 49 | 
 50 | #define True  ((Bool)1)
 51 | #define False ((Bool)0)
 52 | 
 53 | #ifndef __GNUC__
 54 | #define __inline__  /* */
 55 | #endif 
 56 | 
 57 | #ifndef BZ_NO_STDIO
 58 | 
 59 | extern void BZ2_bz__AssertH__fail ( int errcode );
 60 | #define AssertH(cond,errcode) \
 61 |    { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
 62 | 
 63 | #if BZ_DEBUG
 64 | #define AssertD(cond,msg) \
 65 |    { if (!(cond)) {       \
 66 |       fprintf ( stderr,   \
 67 |         "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
 68 |       exit(1); \
 69 |    }}
 70 | #else
 71 | #define AssertD(cond,msg) /* */
 72 | #endif
 73 | 
 74 | #define VPrintf0(zf) \
 75 |    fprintf(stderr,zf)
 76 | #define VPrintf1(zf,za1) \
 77 |    fprintf(stderr,zf,za1)
 78 | #define VPrintf2(zf,za1,za2) \
 79 |    fprintf(stderr,zf,za1,za2)
 80 | #define VPrintf3(zf,za1,za2,za3) \
 81 |    fprintf(stderr,zf,za1,za2,za3)
 82 | #define VPrintf4(zf,za1,za2,za3,za4) \
 83 |    fprintf(stderr,zf,za1,za2,za3,za4)
 84 | #define VPrintf5(zf,za1,za2,za3,za4,za5) \
 85 |    fprintf(stderr,zf,za1,za2,za3,za4,za5)
 86 | 
 87 | #else
 88 | 
 89 | extern void bz_internal_error ( int errcode );
 90 | #define AssertH(cond,errcode) \
 91 |    { if (!(cond)) bz_internal_error ( errcode ); }
 92 | #define AssertD(cond,msg)                do { } while (0)
 93 | #define VPrintf0(zf)                     do { } while (0)
 94 | #define VPrintf1(zf,za1)                 do { } while (0)
 95 | #define VPrintf2(zf,za1,za2)             do { } while (0)
 96 | #define VPrintf3(zf,za1,za2,za3)         do { } while (0)
 97 | #define VPrintf4(zf,za1,za2,za3,za4)     do { } while (0)
 98 | #define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0)
 99 | 
100 | #endif
101 | 
102 | 
/* Allocate nnn bytes / free ppp through the hooks stored in the bz_stream
   that must be in scope under the name `strm'. */
#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
#define BZFREE(ppp)  (strm->bzfree)(strm->opaque,(ppp))

/*-- Header bytes. --*/

#define BZ_HDR_B 0x42   /* ASCII 'B' */
#define BZ_HDR_Z 0x5A   /* ASCII 'Z' */
#define BZ_HDR_h 0x68   /* ASCII 'h' */
#define BZ_HDR_0 0x30   /* ASCII '0' */

/*-- Constants for the back end. --*/

#define BZ_MAX_ALPHA_SIZE 258
#define BZ_MAX_CODE_LEN    23

#define BZ_RUNA 0
#define BZ_RUNB 1

#define BZ_N_GROUPS 6
#define BZ_G_SIZE   50
#define BZ_N_ITERS  4

/* One selector per group of BZ_G_SIZE symbols, plus two spare.
   The 15000000 figure was changed by @aditya; presumably it was raised to
   allow larger blocks than stock bzip2 -- TODO confirm. */
#define BZ_MAX_SELECTORS (2 + ((15000000) / BZ_G_SIZE))
127 | 
128 | 
129 | 
130 | /*-- Stuff for randomising repetitive blocks. --*/
131 | 
132 | extern Int32 BZ2_rNums[512];
133 | 
134 | #define BZ_RAND_DECLS                          \
135 |    Int32 rNToGo;                               \
136 |    Int32 rTPos                                 \
137 | 
138 | #define BZ_RAND_INIT_MASK                      \
139 |    s->rNToGo = 0;                              \
140 |    s->rTPos  = 0                               \
141 | 
142 | #define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
143 | 
144 | #define BZ_RAND_UPD_MASK                       \
145 |    if (s->rNToGo == 0) {                       \
146 |       s->rNToGo = BZ2_rNums[s->rTPos];         \
147 |       s->rTPos++;                              \
148 |       if (s->rTPos == 512) s->rTPos = 0;       \
149 |    }                                           \
150 |    s->rNToGo--;
151 | 
152 | 
153 | 
154 | /*-- Stuff for doing CRCs. --*/
155 | 
156 | extern UInt32 BZ2_crc32Table[256];
157 | 
158 | #define BZ_INITIALISE_CRC(crcVar)              \
159 | {                                              \
160 |    crcVar = 0xffffffffL;                       \
161 | }
162 | 
163 | #define BZ_FINALISE_CRC(crcVar)                \
164 | {                                              \
165 |    crcVar = ~(crcVar);                         \
166 | }
167 | 
168 | #define BZ_UPDATE_CRC(crcVar,cha)              \
169 | {                                              \
170 |    crcVar = (crcVar << 8) ^                    \
171 |             BZ2_crc32Table[(crcVar >> 24) ^    \
172 |                            ((UChar)cha)];      \
173 | }
174 | 
175 | 
176 | 
/*-- States and modes for compression. --*/

/* Values for the compressor's `mode' field. */
#define BZ_M_IDLE      1
#define BZ_M_RUNNING   2
#define BZ_M_FLUSHING  3
#define BZ_M_FINISHING 4

/* Values for the compressor's `state' field. */
#define BZ_S_OUTPUT    1
#define BZ_S_INPUT     2

/* Block-sorting parameters; BZ_N_OVERSHOOT is their sum plus two. */
#define BZ_N_RADIX 2
#define BZ_N_QSORT 12
#define BZ_N_SHELL 18
#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
191 | 
192 | 
193 | 
194 | 
195 | /*-- Structure holding all the compression-side stuff. --*/
196 | 
197 | typedef   /* EState: all per-stream state of the compressor */
198 |    struct {
199 |       /* pointer back to the struct bz_stream */
200 |       bz_stream* strm;
201 | 
202 |       /* mode this stream is in, and whether inputting */
203 |       /* or outputting data (presumably BZ_M_* and BZ_S_* values -- confirm) */
204 |       Int32    mode;
205 |       Int32    state;
206 | 
207 |       /* remembers avail_in when flush/finish requested */
208 |       UInt32   avail_in_expect;
209 | 
210 |       /* for doing the block sorting */
211 |       UInt32*  arr1;
212 |       UInt32*  arr2;
213 | 
214 |       /* added for storing two arrays to merge: read by merge_two_sort_arrays() in compress.c */
215 |       UInt32* arr1_first_sort; 
216 |       UInt32* arr1_second_sort; 
217 |       UInt32* arr1_first_sort_rank;
218 | 
219 |       /* length of the first sort problem; the merge takes nblock - first_sort_length as the second length */
220 |       Int32 first_sort_length; 
221 |       /* origPtr: described in merge_two_sort_arrays() as the position of index 0 in the BWT transform */
222 |       UInt32*  ftab;
223 |       Int32    origPtr;
224 | 
225 |       /* aliases for arr1 and arr2 (see the storage notes at the top of generateMTFValues()) */
226 |       UInt32*  ptr;
227 |       UChar*   block;
228 |       UInt16*  mtfv;
229 |       UChar*   zbits;
230 | 
231 |       /* for deciding when to use the fallback sorting algorithm */
232 |       Int32    workFactor;
233 | 
234 |       /* run-length-encoding of the input */
235 |       UInt32   state_in_ch;
236 |       Int32    state_in_len;
237 |       BZ_RAND_DECLS;
238 | 
239 |       /* input and output limits and current posns */
240 |       Int32    nblock;
241 |       Int32    nblockMAX;
242 |       Int32    numZ;
243 |       Int32    state_out_pos;
244 | 
245 |       /* map of bytes used in block */
246 |       Int32    nInUse;
247 |       Bool     inUse[256];
248 |       UChar    unseqToSeq[256];
249 |       /* nInUse and unseqToSeq[] are rebuilt from inUse[] by makeMaps_e() in compress.c */
250 |       /* the buffer for bit stream creation: bsBuff holds bsLive pending bits at its top end */
251 |       UInt32   bsBuff;
252 |       Int32    bsLive;
253 | 
254 |       /* block and combined CRCs */
255 |       UInt32   blockCRC;
256 |       UInt32   combinedCRC;
257 | 
258 |       /* misc administratium */
259 |       Int32    verbosity;
260 |       Int32    blockNo;
261 |       Int32    blockSize100k;
262 | 
263 |       /* stuff for coding the MTF values */
264 |       Int32    nMTF;
265 |       Int32    mtfFreq    [BZ_MAX_ALPHA_SIZE];
266 |       UChar    selector   [BZ_MAX_SELECTORS];
267 |       UChar    selectorMtf[BZ_MAX_SELECTORS];
268 |       /* per coding table: code lengths, codes and symbol frequencies, all filled in by sendMTFValues() */
269 |       UChar    len     [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
270 |       Int32    code    [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
271 |       Int32    rfreq   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
272 |       /* len_pack[v][k] = len[2k+1][v] << 16 | len[2k][v]; second dimension: only 3 needed; 4 makes index calculations faster */
273 |       UInt32   len_pack[BZ_MAX_ALPHA_SIZE][4];
274 | 
275 |      /* stores the BWT sorting depth (presumably filled via gpuBlockSort()'s sortingDepth argument -- confirm) */
276 |      Int32 sortingDepth;
277 |      /* NOTE(review): no use visible here; presumably the worker-thread count for the pthreads path -- confirm */
278 |      Int32 numThreads;
279 | 
280 |    }
281 |    EState;
282 | 
283 | 
284 | 
285 | /*-- externs for compression. --*/
286 | 
287 | extern void 
288 | BZ2_blockSort ( EState* );
289 | 
290 | extern void 
291 | BZ2_compressBlocks ( bz_stream* );
292 | /* NOTE(review): the next two look like variants of BZ2_compressBlocks; how they differ is not visible from this header */
293 | extern void 
294 | BZ2_compressBlocks_pthreads ( bz_stream* );
295 | 
296 | extern void 
297 | BZ2_compressBlocks_without_overlap ( bz_stream* );
298 | /* defined in compress.c: zeroes s->bsLive and s->bsBuff */
299 | extern void 
300 | BZ2_bsInitWrite ( EState* );
301 | /* sendMTFValues() passes: code table, length table, minLen, maxLen, alphaSize */
302 | extern void 
303 | BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
304 | /* sendMTFValues() passes: length table, frequency table, alphaSize, maximum code length (17) */
305 | extern void 
306 | BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
307 | 
308 | 
309 | 
310 | /*-- states for decompression. --*/
311 | /* presumably the values held in DState.state -- confirm in decompress.c */
312 | #define BZ_X_IDLE        1
313 | #define BZ_X_OUTPUT      2
314 | /* The values below run contiguously from 10 to 50. Note there is no BZ_X_ENDHDR_1: that series starts at _2. */
315 | #define BZ_X_MAGIC_1     10
316 | #define BZ_X_MAGIC_2     11
317 | #define BZ_X_MAGIC_3     12
318 | #define BZ_X_MAGIC_4     13
319 | #define BZ_X_BLKHDR_1    14
320 | #define BZ_X_BLKHDR_2    15
321 | #define BZ_X_BLKHDR_3    16
322 | #define BZ_X_BLKHDR_4    17
323 | #define BZ_X_BLKHDR_5    18
324 | #define BZ_X_BLKHDR_6    19
325 | #define BZ_X_BCRC_1      20
326 | #define BZ_X_BCRC_2      21
327 | #define BZ_X_BCRC_3      22
328 | #define BZ_X_BCRC_4      23
329 | #define BZ_X_RANDBIT     24
330 | #define BZ_X_ORIGPTR_1   25
331 | #define BZ_X_ORIGPTR_2   26
332 | #define BZ_X_ORIGPTR_3   27
333 | #define BZ_X_MAPPING_1   28
334 | #define BZ_X_MAPPING_2   29
335 | #define BZ_X_SELECTOR_1  30
336 | #define BZ_X_SELECTOR_2  31
337 | #define BZ_X_SELECTOR_3  32
338 | #define BZ_X_CODING_1    33
339 | #define BZ_X_CODING_2    34
340 | #define BZ_X_CODING_3    35
341 | #define BZ_X_MTF_1       36
342 | #define BZ_X_MTF_2       37
343 | #define BZ_X_MTF_3       38
344 | #define BZ_X_MTF_4       39
345 | #define BZ_X_MTF_5       40
346 | #define BZ_X_MTF_6       41
347 | #define BZ_X_ENDHDR_2    42
348 | #define BZ_X_ENDHDR_3    43
349 | #define BZ_X_ENDHDR_4    44
350 | #define BZ_X_ENDHDR_5    45
351 | #define BZ_X_ENDHDR_6    46
352 | #define BZ_X_CCRC_1      47
353 | #define BZ_X_CCRC_2      48
354 | #define BZ_X_CCRC_3      49
355 | #define BZ_X_CCRC_4      50
356 | 
357 | 
358 | 
359 | /*-- Constants for the fast MTF decoder. --*/
360 | /* MTFA_SIZE sizes DState.mtfa[]; DState.mtfbase[] has 256 / MTFL_SIZE = 16 entries. */
361 | #define MTFA_SIZE 4096
362 | #define MTFL_SIZE 16
363 | 
364 | 
365 | 
366 | /*-- Structure holding all the decompression-side stuff. --*/
367 | 
368 | typedef   /* DState: all per-stream state of the decompressor */
369 |    struct {
370 |       /* pointer back to the struct bz_stream */
371 |       bz_stream* strm;
372 | 
373 |       /* state indicator for this stream (presumably a BZ_X_* value -- confirm in decompress.c) */
374 |       Int32    state;
375 | 
376 |       /* for doing the final run-length decoding */
377 |       UChar    state_out_ch;
378 |       Int32    state_out_len;
379 |       Bool     blockRandomised;
380 |       BZ_RAND_DECLS;
381 | 
382 |       /* the buffer for bit stream reading */
383 |       UInt32   bsBuff;
384 |       Int32    bsLive;
385 | 
386 |       /* misc administratium */
387 |       Int32    blockSize100k;
388 |       Bool     smallDecompress;
389 |       Int32    currBlockNo;
390 |       Int32    verbosity;
391 | 
392 |       /* for undoing the Burrows-Wheeler transform */
393 |       Int32    origPtr;
394 |       UInt32   tPos;
395 |       Int32    k0;
396 |       Int32    unzftab[256];
397 |       Int32    nblock_used;
398 |       Int32    cftab[257];
399 |       Int32    cftabCopy[257];
400 |       /* tt[]: as read by BZ_GET_FAST, each entry holds a byte in its low 8 bits and the next index above them */
401 |       /* for undoing the Burrows-Wheeler transform (FAST) */
402 |       UInt32   *tt;
403 |       /* ll16[i] holds bits 0-15 of entry i and ll4[] holds bits 16-19, two entries per byte (see SET_LL / GET_LL) */
404 |       /* for undoing the Burrows-Wheeler transform (SMALL) */
405 |       UInt16   *ll16;
406 |       UChar    *ll4;
407 | 
408 |       /* stored and calculated CRCs */
409 |       UInt32   storedBlockCRC;
410 |       UInt32   storedCombinedCRC;
411 |       UInt32   calculatedBlockCRC;
412 |       UInt32   calculatedCombinedCRC;
413 | 
414 |       /* map of bytes used in block */
415 |       Int32    nInUse;
416 |       Bool     inUse[256];
417 |       Bool     inUse16[16];
418 |       UChar    seqToUnseq[256];
419 | 
420 |       /* for decoding the MTF values (mtfa has MTFA_SIZE = 4096 bytes, mtfbase has 256 / MTFL_SIZE = 16 entries) */
421 |       UChar    mtfa   [MTFA_SIZE];
422 |       Int32    mtfbase[256 / MTFL_SIZE];
423 |       UChar    selector   [BZ_MAX_SELECTORS];
424 |       UChar    selectorMtf[BZ_MAX_SELECTORS];
425 |       UChar    len  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
426 |       /* NOTE(review): presumably the decode tables built by BZ2_hbCreateDecodeTables() -- confirm in decompress.c */
427 |       Int32    limit  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
428 |       Int32    base   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
429 |       Int32    perm   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
430 |       Int32    minLens[BZ_N_GROUPS];
431 | 
432 |       /* save area for scalars in the main decompress code (presumably so decoding can resume across calls -- confirm) */
433 |       Int32    save_i;
434 |       Int32    save_j;
435 |       Int32    save_t;
436 |       Int32    save_alphaSize;
437 |       Int32    save_nGroups;
438 |       Int32    save_nSelectors;
439 |       Int32    save_EOB;
440 |       Int32    save_groupNo;
441 |       Int32    save_groupPos;
442 |       Int32    save_nextSym;
443 |       Int32    save_nblockMAX;
444 |       Int32    save_nblock;
445 |       Int32    save_es;
446 |       Int32    save_N;
447 |       Int32    save_curr;
448 |       Int32    save_zt;
449 |       Int32    save_zn; 
450 |       Int32    save_zvec;
451 |       Int32    save_zj;
452 |       Int32    save_gSel;
453 |       Int32    save_gMinlen;
454 |       Int32*   save_gLimit;
455 |       Int32*   save_gBase;
456 |       Int32*   save_gPerm;
457 | 
458 |    }
459 |    DState;
460 | 
461 | 
462 | 
463 | /*-- Macros for decompression. --*/
464 | 
465 | #define BZ_GET_FAST(cccc)                     \
466 |     /* Next byte -> cccc. s->tPos is unsigned, hence test < 0 is pointless. */ \
467 |     if (s->tPos >= 100000 * (UInt32)s->blockSize100k) return True; /* returns from the calling function */ \
468 |     s->tPos = s->tt[s->tPos];                 \
469 |     cccc = (UChar)(s->tPos & 0xff);  /* low 8 bits: the byte */ \
470 |     s->tPos >>= 8;  /* remaining bits: next index. NB: expands to bare statements, not one block. */
471 | /* Same steps as BZ_GET_FAST, but on c_tPos, c_tt and ro_blockSize100k, which must be in scope where it is used. */
472 | #define BZ_GET_FAST_C(cccc)                   \
473 |     /* c_tPos is unsigned, hence test < 0 is pointless. */ \
474 |     if (c_tPos >= 100000 * (UInt32)ro_blockSize100k) return True; /* returns from the calling function */ \
475 |     c_tPos = c_tt[c_tPos];                    \
476 |     cccc = (UChar)(c_tPos & 0xff);            \
477 |     c_tPos >>= 8;
478 | 
479 | #define SET_LL4(i,n)  /* store n in nibble i of s->ll4: even i -> low half of byte i/2, odd i -> high half */ \
480 |    { if (((i) & 0x1) == 0)   /* n is OR-ed in without being masked */ \
481 |         s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else    \
482 |         s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4);  \
483 |    }
484 | /* GET_LL4(i): read nibble i of s->ll4 back; the shift count is 0 for even i and 4 for odd i. */
485 | #define GET_LL4(i)                             \
486 |    ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
487 | 
488 | #define SET_LL(i,n)   /* store n at index i: low 16 bits in s->ll16[i], n >> 16 in nibble i of s->ll4 (SET_LL4 does not mask it) */ \
489 |    { s->ll16[i] = (UInt16)((n) & 0x0000ffff);  /* (n): the mask must apply to the whole argument */ \
490 |      SET_LL4(i, (n) >> 16);                    \
491 |    }
492 | 
493 | #define GET_LL(i)   /* reassemble entry i: s->ll16[i] in bits 0-15, nibble i of s->ll4 in bits 16-19 */ \
494 |    (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
495 | 
496 | #define BZ_GET_SMALL(cccc)                            \
497 |     /* Next byte -> cccc. s->tPos is unsigned, hence test < 0 is pointless. */ \
498 |     if (s->tPos >= 100000 * (UInt32)s->blockSize100k) return True; /* returns from the calling function */ \
499 |     cccc = BZ2_indexIntoF ( s->tPos, s->cftab );    \
500 |     s->tPos = GET_LL(s->tPos);  /* NB: expands to bare statements, not one block */
501 | 
502 | 
503 | /*-- externs for decompression. --*/
504 | /* BZ_GET_SMALL calls this as BZ2_indexIntoF ( s->tPos, s->cftab ) and stores the result as the next byte */
505 | extern Int32 
506 | BZ2_indexIntoF ( Int32, Int32* );
507 | 
508 | extern Int32 
509 | BZ2_decompress ( DState* );
510 | /* NOTE(review): argument meanings are not visible here -- see the definition (presumably in huffman.c) */
511 | extern void 
512 | BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
513 |                            Int32,  Int32, Int32 );
514 | 
515 | 
516 | #endif
517 | 
518 | 
519 | /*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
520 | 
521 | #ifdef BZ_NO_STDIO
522 | #ifndef NULL
523 | #define NULL 0
524 | #endif
525 | #endif
526 | 
527 | #ifndef BZ_GPU   /* own guard: these declarations sit after the #endif further up */
528 | #define BZ_GPU
529 | extern int gpuBlockSort(UChar* block, UInt32* order, UInt32* orderFirstSort, UInt32* orderSecondSort, UInt32* orderFirstSortRank, Int32 blockSize, Int32 *sortingDepth); /* NOTE(review): defined outside this header (presumably gpuBWTSort.cu); the three extra order arrays presumably match EState.arr1_first_sort, arr1_second_sort and arr1_first_sort_rank -- confirm */
530 | extern void gpuSetDevice(int devId); /* NOTE(review): presumably selects the device used by gpuBlockSort -- confirm */
531 | #endif
532 | 
533 | /*-------------------------------------------------------------*/
534 | /*--- end                                   bzlib_private.h ---*/
535 | /*-------------------------------------------------------------*/
536 | 


--------------------------------------------------------------------------------
/compress.c:
--------------------------------------------------------------------------------
   1 | 
   2 | /*-------------------------------------------------------------*/
   3 | /*--- Compression machinery (not incl block sorting)        ---*/
   4 | /*---                                            compress.c ---*/
   5 | /*-------------------------------------------------------------*/
   6 | 
   7 | /* ------------------------------------------------------------------
   8 |    This file is part of bzip2/libbzip2, a program and library for
   9 |    lossless, block-sorting data compression.
  10 | 
  11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
  12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
  13 | 
  14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
  15 |    README file.
  16 | 
  17 |    This program is released under the terms of the license contained
  18 |    in the file LICENSE.
  19 |    ------------------------------------------------------------------ */
  20 | 
  21 | 
  22 | /* CHANGES
  23 |     0.9.0    -- original version.
  24 |     0.9.0a/b -- no changes in this file.
  25 |     0.9.0c   -- changed setting of nGroups in sendMTFValues() 
  26 |                 so as to do a bit better on small files
  27 | */
  28 | 
  29 | #include "bzlib_private.h"
  30 | #include<omp.h>
  31 | #include<pthread.h>
  32 | #include<semaphore.h>
  33 | 
  34 | 
  35 | /*---------------------------------------------------*/
  36 | /*--- Bit stream I/O                              ---*/
  37 | /*---------------------------------------------------*/
  38 | 
  39 | /*---------------------------------------------------*/
  40 | void BZ2_bsInitWrite ( EState* s )
  41 | {
  42 |    /* start with an empty bit buffer: accumulator cleared, no bits pending */
  43 |    s->bsBuff = 0; s->bsLive = 0;
  44 | }
  45 | 
  46 | 
  47 | /*---------------------------------------------------*/
  48 | static
  49 | void bsFinishWrite ( EState* s )
  50 | {
  51 |    /* Drain the bit buffer into s->zbits one top byte at a time; */
  52 |    /* the last byte written may be only partly filled.           */
  53 |    for (; s->bsLive > 0; s->bsLive -= 8) {
  54 |       s->zbits[s->numZ++] = (UChar)(s->bsBuff >> 24);
  55 |       s->bsBuff <<= 8;
  56 |    }
  57 | }
  58 | 
  59 | 
  60 | /*---------------------------------------------------*/
  61 | #define bsNEEDW(nz)   /* nz is never used in the expansion; needs an EState* named s in scope */ \
  62 | {  /* move every complete byte from the top of s->bsBuff into s->zbits */ \
  63 |    while (s->bsLive >= 8) {                   \
  64 |       s->zbits[s->numZ]                       \
  65 |          = (UChar)(s->bsBuff >> 24);          \
  66 |       s->numZ++;                              \
  67 |       s->bsBuff <<= 8;                        \
  68 |       s->bsLive -= 8;                         \
  69 |    }                                          \
  70 | }
  71 | 
  72 | 
  73 | /*---------------------------------------------------*/
  74 | static __inline__
  75 | void bsW ( EState* s, Int32 n, UInt32 v )
  76 | {
  77 |    /* v is OR-ed in, unmasked, directly below the bits already held */
  78 |    bsNEEDW ( n );
  79 |    s->bsLive += n;
  80 |    s->bsBuff |= (v << (32 - s->bsLive));
  81 | }
  82 | 
  83 | 
  84 | /*---------------------------------------------------*/
  85 | static
  86 | void bsPutUInt32 ( EState* s, UInt32 u )
  87 | {
  88 |    Int32 shift;
  89 |    /* write the four bytes of u, most significant byte first */
  90 |    for (shift = 24; shift >= 0; shift -= 8)
  91 |       bsW ( s, 8, (u >> shift) & 0xffL );
  92 | }
  93 | 
  94 | 
  95 | /*---------------------------------------------------*/
  96 | static void
  97 | bsPutUChar ( EState* s, UChar c )
  98 | {  /* write c as a single 8-bit field */
  99 |    const UInt32 byteVal = c;  bsW ( s, 8, byteVal );
 100 | }
 101 | 
 102 | 
 103 | /*---------------------------------------------------*/
 104 | /*--- The back end proper                         ---*/
 105 | /*---------------------------------------------------*/
 106 | 
 107 | /*---------------------------------------------------*/
 108 | static
 109 | void makeMaps_e ( EState* s )
 110 | {
 111 |    /* Number the byte values flagged in s->inUse[] densely, in ascending order; s->nInUse ends as their count. */
 112 |    Int32 byteVal;
 113 |    s->nInUse = 0;
 114 |    for (byteVal = 0; byteVal < 256; byteVal++) {
 115 |       if (!s->inUse[byteVal]) continue;
 116 |       s->unseqToSeq[byteVal] = s->nInUse++;
 117 |    }
 118 | }
 119 | 
 120 | 
 121 | /*---------------------------------------------------*/
 122 | static
 123 | void generateMTFValues ( EState* s )
 124 | {
 125 |    UChar   yy[256];
 126 |    Int32   i, j;
 127 |    Int32   zPend;
 128 |    Int32   wr;
 129 |    Int32   EOB;
 130 |    /* Purpose: move-to-front and zero-run code the sorted block into s->mtfv[], counting symbols in s->mtfFreq[]; sets s->nMTF. */
 131 |    /* 
 132 |       After sorting (eg, here),
 133 |          s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
 134 |          and
 135 |          ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 
 136 |          holds the original block data.
 137 | 
 138 |       The first thing to do is generate the MTF values,
 139 |       and put them in
 140 |          ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
 141 |       Because there are strictly fewer or equal MTF values
 142 |       than block values, ptr values in this area are overwritten
 143 |       with MTF values only when they are no longer needed.
 144 | 
 145 |       The final compressed bitstream is generated into the
 146 |       area starting at
 147 |          (UChar*) (&((UChar*)s->arr2)[s->nblock])
 148 | 
 149 |       These storage aliases are set up in bzCompressInit(),
 150 |       except for the last one, which is arranged in 
 151 |       compressBlock().
 152 |    */
 153 |    UInt32* ptr   = s->ptr;
 154 |    UChar* block  = s->block;
 155 |    UInt16* mtfv  = s->mtfv;
 156 |    /* build s->unseqToSeq[] and s->nInUse from s->inUse[] */
 157 |    makeMaps_e ( s );
 158 |    EOB = s->nInUse+1;   /* one above the largest position value (j+1) written below */
 159 | 
 160 |    for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
 161 |    /* wr: next free slot in mtfv[]; zPend: length of the pending run of front-of-list hits; yy[]: the MTF list */
 162 |    wr = 0;
 163 |    zPend = 0;
 164 |    for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
 165 |    /* walk the sorted order; the symbol for entry i is the byte just before position ptr[i], wrapping at 0 */
 166 |    for (i = 0; i < s->nblock; i++) {
 167 |       UChar ll_i;
 168 |       AssertD ( wr <= i, "generateMTFValues(1)" );
 169 |       j = ptr[i]-1; if (j < 0) j += s->nblock;
 170 |       ll_i = s->unseqToSeq[block[j]];
 171 |       AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
 172 |       /* symbol already at the front of yy[]: only lengthen the pending run */
 173 |       if (yy[0] == ll_i) { 
 174 |          zPend++;
 175 |       } else {
 176 |          /* first flush any pending run as a sequence of BZ_RUNA / BZ_RUNB symbols */
 177 |          if (zPend > 0) {
 178 |             zPend--;
 179 |             while (True) {
 180 |                if (zPend & 1) {
 181 |                   mtfv[wr] = BZ_RUNB; wr++; 
 182 |                   s->mtfFreq[BZ_RUNB]++; 
 183 |                } else {
 184 |                   mtfv[wr] = BZ_RUNA; wr++; 
 185 |                   s->mtfFreq[BZ_RUNA]++; 
 186 |                }
 187 |                if (zPend < 2) break;
 188 |                zPend = (zPend - 2) / 2;
 189 |             };
 190 |             zPend = 0;
 191 |          }
 192 |          {  /* move ll_i to the front of yy[], sliding the entries before it down by one; j becomes its old position */
 193 |             register UChar  rtmp;
 194 |             register UChar* ryy_j;
 195 |             register UChar  rll_i;
 196 |             rtmp  = yy[1];
 197 |             yy[1] = yy[0];
 198 |             ryy_j = &(yy[1]);
 199 |             rll_i = ll_i;
 200 |             while ( rll_i != rtmp ) {
 201 |                register UChar rtmp2;
 202 |                ryy_j++;
 203 |                rtmp2  = rtmp;
 204 |                rtmp   = *ryy_j;
 205 |                *ryy_j = rtmp2;
 206 |             };
 207 |             yy[0] = rtmp;
 208 |             j = ryy_j - &(yy[0]);
 209 |             mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
 210 |          }
 211 | 
 212 |       }
 213 |    }
 214 |    /* flush a run that is still pending at the end of the block */
 215 |    if (zPend > 0) {
 216 |       zPend--;
 217 |       while (True) {
 218 |          if (zPend & 1) {
 219 |             mtfv[wr] = BZ_RUNB; wr++; 
 220 |             s->mtfFreq[BZ_RUNB]++; 
 221 |          } else {
 222 |             mtfv[wr] = BZ_RUNA; wr++; 
 223 |             s->mtfFreq[BZ_RUNA]++; 
 224 |          }
 225 |          if (zPend < 2) break;
 226 |          zPend = (zPend - 2) / 2;
 227 |       };
 228 |       zPend = 0;
 229 |    }
 230 |    /* terminate with EOB, then record how many MTF symbols were written */
 231 |    mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
 232 | 
 233 |    s->nMTF = wr;
 234 | }
 235 | 
 236 | 
 237 | /*---------------------------------------------------*/
 238 | #define BZ_LESSER_ICOST  0
 239 | #define BZ_GREATER_ICOST 15
 240 | /* Choose coding tables for s->mtfv[0 .. s->nMTF-1] and write the tables and the coded data with bsW(). */
 241 | static
 242 | void sendMTFValues ( EState* s )
 243 | {
 244 |    Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
 245 |    Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
 246 |    Int32 nGroups, nBytes;
 247 | 
 248 |    /*--
 249 |    UChar  len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
 250 |    Int32  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
 251 |    Int32  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
 252 | 
 253 |    are not locals here: they are the EState members
 254 |    s->len, s->code and s->rfreq, which keeps this
 255 |    function's stack frame small.
 256 |    --*/
 257 | 
 258 | 
 259 |    UInt16 cost[BZ_N_GROUPS];
 260 |    Int32  fave[BZ_N_GROUPS];
 261 | 
 262 |    UInt16* mtfv = s->mtfv;
 263 | 
 264 |    if (s->verbosity >= 3)
 265 |       VPrintf3( "      %d in block, %d after MTF & 1-2 coding, "
 266 |                 "%d+2 syms in use\n", 
 267 |                 s->nblock, s->nMTF, s->nInUse );
 268 |    /* alphabet size: the in-use byte symbols plus two (generateMTFValues() writes values up to EOB = nInUse+1) */
 269 |    alphaSize = s->nInUse+2;
 270 |    for (t = 0; t < BZ_N_GROUPS; t++)
 271 |       for (v = 0; v < alphaSize; v++)
 272 |          s->len[t][v] = BZ_GREATER_ICOST;
 273 | 
 274 |    /*--- Decide how many coding tables to use ---*/
 275 |    AssertH ( s->nMTF > 0, 3001 );
 276 |    if (s->nMTF < 200)  nGroups = 2; else
 277 |    if (s->nMTF < 600)  nGroups = 3; else
 278 |    if (s->nMTF < 1200) nGroups = 4; else
 279 |    if (s->nMTF < 2400) nGroups = 5; else
 280 |                        nGroups = 6;
 281 | 
 282 |    /*--- Generate an initial set of coding tables ---*/
 283 |    { 
 284 |       Int32 nPart, remF, tFreq, aFreq;
 285 |       /* split the alphabet into nGroups contiguous ranges [gs .. ge] of roughly equal total frequency */
 286 |       nPart = nGroups;
 287 |       remF  = s->nMTF;
 288 |       gs = 0;
 289 |       while (nPart > 0) {
 290 |          tFreq = remF / nPart;
 291 |          ge = gs-1;
 292 |          aFreq = 0;
 293 |          while (aFreq < tFreq && ge < alphaSize-1) {
 294 |             ge++;
 295 |             aFreq += s->mtfFreq[ge];
 296 |          }
 297 |          /* for every other interior range, hand the last symbol back to the following range */
 298 |          if (ge > gs 
 299 |              && nPart != nGroups && nPart != 1 
 300 |              && ((nGroups-nPart) % 2 == 1)) {
 301 |             aFreq -= s->mtfFreq[ge];
 302 |             ge--;
 303 |          }
 304 | 
 305 |          if (s->verbosity >= 3)
 306 |             VPrintf5( "      initial group %d, [%d .. %d], "
 307 |                       "has %d syms (%4.1f%%)\n",
 308 |                       nPart, gs, ge, aFreq, 
 309 |                       (100.0 * (float)aFreq) / (float)(s->nMTF) );
 310 |          /* table nPart-1 starts out cheap inside its own range and expensive everywhere else */
 311 |          for (v = 0; v < alphaSize; v++)
 312 |             if (v >= gs && v <= ge) 
 313 |                s->len[nPart-1][v] = BZ_LESSER_ICOST; else
 314 |                s->len[nPart-1][v] = BZ_GREATER_ICOST;
 315 |  
 316 |          nPart--;
 317 |          gs = ge+1;
 318 |          remF -= aFreq;
 319 |       }
 320 |    }
 321 | 
 322 |    /*--- 
 323 |       Iterate up to BZ_N_ITERS times to improve the tables.
 324 |    ---*/
 325 |    for (iter = 0; iter < BZ_N_ITERS; iter++) {
 326 | 
 327 |       for (t = 0; t < nGroups; t++) fave[t] = 0;
 328 | 
 329 |       for (t = 0; t < nGroups; t++)
 330 |          for (v = 0; v < alphaSize; v++)
 331 |             s->rfreq[t][v] = 0;
 332 | 
 333 |       /*---
 334 |         Set up an auxiliary length table which is used to fast-track
 335 | 	the common case (nGroups == 6). 
 336 |       ---*/
 337 |       if (nGroups == 6) {
 338 |          for (v = 0; v < alphaSize; v++) {
 339 |             s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
 340 |             s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
 341 |             s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
 342 | 	 }
 343 |       }
 344 | 
 345 |       nSelectors = 0;
 346 |       totc = 0;
 347 |       gs = 0;
 348 |       while (True) {
 349 | 
 350 |          /*--- Set group start & end marks. --*/
 351 |          if (gs >= s->nMTF) break;
 352 |          ge = gs + BZ_G_SIZE - 1; 
 353 |          if (ge >= s->nMTF) ge = s->nMTF-1;
 354 | 
 355 |          /*-- 
 356 |             Calculate the cost of this group as coded
 357 |             by each of the coding tables.
 358 |          --*/
 359 |          for (t = 0; t < nGroups; t++) cost[t] = 0;
 360 |          /* NOTE(review): the literal 50 presumably equals BZ_G_SIZE (defined elsewhere) -- confirm */
 361 |          if (nGroups == 6 && 50 == ge-gs+1) {
 362 |             /*--- fast track the common case ---*/
 363 |             register UInt32 cost01, cost23, cost45;
 364 |             register UInt16 icv;
 365 |             cost01 = cost23 = cost45 = 0;
 366 |             /* each UInt32 sums two tables at once; 50 UChar lengths total at most 12750, so the 16-bit halves cannot carry */
 367 | #           define BZ_ITER(nn)                \
 368 |                icv = mtfv[gs+(nn)];           \
 369 |                cost01 += s->len_pack[icv][0]; \
 370 |                cost23 += s->len_pack[icv][1]; \
 371 |                cost45 += s->len_pack[icv][2]; \
 372 | 
 373 |             BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
 374 |             BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
 375 |             BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
 376 |             BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
 377 |             BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
 378 |             BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
 379 |             BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
 380 |             BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
 381 |             BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
 382 |             BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
 383 | 
 384 | #           undef BZ_ITER
 385 |             /* unpack the paired sums into the per-table costs */
 386 |             cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
 387 |             cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
 388 |             cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
 389 | 
 390 |          } else {
 391 | 	    /*--- slow version which correctly handles all situations ---*/
 392 |             for (i = gs; i <= ge; i++) { 
 393 |                UInt16 icv = mtfv[i];
 394 |                for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
 395 |             }
 396 |          }
 397 |  
 398 |          /*-- 
 399 |             Find the coding table which is best for this group,
 400 |             and record its identity in the selector table.
 401 |          --*/
 402 |          bc = 999999999; bt = -1;
 403 |          for (t = 0; t < nGroups; t++)
 404 |             if (cost[t] < bc) { bc = cost[t]; bt = t; };
 405 |          totc += bc;
 406 |          fave[bt]++;
 407 |          s->selector[nSelectors] = bt;
 408 |          nSelectors++;
 409 | 
 410 |          /*-- 
 411 |             Increment the symbol frequencies for the selected table.
 412 |           --*/
 413 |          if (nGroups == 6 && 50 == ge-gs+1) {
 414 |             /*--- fast track the common case ---*/
 415 | 
 416 | #           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
 417 | 
 418 |             BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
 419 |             BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
 420 |             BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
 421 |             BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
 422 |             BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
 423 |             BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
 424 |             BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
 425 |             BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
 426 |             BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
 427 |             BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
 428 | 
 429 | #           undef BZ_ITUR
 430 | 
 431 |          } else {
 432 | 	    /*--- slow version which correctly handles all situations ---*/
 433 |             for (i = gs; i <= ge; i++)
 434 |                s->rfreq[bt][ mtfv[i] ]++;
 435 |          }
 436 | 
 437 |          gs = ge+1;
 438 |       }
 439 |       if (s->verbosity >= 3) {
 440 |          VPrintf2 ( "      pass %d: size is %d, grp uses are ", 
 441 |                    iter+1, totc/8 );
 442 |          for (t = 0; t < nGroups; t++)
 443 |             VPrintf1 ( "%d ", fave[t] );
 444 |          VPrintf0 ( "\n" );
 445 |       }
 446 | 
 447 |       /*--
 448 |         Recompute the tables based on the accumulated frequencies.
 449 |       --*/
 450 |       /* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See 
 451 |          comment in huffman.c for details. */
 452 |       for (t = 0; t < nGroups; t++)
 453 |          BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 
 454 |                                  alphaSize, 17 /*20*/ );
 455 |    }
 456 | 
 457 | 
 458 |    AssertH( nGroups < 8, 3002 );
 459 | 
 460 | 
 461 |    AssertH( /* nSelectors < 32768 &&  */
 462 |             nSelectors <= BZ_MAX_SELECTORS,
 463 |             3003 );
 464 | 
 465 | 
 466 |    /*--- Compute MTF values for the selectors. ---*/
 467 |    {
 468 |       UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
 469 |       for (i = 0; i < nGroups; i++) pos[i] = i;
 470 |       for (i = 0; i < nSelectors; i++) {
 471 |          ll_i = s->selector[i];
 472 |          j = 0;
 473 |          tmp = pos[j];
 474 |          while ( ll_i != tmp ) {
 475 |             j++;
 476 |             tmp2 = tmp;
 477 |             tmp = pos[j];
 478 |             pos[j] = tmp2;
 479 |          };
 480 |          pos[0] = tmp;
 481 |          s->selectorMtf[i] = j;
 482 |       }
 483 |    };
 484 | 
 485 |    /*--- Assign actual codes for the tables. --*/
 486 |    for (t = 0; t < nGroups; t++) {
 487 |       minLen = 32;
 488 |       maxLen = 0;
 489 |       for (i = 0; i < alphaSize; i++) {
 490 |          if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
 491 |          if (s->len[t][i] < minLen) minLen = s->len[t][i];
 492 |       }
 493 |       AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
 494 |       AssertH ( !(minLen < 1),  3005 );
 495 |       BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 
 496 |                           minLen, maxLen, alphaSize );
 497 |    }
 498 | 
 499 |    /*--- Transmit the mapping table. ---*/
 500 |    { 
 501 |       Bool inUse16[16];
 502 |       for (i = 0; i < 16; i++) {
 503 |           inUse16[i] = False;
 504 |           for (j = 0; j < 16; j++)
 505 |              if (s->inUse[i * 16 + j]) inUse16[i] = True;
 506 |       }
 507 |       /* first 16 bits: which of the sixteen 16-value ranges contain any in-use byte */
 508 |       nBytes = s->numZ;
 509 |       for (i = 0; i < 16; i++)
 510 |          if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
 511 |       /* then, for each range flagged above, one bit per byte value in that range */
 512 |       for (i = 0; i < 16; i++)
 513 |          if (inUse16[i])
 514 |             for (j = 0; j < 16; j++) {
 515 |                if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
 516 |             }
 517 | 
 518 |       if (s->verbosity >= 3) 
 519 |          VPrintf1( "      bytes: mapping %d, ", s->numZ-nBytes );
 520 |    }
 521 | 
 522 |    /*--- Now the selectors. ---*/
 523 |    nBytes = s->numZ;
 524 |    bsW ( s, 5 /* 3 */, nGroups ); // changed @aditya: field is 5 bits wide here; the inline remnant shows it was 3
 525 |    /* NOTE(review): the reader must consume these same widths; output is presumably not readable by stock bzip2 -- confirm against decompress.c */
 526 | 
 527 |    bsW ( s, 17 /* 15 */, nSelectors ); // changed @aditya: field is 17 bits wide here; the inline remnant shows it was 15
 528 |    /* each selector's MTF value is written in unary: that many 1-bits followed by a 0-bit */
 529 |    for (i = 0; i < nSelectors; i++) { 
 530 |       for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
 531 |       bsW(s,1,0);
 532 |    }
 533 |    if (s->verbosity >= 3)
 534 |       VPrintf1( "selectors %d, ", s->numZ-nBytes );
 535 | 
 536 |    /*--- Now the coding tables. ---*/
 537 |    nBytes = s->numZ;
 538 |    /* per table: a 5-bit starting length, then per symbol "10" for +1, "11" for -1, and "0" once the length matches */
 539 |    for (t = 0; t < nGroups; t++) {
 540 |       Int32 curr = s->len[t][0];
 541 |       bsW ( s, 5, curr );
 542 |       for (i = 0; i < alphaSize; i++) {
 543 |          while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
 544 |          while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
 545 |          bsW ( s, 1, 0 );
 546 |       }
 547 |    }
 548 | 
 549 |    if (s->verbosity >= 3)
 550 |       VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
 551 | 
 552 |    /*--- And finally, the block data proper ---*/
 553 |    nBytes = s->numZ;
 554 |    selCtr = 0;
 555 |    gs = 0;
 556 |    while (True) {
 557 |       if (gs >= s->nMTF) break;
 558 |       ge = gs + BZ_G_SIZE - 1; 
 559 |       if (ge >= s->nMTF) ge = s->nMTF-1;
 560 |       AssertH ( s->selector[selCtr] < nGroups, 3006 );
 561 |       /* symbols gs..ge are coded with the table named by s->selector[selCtr] */
 562 |       if (nGroups == 6 && 50 == ge-gs+1) {
 563 |             /*--- fast track the common case ---*/
 564 |             UInt16 mtfv_i;
 565 |             UChar* s_len_sel_selCtr 
 566 |                = &(s->len[s->selector[selCtr]][0]);
 567 |             Int32* s_code_sel_selCtr
 568 |                = &(s->code[s->selector[selCtr]][0]);
 569 | 
 570 | #           define BZ_ITAH(nn)                      \
 571 |                mtfv_i = mtfv[gs+(nn)];              \
 572 |                bsW ( s,                             \
 573 |                      s_len_sel_selCtr[mtfv_i],      \
 574 |                      s_code_sel_selCtr[mtfv_i] )
 575 | 
 576 |             BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
 577 |             BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
 578 |             BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
 579 |             BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
 580 |             BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
 581 |             BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
 582 |             BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
 583 |             BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
 584 |             BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
 585 |             BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
 586 | 
 587 | #           undef BZ_ITAH
 588 | 
 589 |       } else {
 590 | 	 /*--- slow version which correctly handles all situations ---*/
 591 |          for (i = gs; i <= ge; i++) {
 592 |             bsW ( s, 
 593 |                   s->len  [s->selector[selCtr]] [mtfv[i]],
 594 |                   s->code [s->selector[selCtr]] [mtfv[i]] );
 595 |          }
 596 |       }
 597 | 
 598 | 
 599 |       gs = ge+1;
 600 |       selCtr++;
 601 |    }
 602 |    AssertH( selCtr == nSelectors, 3007 );
 603 | 
 604 |    if (s->verbosity >= 3)
 605 |       VPrintf1( "codes %d\n", s->numZ-nBytes );
 606 | }
 607 | 
 608 | 
 609 | void merge_two_sort_arrays ( EState *s ) 
 610 | { 
 611 | 
 612 | 	unsigned char *block = (unsigned char*) s->arr2;
 613 | 	unsigned int *h_first_sort_rank = (unsigned int*) s->arr1_first_sort_rank;
 614 | 	unsigned int *h_first_sort_index = (unsigned int*) s->arr1_first_sort;
 615 | 	unsigned int *h_second_sort_index = (unsigned int*) s->arr1_second_sort;
 616 | 	unsigned int *order = (unsigned int*) s->arr1;
 617 | 
 618 | 	/* stores position of index 0 in BWT transform */
 619 | 	s->origPtr = -1;
 620 | 	
 621 | 	int originalLength = s->nblock;
 622 | 	int firstSortLength = s->first_sort_length;
 623 | 	int secondSortLength = originalLength - firstSortLength;
 624 | 
 625 | 	int countOrderArr = 0;
 626 | 	int countFirstArr = 0;
 627 | 	int countSecondArr = 0;
 628 | 	int indexFirst;
 629 | 	int indexSecond;
 630 | 
 631 | 	for(countOrderArr = 0; countOrderArr < originalLength && countFirstArr < firstSortLength && countSecondArr < secondSortLength; countOrderArr++) { 
 632 | 		indexFirst = h_first_sort_index[countFirstArr];
 633 | 		indexSecond = h_second_sort_index[countSecondArr];
 634 | 		if(block[indexFirst] != block[indexSecond]) {
 635 | 			block[indexFirst] < block[indexSecond] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 636 | 			continue;
 637 | 		}
 638 | 
 639 | 		if(indexFirst == originalLength - 1) { 
 640 | 			if(block[0] == block[indexSecond + 1]) { 
 641 | 				if(indexSecond == originalLength - 2) { 
 642 | 					if(block[1]  == block[0]) { 
 643 | 						h_first_sort_rank[2] < h_first_sort_rank[1] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 644 | 					} else { 
 645 | 						block[1] < block[0] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 646 | 					}
 647 | 				} else { 
 648 | 					h_first_sort_rank[1] < h_first_sort_rank[indexSecond+2] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 649 | 				}
 650 | 			} else { 
 651 | 				block[0] < block[indexSecond+1] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 652 | 			}
 653 | 		} else if(indexFirst % 3 == 1) {
 654 | 			(h_first_sort_rank[indexFirst + 1] < h_first_sort_rank[indexSecond + 1]) ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 655 | 		
 656 | 		} else if(indexFirst % 3 == 2) {
 657 | 			if(block[indexFirst+1] == block[indexSecond+1]) {
 658 | 				if(indexFirst + 2 == originalLength || indexSecond+2 == originalLength) { 
 659 | 					int itrIndexFirst = (indexFirst+2) % originalLength;
 660 | 					int itrIndexSecond = (indexSecond+2) % originalLength;
 661 | 					int foundDifference = 0;
 662 | 					while(itrIndexFirst % 3 == 0 || itrIndexSecond % 3 == 0) { 
 663 | 						if(block[itrIndexFirst] != block[itrIndexSecond]) { 
 664 | 							block[itrIndexFirst] < block[itrIndexSecond] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 665 | 							foundDifference = 1;
 666 | 							break;
 667 | 						}
 668 | 						itrIndexFirst = (itrIndexFirst+1)%originalLength;
 669 | 						itrIndexSecond = (itrIndexSecond+1)%originalLength;
 670 | 					}
 671 | 					if(foundDifference == 1) { 
 672 | 						continue;
 673 | 					}
 674 | 					h_first_sort_rank[itrIndexFirst] < h_first_sort_rank[itrIndexSecond] ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 675 | 				} else { 
 676 | 					(h_first_sort_rank[indexFirst+2] < h_first_sort_rank[indexSecond+2]) ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 677 | 				}
 678 | 			} else { 
 679 | 				(block[indexFirst+1] < block[indexSecond+1]) ? order[countOrderArr] = h_first_sort_index[countFirstArr++] : order[countOrderArr] = h_second_sort_index[countSecondArr++];
 680 | 			}
 681 | 		}
 682 | 
 683 | 		if(order[countOrderArr] == 0) { 
 684 | 			s->origPtr = countOrderArr;
 685 | 		}
 686 | 		
 687 | 	} 
 688 | 	
 689 | 	while(countFirstArr < firstSortLength) { 
 690 | 		order[countOrderArr] = h_first_sort_index[countFirstArr];
 691 | 		if(order[countOrderArr] == 0) { 
 692 | 			s->origPtr = countOrderArr;
 693 | 		}
 694 | 		countFirstArr++;
 695 | 		countOrderArr++;
 696 | 	}
 697 | 
 698 | 	while(countSecondArr < secondSortLength) { 
 699 | 		order[countOrderArr] = h_second_sort_index[countSecondArr];
 700 | 		if(order[countOrderArr] == 0) { 
 701 | 			s->origPtr = countOrderArr;
 702 | 		}
 703 | 		countSecondArr++;
 704 | 		countOrderArr++;
 705 | 	}  
 706 | 
 707 | 	AssertH( s->origPtr != -1, 1003 ); 
 708 | 
 709 | 	return;
 710 | }
 711 | 
 712 | Bool blocksort_wrapper( EState *s ) {
 713 | 	BZ_INITIALISE_CRC( s->blockCRC );
 714 | 	if (s->nblock > 0) {
 715 | 
 716 | 		BZ_FINALISE_CRC ( s->blockCRC );
 717 | 		s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
 718 | 		s->combinedCRC ^= s->blockCRC;
 719 | 		if (s->blockNo > 1) s->numZ = 0;
 720 | 
 721 | 		if (s->verbosity >= 2)
 722 | 			VPrintf4( "    block %d: crc = 0x%08x, "
 723 | 					"combined CRC = 0x%08x, size = %d\n",
 724 | 					s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
 725 | 
 726 | 		s->first_sort_length = gpuBlockSort( (UChar*) s->arr2, s->arr1, s->arr1_first_sort, s->arr1_second_sort, s->arr1_first_sort_rank, s->nblock, &(s->sortingDepth));
 727 | 		
 728 | 		/* loop moved to merge two arrays
 729 | 		UInt32* ptr    = s->ptr; 
 730 | 		s->origPtr = -1;
 731 | 		Int32 i;
 732 | 		for (i = 0; i < s->nblock; i++)
 733 | 			if (ptr[i] == 0)
 734 | 			{ s->origPtr = i; break; };
 735 | 
 736 | 		AssertH( s->origPtr != -1, 1003 ); */
 737 | 	}
 738 | 	return True;
 739 | }
 740 | 
 741 | Bool mtf_huff_wrapper( EState* s, Bool is_last_block ) { 
 742 | 
 743 | 	
 744 | 	s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
 745 | 
 746 | #ifdef PRINT_DEBUG
 747 | 	printf("BZ2_compressBlock %d\n",s->blockNo); 
 748 | #endif
 749 | 
 750 | 	// start with a fresh buffer with every block
 751 | 	BZ2_bsInitWrite( s ); 
 752 | 
 753 | 	/*-- If this is the first block, create the stream header. --*/
 754 | 	if (s->blockNo == 1) {
 755 | #ifdef PRINT_DEBUG
 756 | 		printf("This is the first block\n");
 757 | #endif
 758 | 		bsPutUChar ( s, BZ_HDR_B );
 759 | 		bsPutUChar ( s, BZ_HDR_Z );
 760 | 		bsPutUChar ( s, BZ_HDR_h );
 761 | 		bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
 762 | 	}
 763 | 
 764 | 	if (s->nblock > 0) {
 765 | 
 766 | 		bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
 767 | 		bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
 768 | 		bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
 769 | 
 770 | 		/*-- Now the block's CRC, so it is in a known place. --*/
 771 | 		bsPutUInt32 ( s, s->blockCRC );
 772 | 
 773 | 		/*-- 
 774 | 		  Now a single bit indicating (non-)randomisation. 
 775 | 		  As of version 0.9.5, we use a better sorting algorithm
 776 | 		  which makes randomisation unnecessary.  So always set
 777 | 		  the randomised bit to 'no'.  Of course, the decoder
 778 | 		  still needs to be able to handle randomised blocks
 779 | 		  so as to maintain backwards compatibility with
 780 | 		  older versions of bzip2.
 781 | 		  --*/
 782 | 		bsW(s,1,0);
 783 | 
 784 | 		bsW ( s, 24, s->origPtr );
 785 | 		generateMTFValues ( s );
 786 | 		sendMTFValues ( s );
 787 | 	}
 788 | 
 789 | 
 790 | 	/*-- If this is the last block, add the stream trailer. --*/
 791 | 	if (is_last_block) {
 792 | #ifdef PRINT_DEBUG
 793 | 		printf("This is the last block\n");
 794 | #endif
 795 | 		bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
 796 | 		bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
 797 | 		bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
 798 | 		bsPutUInt32 ( s, s->combinedCRC );
 799 | 		if (s->verbosity >= 2)
 800 | 			VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
 801 | 		bsFinishWrite ( s );
 802 | 	}
 803 | 
 804 | 	return True;
 805 | }
 806 | 
 807 | Bool BZ2_compressBlock_only_CPU ( EState* s, Bool is_last_block ) {
 808 | 
 809 | 	if (s->nblock > 0) {
 810 | 		BZ_FINALISE_CRC ( s->blockCRC );
 811 | 		s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
 812 | 		s->combinedCRC ^= s->blockCRC;
 813 | 		if (s->blockNo > 1) s->numZ = 0;
 814 | 
 815 | 		if (s->verbosity >= 2)
 816 | 			VPrintf4( "    block %d: crc = 0x%08x, "
 817 | 					"combined CRC = 0x%08x, size = %d\n",
 818 | 					s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
 819 | 
 820 | 		BZ2_blockSort ( s );
 821 | 	}
 822 | 
 823 | 	s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
 824 | 
 825 | 	/*-- If this is the first block, create the stream header. --*/
 826 | 	BZ2_bsInitWrite ( s );
 827 | 	if (s->blockNo == 1) {
 828 | 		bsPutUChar ( s, BZ_HDR_B );
 829 | 		bsPutUChar ( s, BZ_HDR_Z );
 830 | 		bsPutUChar ( s, BZ_HDR_h );
 831 | 		bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
 832 | 	}
 833 | 
 834 | 	if (s->nblock > 0) {
 835 | 
 836 | 		bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
 837 | 		bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
 838 | 		bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
 839 | 
 840 | 		/*-- Now the block's CRC, so it is in a known place. --*/
 841 | 		bsPutUInt32 ( s, s->blockCRC );
 842 | 
 843 | 		/*-- 
 844 | 		  Now a single bit indicating (non-)randomisation. 
 845 | 		  As of version 0.9.5, we use a better sorting algorithm
 846 | 		  which makes randomisation unnecessary.  So always set
 847 | 		  the randomised bit to 'no'.  Of course, the decoder
 848 | 		  still needs to be able to handle randomised blocks
 849 | 		  so as to maintain backwards compatibility with
 850 | 		  older versions of bzip2.
 851 | 		  --*/
 852 | 		bsW(s,1,0);
 853 | 
 854 | 		bsW ( s, 24, s->origPtr );
 855 | 		generateMTFValues ( s );
 856 | 		sendMTFValues ( s );
 857 | 	}
 858 | 
 859 | 
 860 | 	/*-- If this is the last block, add the stream trailer. --*/
 861 | 	if (is_last_block) {
 862 | 
 863 | 		bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
 864 | 		bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
 865 | 		bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
 866 | 		bsPutUInt32 ( s, s->combinedCRC );
 867 | 		if (s->verbosity >= 2)
 868 | 			VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
 869 | 		bsFinishWrite ( s );
 870 | 	}
 871 | 
 872 | 	return True;
 873 | 
 874 | }
 875 | /*---------------------------------------------------*/
 876 | void BZ2_compressBlocks ( bz_stream* strm)
 877 | {
 878 | 
 879 | 
 880 | 	gpuSetDevice(0);
 881 | 	
 882 | 	struct timespec t1, t2; 
 883 | 	clock_gettime(CLOCK_MONOTONIC, &t1);
 884 | 
 885 | 	Int32 atomic_used_count = 1; 
 886 | 	Int32 done_block_sort = 0;
 887 | 	Int32 done_block_sort_queue[BZ_MAX_STATE_COUNT];
 888 | 	Int32 done_huff_mtf = 0;
 889 | 	Int32 finalGPUBlocks = BZ_MAX_STATE_COUNT;
 890 | 	Int32 num_procs = omp_get_num_procs();
 891 | 
 892 | 	printf("Block size : %d\n",((EState *)strm->state[0])->nblockMAX + 19);
 893 | 	printf("numThreads read %d\n",((EState*)strm->state[0])->numThreads);
 894 | 	Int32 num_threads = ((EState*)strm->state[0])->numThreads/*+2*/; 
 895 | 	omp_set_num_threads(num_threads+2);
 896 | 	printf("Number of additional CPU threads %d\n",num_threads+1);
 897 | 
 898 | 	
 899 |         #pragma omp parallel shared(done_block_sort, done_block_sort_queue, done_huff_mtf, finalGPUBlocks)
 900 | 	{ 
 901 | 		int threadID = omp_get_thread_num();
 902 | 		Int32 count;
 903 | 		Int32 currentHuffMtf = -1;
 904 | 		EState *s; 
 905 | 		Bool is_last_block = False;
 906 | 
 907 | 		if(threadID == 0) {
 908 | 			struct timespec thread1_T1, thread1_T2; 
 909 | 			clock_gettime(CLOCK_MONOTONIC, &thread1_T1);
 910 | 
 911 | 			blocksort_wrapper((EState*) strm->state[0]);
 912 | 
 913 | 			done_block_sort_queue[done_block_sort] = 0;
 914 | 			done_block_sort++;
 915 | 			while(true) {
 916 | 				#pragma omp critical 
 917 | 				{
 918 | 					count = atomic_used_count;
 919 | 					atomic_used_count++;
 920 | 				}
 921 | 				if(count > strm->state_fill_count) { 
 922 | 					finalGPUBlocks = done_block_sort;
 923 | 					break;
 924 | 				}
 925 | 				s = (EState *)strm->state[count];
 926 | 				if(blocksort_wrapper( s )) {
 927 | 					done_block_sort_queue[done_block_sort] = count;
 928 | 					done_block_sort++; 
 929 | 				} else { 
 930 | 					printf("blocksort_wrapper failed for blockNo %d\n",count);
 931 | 					exit(2);
 932 | 				}
 933 | 			}
 934 | 			clock_gettime(CLOCK_MONOTONIC, &thread1_T2);
 935 | 			double thread1_diff = thread1_T2.tv_sec - thread1_T1.tv_sec + ((thread1_T2.tv_nsec - thread1_T1.tv_nsec)/1000000000.0); 
 936 | 			printf("[Time thread1] %lf\n",thread1_diff);
 937 | 		}
 938 | 
 939 | 		if(threadID == 1) {
 940 | 			struct timespec thread2_T1, thread2_T2;
 941 | 			clock_gettime(CLOCK_MONOTONIC, &thread2_T1);
 942 | 			double thread2_work = 0.0;
 943 | 			struct timespec thread2_work1, thread2_work2;
 944 | 			while(true) {
 945 | 				if(done_block_sort > done_huff_mtf) {
 946 | 					clock_gettime(CLOCK_MONOTONIC, &thread2_work1);
 947 | 				
 948 | 					currentHuffMtf = done_block_sort_queue[done_huff_mtf];
 949 | 					done_huff_mtf++; 
 950 | 					s = (EState *)strm->state[currentHuffMtf];
 951 | 					merge_two_sort_arrays ( s );
 952 | 					if(currentHuffMtf == strm->state_fill_count) { 
 953 | 						is_last_block = True;
 954 | 					}
 955 | 					if( !mtf_huff_wrapper( s, is_last_block ) ) { 
 956 | 						printf("mtf_huff_wrapper failed for blockNo %d\n",currentHuffMtf);
 957 | 						exit(2);
 958 | 					}
 959 | 				
 960 | 					clock_gettime(CLOCK_MONOTONIC, &thread2_work2);
 961 | 					thread2_work+= thread2_work2.tv_sec - thread2_work1.tv_sec + ((thread2_work2.tv_nsec - thread2_work1.tv_nsec)/1000000000.0);
 962 | 				}
 963 | 				if(done_huff_mtf == finalGPUBlocks) { 
 964 | 					break;
 965 | 				}
 966 | 			} 
 967 | 		
 968 | 			clock_gettime(CLOCK_MONOTONIC, &thread2_T2);
 969 | 			double thread2_diff = thread2_T2.tv_sec - thread2_T1.tv_sec + ((thread2_T2.tv_nsec - thread2_T1.tv_nsec)/1000000000.0); 
 970 | 			printf("[Time thread2 work] %lf\n",thread2_work);
 971 | 			printf("[Time thread2] %lf\n",thread2_diff);
 972 | 		}
 973 | 
 974 | 		if(threadID >= 2) {
 975 | 			while(true) { 
 976 | 				#pragma omp critical 
 977 | 				{
 978 | 					count = atomic_used_count;
 979 | 					atomic_used_count++;
 980 | 				}
 981 | 				if(count > strm->state_fill_count) { 
 982 | 					break;
 983 | 				}
 984 | 				s = (EState *)strm->state[count];
 985 | 				if(!BZ2_compressBlock_only_CPU( s, (Bool) count == strm->state_fill_count)) { 
 986 | 					printf("BZ2_compressBlock_only_CPU failed at blockNo %d\n",count);
 987 | 					exit(2);
 988 | 				}
 989 | 			}
 990 | 		} 
 991 | 	} 
 992 | 
 993 | 	/*
 994 | 	UInt32 count;
 995 | 	#pragma omp parallel for
 996 | 	for(count = 0; count <= strm->state_fill_count; count++) { 
 997 | 		EState *s = (EState*)strm->state[count];	
 998 | 		if(!BZ2_compressBlock_only_CPU( s, (Bool) count == strm->state_fill_count)) { 
 999 | 			printf("BZ2_compressBlock_only_CPU failed at blockNo %d\n",count);
1000 | 			exit(2);
1001 | 		}
1002 | 	} */
1003 | 	printf("Number of CPU threads %d\n",num_threads - 2);
1004 | 	printf("Out of the total %d blocks GPU did %d\n",strm->state_fill_count+1,finalGPUBlocks);
1005 | 	clock_gettime(CLOCK_MONOTONIC, &t2);
1006 | 	double t = t2.tv_sec - t1.tv_sec + ((t2.tv_nsec - t1.tv_nsec)/1000000000.0); 
1007 | 	printf("total compression time (with overlap) %lf\n",t);
1008 | }
1009 | 
1010 | 
/* Global Variables for PThreads Producer-Consumer pipeline */

/* Stream being compressed; assigned by BZ2_compressBlocks_pthreads and
   read by both the produce and consume threads. */
bz_stream* global_strm;

/* mutex serialises access to buff and the two counters below;
   full  is posted by produce once per published entry and waited on by consume;
   empty is waited on by produce before publishing and posted by consume. */
sem_t mutex, full, empty;

/* Block indices handed from produce to consume. */
int buff[BZ_MAX_STATE_COUNT];

/* Write cursor (produce) and read cursor (consume) into buff. */
int producerCount = 0, consumerCount = 0;
1016 | 
1017 | 
1018 | void *produce(void *arg) { 
1019 | 	unsigned int i;
1020 | 	struct timespec t1, t2;
1021 | 	double thread1_work = 0.0;
1022 | 
1023 | 	int maxSortingDepth = 0;
1024 | 	int avgSortingDepth = 0;
1025 | 
1026 | 	for(i = 0; i <= global_strm->state_fill_count; i++) {
1027 | 		 
1028 | 		clock_gettime(CLOCK_MONOTONIC, &t1);
1029 | 		EState *s = (EState*)global_strm->state[i];
1030 | 		if( !blocksort_wrapper( s )) { 
1031 | 			printf("[ERROR] blocksort_wrapper_failed\n");
1032 | 			exit(1);
1033 | 		}
1034 | 		clock_gettime(CLOCK_MONOTONIC, &t2);
1035 | 		if(i!=global_strm->state_fill_count) { 
1036 | 			thread1_work += t2.tv_sec - t1.tv_sec + ((t2.tv_nsec - t1.tv_nsec)/1000000000.0);
1037 | 			if(s->sortingDepth > maxSortingDepth) maxSortingDepth = s->sortingDepth;
1038 | 			avgSortingDepth += s->sortingDepth;
1039 | 		}
1040 | 		
1041 | 		sem_wait(&empty);
1042 | 		sem_wait(&mutex);	
1043 | 		buff[++producerCount] = i;
1044 | 		sem_post(&mutex);
1045 | 		sem_post(&full);
1046 | 	}
1047 | 	int divFactor = global_strm->state_fill_count;
1048 | 	printf("[Time BWT] %lf\n",thread1_work);
1049 | 	printf("[Average Time BWT] %lf\n",thread1_work/divFactor);
1050 | 	printf("[Max Sorting Depth] %d\n",maxSortingDepth);
1051 | 	printf("[Average Sorting Depth] %d\n",(int)((1.0*avgSortingDepth)/divFactor));
1052 | }
1053 | 
1054 | void *consume(void *arg) { 
1055 | 	int item, i;
1056 | 	bool is_last_block = false;
1057 | 	
1058 | 	struct timespec t1, t2;
1059 | 	struct timespec tmerge; 
1060 | 	double tmergeTotal = 0.0;
1061 | 	double thread2_work = 0.0;
1062 | 	
1063 | 	for(i = 0; i <= global_strm->state_fill_count; i++) { 
1064 | 	
1065 | 		sem_wait(&full);
1066 | 		sem_wait(&mutex);
1067 | 		item = buff[++consumerCount];
1068 | 		sem_post(&mutex);
1069 | 		sem_post(&empty);
1070 | 		
1071 | 		clock_gettime(CLOCK_MONOTONIC, &t1);
1072 | 		EState *s = (EState*)global_strm->state[item];
1073 | 		merge_two_sort_arrays( s );
1074 | 		clock_gettime(CLOCK_MONOTONIC, &tmerge);
1075 | 
1076 | 		
1077 | 		if(item == global_strm->state_fill_count) { 
1078 | 			is_last_block = true;
1079 | 		} else { 
1080 | 			tmergeTotal += tmerge.tv_sec - t1.tv_sec + ((tmerge.tv_nsec - t1.tv_nsec)/1000000000.0);
1081 | 		}
1082 | 
1083 | 		if(!mtf_huff_wrapper( s, is_last_block )) { 
1084 | 			printf("[ERROR] mtf_huff_wrapper\n");
1085 | 			exit(1);
1086 | 		}
1087 | 		clock_gettime(CLOCK_MONOTONIC, &t2);
1088 | 		thread2_work += t2.tv_sec - t1.tv_sec + ((t2.tv_nsec - t1.tv_nsec)/1000000000.0); 
1089 | 	}
1090 | 	int divFactor = global_strm->state_fill_count;
1091 | 	printf("[Total Time Merge] %lf\n",tmergeTotal);
1092 | 	printf("[Average Time Merge] %lf\n",tmergeTotal/divFactor);
1093 | 	printf("[Time Merge, MTF+HUFF] %lf\n",thread2_work);
1094 | }
1095 | 
1096 | void BZ2_compressBlocks_pthreads ( bz_stream* strm)
1097 | {
1098 | 
1099 | 	printf("[BZ2_compressBlocks_pthreads] Total Blocks : %d, Block Size %d\n", strm->state_fill_count, ((EState*)strm->state[0])->nblockMAX);
1100 | 	gpuSetDevice(0);
1101 | 	global_strm = strm;
1102 | 	
1103 | 	struct timespec t1, t2; 
1104 | 	clock_gettime(CLOCK_MONOTONIC, &t1);
1105 | 	
1106 | 	pthread_t tid1, tid2;
1107 | 	sem_init(&mutex, 0, 1);
1108 | 	sem_init(&full, 0, 0);
1109 | 	sem_init(&empty, 0, strm->state_fill_count);
1110 | 	
1111 | 	pthread_create(&tid1, NULL, produce, NULL);
1112 | 	pthread_create(&tid2, NULL, consume, NULL);
1113 | 
1114 | 	pthread_join(tid1, NULL);
1115 | 	pthread_join(tid2, NULL);
1116 | 	
1117 | 	clock_gettime(CLOCK_MONOTONIC, &t2);
1118 | 	double t = t2.tv_sec - t1.tv_sec + ((t2.tv_nsec - t1.tv_nsec)/1000000000.0); 
1119 | 	printf("total compression time (with pthreads overlap) %lf\n",t);
1120 | }
1121 | 
1122 | 
1123 | 
1124 | void BZ2_compressBlocks_without_overlap (bz_stream* strm)  { 
1125 | 
1126 | 	struct timespec t1, t2; 
1127 | 	clock_gettime(CLOCK_MONOTONIC, &t1);
1128 | 
1129 | 	UInt32 count;
1130 | 	EState *s; 
1131 | 	Bool is_last_block = False;
1132 | 
1133 | 	for(count = 0; count <= strm->state_fill_count; count++) { 
1134 | 		s = (EState *)strm->state[count];
1135 | 		BZ_INITIALISE_CRC( s->blockCRC );
1136 | 		if(count == strm->state_fill_count) { 
1137 | 			is_last_block = True;
1138 | 		}
1139 | 		if (s->nblock > 0) {
1140 | 
1141 | 			BZ_FINALISE_CRC ( s->blockCRC );
1142 | 			s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
1143 | 			s->combinedCRC ^= s->blockCRC;
1144 | 			if (s->blockNo > 1) s->numZ = 0;
1145 | 
1146 | 			if (s->verbosity >= 2)
1147 | 				VPrintf4( "    block %d: crc = 0x%08x, "
1148 | 						"combined CRC = 0x%08x, size = %d\n",
1149 | 						s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
1150 | 
1151 | 			s->first_sort_length = gpuBlockSort( (UChar*) s->arr2, s->arr1, s->arr1_first_sort, s->arr1_second_sort, s->arr1_first_sort_rank, s->nblock, &(s->sortingDepth));
1152 | 
1153 | 			merge_two_sort_arrays( s );
1154 | 			
1155 | 			UInt32* ptr    = s->ptr; 
1156 | 			s->origPtr = -1;
1157 | 			Int32 i;
1158 | 			for (i = 0; i < s->nblock; i++)
1159 | 				if (ptr[i] == 0)
1160 | 				{ s->origPtr = i; break; };
1161 | 
1162 | 			AssertH( s->origPtr != -1, 1003 );
1163 | 		}
1164 | 		s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
1165 | 
1166 | #ifdef PRINT_DEBUG
1167 | 		printf("BZ2_compressBlock %d\n",s->blockNo); 
1168 | #endif
1169 | 
1170 | 		// start with a fresh buffer with every block
1171 | 		BZ2_bsInitWrite( s ); 
1172 | 
1173 | 		/*-- If this is the first block, create the stream header. --*/
1174 | 		if (s->blockNo == 1) {
1175 | #ifdef PRINT_DEBUG
1176 | 			printf("This is the first block\n");
1177 | #endif
1178 | 			//BZ2_bsInitWrite ( s );
1179 | 			bsPutUChar ( s, BZ_HDR_B );
1180 | 			bsPutUChar ( s, BZ_HDR_Z );
1181 | 			bsPutUChar ( s, BZ_HDR_h );
1182 | 			bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
1183 | 		}
1184 | 
1185 | 		if (s->nblock > 0) {
1186 | 
1187 | 			bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
1188 | 			bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
1189 | 			bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
1190 | 
1191 | 			/*-- Now the block's CRC, so it is in a known place. --*/
1192 | 			bsPutUInt32 ( s, s->blockCRC );
1193 | 
1194 | 			/*-- 
1195 | 			  Now a single bit indicating (non-)randomisation. 
1196 | 			  As of version 0.9.5, we use a better sorting algorithm
1197 | 			  which makes randomisation unnecessary.  So always set
1198 | 			  the randomised bit to 'no'.  Of course, the decoder
1199 | 			  still needs to be able to handle randomised blocks
1200 | 			  so as to maintain backwards compatibility with
1201 | 			  older versions of bzip2.
1202 | 			  --*/
1203 | 			bsW(s,1,0);
1204 | 
1205 | 			bsW ( s, 24, s->origPtr );
1206 | 			generateMTFValues ( s );
1207 | 			sendMTFValues ( s );
1208 | 		}
1209 | 
1210 | 
1211 | 		/*-- If this is the last block, add the stream trailer. --*/
1212 | 		if (is_last_block) {
1213 | #ifdef PRINT_DEBUG
1214 | 			printf("This is the last block\n");
1215 | #endif
1216 | 			bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
1217 | 			bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
1218 | 			bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
1219 | 			bsPutUInt32 ( s, s->combinedCRC );
1220 | 			if (s->verbosity >= 2)
1221 | 				VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
1222 | 			bsFinishWrite ( s );
1223 | 		}
1224 | 	}
1225 | 	
1226 | 	clock_gettime(CLOCK_MONOTONIC, &t2);
1227 | 	double t = t2.tv_sec - t1.tv_sec + ((t2.tv_nsec - t1.tv_nsec)/1000000000.0); 
1228 | 	printf("total compression time (without overlap) %lf\n",t);
1229 | }
1230 | /*-------------------------------------------------------------*/
1231 | /*--- end                                        compress.c ---*/
1232 | /*-------------------------------------------------------------*/
1233 | 


--------------------------------------------------------------------------------
/crctable.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*-------------------------------------------------------------*/
  3 | /*--- Table for doing CRCs                                  ---*/
  4 | /*---                                            crctable.c ---*/
  5 | /*-------------------------------------------------------------*/
  6 | 
  7 | /* ------------------------------------------------------------------
  8 |    This file is part of bzip2/libbzip2, a program and library for
  9 |    lossless, block-sorting data compression.
 10 | 
 11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 13 | 
 14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 15 |    README file.
 16 | 
 17 |    This program is released under the terms of the license contained
 18 |    in the file LICENSE.
 19 |    ------------------------------------------------------------------ */
 20 | 
 21 | 
 22 | #include "bzlib_private.h"
 23 | 
/*--
  I think this is an implementation of the AUTODIN-II,
  Ethernet & FDDI 32-bit CRC standard.  Vaguely derived
  from code by Rob Warnock, in Section 51 of the
  comp.compression FAQ.

  256-entry lookup table, one 32-bit value per possible byte.
  Entry 0 is zero and entry 1 is 0x04c11db7, which is the
  generator polynomial of the common MSB-first CRC-32.
  The values must not be edited by hand.
--*/

UInt32 BZ2_crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
100 | 
101 | 
102 | /*-------------------------------------------------------------*/
103 | /*--- end                                        crctable.c ---*/
104 | /*-------------------------------------------------------------*/
105 | 


--------------------------------------------------------------------------------
/decompress.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*-------------------------------------------------------------*/
  3 | /*--- Decompression machinery                               ---*/
  4 | /*---                                          decompress.c ---*/
  5 | /*-------------------------------------------------------------*/
  6 | 
  7 | /* ------------------------------------------------------------------
  8 |    This file is part of bzip2/libbzip2, a program and library for
  9 |    lossless, block-sorting data compression.
 10 | 
 11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 13 | 
 14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 15 |    README file.
 16 | 
 17 |    This program is released under the terms of the license contained
 18 |    in the file LICENSE.
 19 |    ------------------------------------------------------------------ */
 20 | 
 21 | 
 22 | #include "bzlib_private.h"
 23 | 
 24 | 
 25 | /*---------------------------------------------------*/
 26 | /* Rebuild s->seqToUnseq and s->nInUse from the s->inUse[] flags. */
 27 | static void makeMaps_d ( DState* s )
 28 | {
 29 |    Int32 sym;
 30 |    Int32 nUsed = 0;
 31 |    for (sym = 0; sym < 256; sym++) {
 32 |       if (!s->inUse[sym]) continue;
 33 |       s->seqToUnseq[nUsed++] = sym;
 34 |    }
 35 |    s->nInUse = nUsed;
 36 | }
 37 | 
 38 | 
 39 | /*--- The macros below only work inside BZ2_decompress ---*/
 40 | #define RETURN(rrr)                               \
 41 |    { retVal = rrr; goto save_state_and_return; };  /* set result, leave via the save-state epilogue */
 42 | /* GET_BITS: resume point "case lll"; vvv = next nnn input bits, or RETURN(BZ_OK) if avail_in == 0 */
 43 | #define GET_BITS(lll,vvv,nnn)                     \
 44 |    case lll: s->state = lll;                      \
 45 |    while (True) {                                 \
 46 |       if (s->bsLive >= nnn) {                     \
 47 |          UInt32 v;                                \
 48 |          v = (s->bsBuff >>                        \
 49 |              (s->bsLive-nnn)) & ((1 << nnn)-1);   \
 50 |          s->bsLive -= nnn;                        \
 51 |          vvv = v;                                 \
 52 |          break;                                   \
 53 |       }                                           \
 54 |       if (s->strm->avail_in == 0) RETURN(BZ_OK);  \
 55 |       s->bsBuff                                   \
 56 |          = (s->bsBuff << 8) |                     \
 57 |            ((UInt32)                              \
 58 |               (*((UChar*)(s->strm->next_in))));   \
 59 |       s->bsLive += 8;                             \
 60 |       s->strm->next_in++;                         \
 61 |       s->strm->avail_in--;                        \
 62 |       s->strm->total_in_lo32++;                   \
 63 |       if (s->strm->total_in_lo32 == 0)            \
 64 |          s->strm->total_in_hi32++;                \
 65 |    }
 66 | /* GET_UCHAR: GET_BITS for one 8-bit value. */
 67 | #define GET_UCHAR(lll,uuu)                        \
 68 |    GET_BITS(lll,uuu,8)
 69 | /* GET_BIT: GET_BITS for a single bit. */
 70 | #define GET_BIT(lll,uuu)                          \
 71 |    GET_BITS(lll,uuu,1)
 72 | /*---------------------------------------------------*/
 73 | /* GET_MTF_VAL: decode one Huffman symbol into lval; a new selector's tables are loaded when groupPos is 0 */
 74 | #define GET_MTF_VAL(label1,label2,lval)           \
 75 | {                                                 \
 76 |    if (groupPos == 0) {                           \
 77 |       groupNo++;                                  \
 78 |       if (groupNo >= nSelectors)                  \
 79 |          RETURN(BZ_DATA_ERROR);                   \
 80 |       groupPos = BZ_G_SIZE;                       \
 81 |       gSel = s->selector[groupNo];                \
 82 |       gMinlen = s->minLens[gSel];                 \
 83 |       gLimit = &(s->limit[gSel][0]);              \
 84 |       gPerm = &(s->perm[gSel][0]);                \
 85 |       gBase = &(s->base[gSel][0]);                \
 86 |    }                                              \
 87 |    groupPos--;                                    \
 88 |    zn = gMinlen;                                  \
 89 |    GET_BITS(label1, zvec, zn);                    \
 90 |    while (1) {                                    \
 91 |       if (zn > 20 /* the longest code */)         \
 92 |          RETURN(BZ_DATA_ERROR);                   \
 93 |       if (zvec <= gLimit[zn]) break;              \
 94 |       zn++;                                       \
 95 |       GET_BIT(label2, zj);                        \
 96 |       zvec = (zvec << 1) | zj;                    \
 97 |    };                                             \
 98 |    if (zvec - gBase[zn] < 0                       \
 99 |        || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE)  \
100 |       RETURN(BZ_DATA_ERROR);                      \
101 |    lval = gPerm[zvec - gBase[zn]];                \
102 | }
103 | 
104 | 
105 | /*---------------------------------------------------*/
106 | /* Decode compressed data for s, resuming at the point recorded in
107 |    s->state.  Each GET_BITS/GET_UCHAR/GET_BIT expansion is a case label of
108 |    the switch below; when input runs out the locals are copied to
109 |    s->save_* and restored on the next call, so decoding continues at the
110 |    same read.
111 |    Returns BZ_OK when more input is needed or when one block has been
112 |    decoded (s->state is then BZ_X_OUTPUT), BZ_STREAM_END once the stream
113 |    trailer and its combined CRC have been read, BZ_DATA_ERROR_MAGIC for
114 |    a bad stream header, BZ_DATA_ERROR for malformed block data and
115 |    BZ_MEM_ERROR when BZALLOC fails. */
116 | Int32 BZ2_decompress ( DState* s )
117 | {
118 |    UChar      uc;
119 |    Int32      retVal;
120 |    Int32      minLen, maxLen;
121 |    bz_stream* strm = s->strm;
122 | 
123 |    /* stuff that needs to be saved/restored */
124 |    Int32  i, j, t;
125 |    Int32  alphaSize;
126 |    Int32  nGroups, nSelectors;
127 |    Int32  EOB;
128 |    Int32  groupNo, groupPos;
129 |    Int32  nextSym;
130 |    Int32  nblockMAX, nblock;
131 |    Int32  es, N;
132 |    Int32  curr;
133 |    Int32  zt, zn, zvec, zj;
134 |    Int32  gSel, gMinlen;
135 |    Int32* gLimit;
136 |    Int32* gBase;
137 |    Int32* gPerm;
138 | 
139 |    if (s->state == BZ_X_MAGIC_1) {
140 |       /*initialise the save area*/
141 |       s->save_i           = 0;
142 |       s->save_j           = 0;
143 |       s->save_t           = 0;
144 |       s->save_alphaSize   = 0;
145 |       s->save_nGroups     = 0;
146 |       s->save_nSelectors  = 0;
147 |       s->save_EOB         = 0;
148 |       s->save_groupNo     = 0;
149 |       s->save_groupPos    = 0;
150 |       s->save_nextSym     = 0;
151 |       s->save_nblockMAX   = 0;
152 |       s->save_nblock      = 0;
153 |       s->save_es          = 0;
154 |       s->save_N           = 0;
155 |       s->save_curr        = 0;
156 |       s->save_zt          = 0;
157 |       s->save_zn          = 0;
158 |       s->save_zvec        = 0;
159 |       s->save_zj          = 0;
160 |       s->save_gSel        = 0;
161 |       s->save_gMinlen     = 0;
162 |       s->save_gLimit      = NULL;
163 |       s->save_gBase       = NULL;
164 |       s->save_gPerm       = NULL;
165 |    }
166 | 
167 |    /*restore from the save area*/
168 |    i           = s->save_i;
169 |    j           = s->save_j;
170 |    t           = s->save_t;
171 |    alphaSize   = s->save_alphaSize;
172 |    nGroups     = s->save_nGroups;
173 |    nSelectors  = s->save_nSelectors;
174 |    EOB         = s->save_EOB;
175 |    groupNo     = s->save_groupNo;
176 |    groupPos    = s->save_groupPos;
177 |    nextSym     = s->save_nextSym;
178 |    nblockMAX   = s->save_nblockMAX;
179 |    nblock      = s->save_nblock;
180 |    es          = s->save_es;
181 |    N           = s->save_N;
182 |    curr        = s->save_curr;
183 |    zt          = s->save_zt;
184 |    zn          = s->save_zn; 
185 |    zvec        = s->save_zvec;
186 |    zj          = s->save_zj;
187 |    gSel        = s->save_gSel;
188 |    gMinlen     = s->save_gMinlen;
189 |    gLimit      = s->save_gLimit;
190 |    gBase       = s->save_gBase;
191 |    gPerm       = s->save_gPerm;
192 | 
193 |    retVal = BZ_OK;
194 | 
195 |    switch (s->state) {
196 | 
197 |       GET_UCHAR(BZ_X_MAGIC_1, uc);
198 |       if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
199 | 
200 |       GET_UCHAR(BZ_X_MAGIC_2, uc);
201 |       if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
202 | 
203 |       GET_UCHAR(BZ_X_MAGIC_3, uc)
204 |       if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
205 | 
206 |       GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
207 |       if (s->blockSize100k < (BZ_HDR_0 + 1) || 
208 |           s->blockSize100k > (BZ_HDR_0 + 150 /* 9 */)) { 
209 |          #ifdef PRINT_DEBUG
210 |          printf("[DEBUG] Error because of blockSize100k and BZ_HDR_0 check\n");
211 |          #endif
212 |          RETURN(BZ_DATA_ERROR_MAGIC);
213 |       }
214 |       s->blockSize100k -= BZ_HDR_0;
215 | 
216 |       if (s->smallDecompress) {
217 |          s->ll16 = (UInt16 *) BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
218 |          s->ll4  = (UChar *)BZALLOC( 
219 |                       ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 
220 |                    );
221 |          if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
222 |       } else {
223 |          s->tt  = (UInt32 *) BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
224 |          if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
225 |       }
226 |       #ifdef PRINT_DEBUG
227 |       printf("before start header checks\n");
228 |       #endif
229 |       GET_UCHAR(BZ_X_BLKHDR_1, uc);
230 | 
231 |       if (uc == 0x17) goto endhdr_2;
232 |       if (uc != 0x31) RETURN(BZ_DATA_ERROR);
233 |       GET_UCHAR(BZ_X_BLKHDR_2, uc);
234 |       if (uc != 0x41) RETURN(BZ_DATA_ERROR);
235 |       GET_UCHAR(BZ_X_BLKHDR_3, uc);
236 |       if (uc != 0x59) RETURN(BZ_DATA_ERROR);
237 |       GET_UCHAR(BZ_X_BLKHDR_4, uc);
238 |       if (uc != 0x26) RETURN(BZ_DATA_ERROR);
239 |       GET_UCHAR(BZ_X_BLKHDR_5, uc);
240 |       if (uc != 0x53) RETURN(BZ_DATA_ERROR);
241 |       GET_UCHAR(BZ_X_BLKHDR_6, uc);
242 |       if (uc != 0x59) RETURN(BZ_DATA_ERROR);
243 |      #ifdef PRINT_DEBUG
244 |      printf("after start header checks\n");
245 |      #endif
246 |       s->currBlockNo++;
247 |       if (s->verbosity >= 2)
248 |          VPrintf1 ( "\n    [%d: huff+mtf ", s->currBlockNo );
249 |  
250 |      #ifdef PRINT_DEBUG
251 |      printf("before storedBlockCRC\n");
252 |      #endif
253 |       s->storedBlockCRC = 0;
254 |       GET_UCHAR(BZ_X_BCRC_1, uc);
255 |       s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
256 |       GET_UCHAR(BZ_X_BCRC_2, uc);
257 |       s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
258 |       GET_UCHAR(BZ_X_BCRC_3, uc);
259 |       s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
260 |       GET_UCHAR(BZ_X_BCRC_4, uc);
261 |       s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
262 | 
263 |       GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
264 | 
265 |       s->origPtr = 0;
266 |       GET_UCHAR(BZ_X_ORIGPTR_1, uc);
267 |       s->origPtr = (s->origPtr << 8) | ((Int32)uc);
268 |       GET_UCHAR(BZ_X_ORIGPTR_2, uc);
269 |       s->origPtr = (s->origPtr << 8) | ((Int32)uc);
270 |       GET_UCHAR(BZ_X_ORIGPTR_3, uc);
271 |       s->origPtr = (s->origPtr << 8) | ((Int32)uc);
272 | 
273 |       if (s->origPtr < 0)
274 |          RETURN(BZ_DATA_ERROR);
275 |       if (s->origPtr > 10 + (100000 * s->blockSize100k)) 
276 |          RETURN(BZ_DATA_ERROR);
277 | 
278 |       #ifdef PRINT_DEBUG
279 |       printf("after origPtr checks\n");
280 |       #endif
281 | 
282 |       /*--- Receive the mapping table ---*/
283 |       for (i = 0; i < 16; i++) {
284 |          GET_BIT(BZ_X_MAPPING_1, uc);
285 |          if (uc == 1) 
286 |             s->inUse16[i] = True; else 
287 |             s->inUse16[i] = False;
288 |       }
289 | 
290 |       for (i = 0; i < 256; i++) s->inUse[i] = False;
291 | 
292 |       for (i = 0; i < 16; i++)
293 |          if (s->inUse16[i])
294 |             for (j = 0; j < 16; j++) {
295 |                GET_BIT(BZ_X_MAPPING_2, uc);
296 |                if (uc == 1) s->inUse[i * 16 + j] = True;
297 |             }
298 |       makeMaps_d ( s );
299 |       if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
300 |       alphaSize = s->nInUse+2;
301 | 
302 |       #ifdef PRINT_DEBUG
303 |       printf("after nInUse checks\n");
304 |       #endif
305 | 
306 |       /*--- Now the selectors ---*/
307 |       GET_BITS(BZ_X_SELECTOR_1, nGroups, 5 /* 3 */); //changed @aditya
308 |       if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
309 | 
310 |       #ifdef PRINT_DEBUG
311 |       printf("after nGroups check %d\n",nGroups);
312 |       #endif
313 | 
314 |       GET_BITS(BZ_X_SELECTOR_2, nSelectors, 17 /* 15 */); // changed @aditya
315 |       if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
316 | 
317 |       #ifdef PRINT_DEBUG
318 |       printf("after nSelectors check nSelectors %d\n",nSelectors);
319 |       #endif
320 | 
321 |       /* nSelectors is a 17-bit count and may exceed BZ_MAX_SELECTORS, which
322 |          is assumed to be the size of s->selectorMtf[] and s->selector[]
323 |          (confirm in bzlib_private.h): surplus selectors are read, not stored. */
324 |       for (i = 0; i < nSelectors; i++) {
325 |          j = 0;
326 |          while (True) {
327 |             GET_BIT(BZ_X_SELECTOR_3, uc);
328 |             if (uc == 0) break;
329 |             j++;
330 |             if (j >= nGroups) {
331 |                #ifdef PRINT_DEBUG
332 |                printf("data error for selector %d and ngroups %d, j = %d\n",i,nGroups,j);
333 |                #endif
334 |                RETURN(BZ_DATA_ERROR);
335 |             }
336 |          }
337 |          if (i < BZ_MAX_SELECTORS) s->selectorMtf[i] = j;
338 |       }
339 |       if (nSelectors > BZ_MAX_SELECTORS) nSelectors = BZ_MAX_SELECTORS;
340 |       #ifdef PRINT_DEBUG
341 |       printf("before undo mtf\n");
342 |       #endif
343 | 
344 |       /*--- Undo the MTF values for the selectors. ---*/
345 |       {
346 |          UChar pos[BZ_N_GROUPS], tmp, v;
347 |          for (v = 0; v < nGroups; v++) pos[v] = v;
348 |    
349 |          for (i = 0; i < nSelectors; i++) {
350 |             v = s->selectorMtf[i];
351 |             tmp = pos[v];
352 |             while (v > 0) { pos[v] = pos[v-1]; v--; }
353 |             pos[0] = tmp;
354 |             s->selector[i] = tmp;
355 |          }
356 |       }
357 | 
358 |       #ifdef PRINT_DEBUG
359 |       printf("before coding tables\n");
360 |       #endif 
361 |       
362 |       /*--- Now the coding tables ---*/
363 |       for (t = 0; t < nGroups; t++) {
364 |          GET_BITS(BZ_X_CODING_1, curr, 5);
365 |          for (i = 0; i < alphaSize; i++) {
366 |             while (True) {
367 |                if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
368 |                GET_BIT(BZ_X_CODING_2, uc);
369 |                if (uc == 0) break;
370 |                GET_BIT(BZ_X_CODING_3, uc);
371 |                if (uc == 0) curr++; else curr--;
372 |             }
373 |             s->len[t][i] = curr;
374 |          }
375 |       }
376 | 
377 |       #ifdef PRINT_DEBUG
378 |       printf("after coding tables s->blockSize100k %d\n",s->blockSize100k);
379 |       #endif
380 | 
381 |       /*--- Create the Huffman decoding tables ---*/
382 |       for (t = 0; t < nGroups; t++) {
383 |          minLen = 32;
384 |          maxLen = 0;
385 |          for (i = 0; i < alphaSize; i++) {
386 |             if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
387 |             if (s->len[t][i] < minLen) minLen = s->len[t][i];
388 |          }
389 |          BZ2_hbCreateDecodeTables ( 
390 |             &(s->limit[t][0]), 
391 |             &(s->base[t][0]), 
392 |             &(s->perm[t][0]), 
393 |             &(s->len[t][0]),
394 |             minLen, maxLen, alphaSize
395 |          );
396 |          s->minLens[t] = minLen;
397 |       }
398 | 
399 |       /*--- Now the MTF values ---*/
400 | 
401 |       EOB      = s->nInUse+1;
402 |       nblockMAX = 100000 * s->blockSize100k;
403 |       groupNo  = -1;
404 |       groupPos = 0;
405 | 
406 |       for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
407 | 
408 |       /*-- MTF init --*/
409 |       {
410 |          Int32 ii, jj, kk;
411 |          kk = MTFA_SIZE-1;
412 |          for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
413 |             for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
414 |                s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
415 |                kk--;
416 |             }
417 |             s->mtfbase[ii] = kk + 1;
418 |          }
419 |       }
420 |       /*-- end MTF init --*/
421 | 
422 |       nblock = 0;
423 |       GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
424 | 
425 |       #ifdef PRINT_DEBUG
426 |       printf("before while true\n");
427 |       #endif
428 | 
429 |       while (True) {
430 |          if (nextSym == EOB) {
431 |             #ifdef PRINT_DEBUG
432 |             printf("found EOB\n");
433 |             #endif
434 |             break;
435 |          }
436 | 
437 |          if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
438 | 
439 |             es = -1;
440 |             N = 1;
441 |             do {
442 |                /* Check that N doesn't get too big, so that es doesn't
443 |                   go negative.  The maximum value that can be
444 |                   RUNA/RUNB encoded is equal to the block size (post
445 |                   the initial RLE), viz, 900k, so bounding N at 2
446 |                   million should guard against overflow without
447 |                   rejecting any legitimate inputs. */
448 |                if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
449 |                if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
450 |                if (nextSym == BZ_RUNB) es = es + (1+1) * N;
451 |                N = N * 2;
452 |                GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
453 |             }
454 |                while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
455 | 
456 |             es++;
457 |             uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
458 |             s->unzftab[uc] += es;
459 | 
460 |             if (s->smallDecompress)
461 |                while (es > 0) {
462 |                   if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
463 |                   s->ll16[nblock] = (UInt16)uc;
464 |                   nblock++;
465 |                   es--;
466 |                }
467 |             else
468 |                while (es > 0) {
469 |                   if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
470 |                   s->tt[nblock] = (UInt32)uc;
471 |                   nblock++;
472 |                   es--;
473 |                };
474 | 
475 |             continue;
476 | 
477 |          } else {
478 | 
479 |             if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
480 | 
481 |             /*-- uc = MTF ( nextSym-1 ) --*/
482 |             {
483 |                Int32 ii, jj, kk, pp, lno, off;
484 |                UInt32 nn;
485 |                nn = (UInt32)(nextSym - 1);
486 | 
487 |                if (nn < MTFL_SIZE) {
488 |                   /* avoid general-case expense */
489 |                   pp = s->mtfbase[0];
490 |                   uc = s->mtfa[pp+nn];
491 |                   while (nn > 3) {
492 |                      Int32 z = pp+nn;
493 |                      s->mtfa[(z)  ] = s->mtfa[(z)-1];
494 |                      s->mtfa[(z)-1] = s->mtfa[(z)-2];
495 |                      s->mtfa[(z)-2] = s->mtfa[(z)-3];
496 |                      s->mtfa[(z)-3] = s->mtfa[(z)-4];
497 |                      nn -= 4;
498 |                   }
499 |                   while (nn > 0) { 
500 |                      s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 
501 |                   };
502 |                   s->mtfa[pp] = uc;
503 |                } else { 
504 |                   /* general case */
505 |                   lno = nn / MTFL_SIZE;
506 |                   off = nn % MTFL_SIZE;
507 |                   pp = s->mtfbase[lno] + off;
508 |                   uc = s->mtfa[pp];
509 |                   while (pp > s->mtfbase[lno]) { 
510 |                      s->mtfa[pp] = s->mtfa[pp-1]; pp--; 
511 |                   };
512 |                   s->mtfbase[lno]++;
513 |                   while (lno > 0) {
514 |                      s->mtfbase[lno]--;
515 |                      s->mtfa[s->mtfbase[lno]] 
516 |                         = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
517 |                      lno--;
518 |                   }
519 |                   s->mtfbase[0]--;
520 |                   s->mtfa[s->mtfbase[0]] = uc;
521 |                   if (s->mtfbase[0] == 0) {
522 |                      kk = MTFA_SIZE-1;
523 |                      for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
524 |                         for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
525 |                            s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
526 |                            kk--;
527 |                         }
528 |                         s->mtfbase[ii] = kk + 1;
529 |                      }
530 |                   }
531 |                }
532 |             }
533 |             /*-- end uc = MTF ( nextSym-1 ) --*/
534 | 
535 |             s->unzftab[s->seqToUnseq[uc]]++;
536 |             if (s->smallDecompress)
537 |                s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
538 |                s->tt[nblock]   = (UInt32)(s->seqToUnseq[uc]);
539 |             nblock++;
540 | 
541 |             GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
542 |             continue;
543 |          }
544 |       }
545 |       #ifdef PRINT_DEBUG
546 |       printf("after while true\n");
547 |       #endif
548 | 
549 |       /* Now we know what nblock is, we can do a better sanity
550 |          check on s->origPtr.
551 |       */
552 |       #ifdef PRINT_DEBUG
553 |       printf("Reached here s->origPtr check\n");
554 |       #endif
555 | 
556 |       if (s->origPtr < 0 || s->origPtr >= nblock)
557 |          RETURN(BZ_DATA_ERROR);
558 | 
559 |       /*-- Set up cftab to facilitate generation of T^(-1) --*/
560 |       /* Check: unzftab entries in range. */
561 |       for (i = 0; i <= 255; i++) {
562 |          if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
563 |             RETURN(BZ_DATA_ERROR);
564 |       }
565 |       #ifdef PRINT_DEBUG
566 |       printf("after unzftab check\n");
567 |       #endif
568 | 
569 |       /* Actually generate cftab. */
570 |       s->cftab[0] = 0;
571 |       for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
572 |       for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
573 |       /* Check: cftab entries in range. */
574 |       for (i = 0; i <= 256; i++) {
575 |          if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
576 |             /* s->cftab[i] can legitimately be == nblock */
577 |             RETURN(BZ_DATA_ERROR);
578 |          }
579 |       }
580 |       /* Check: cftab entries non-descending. */
581 |       for (i = 1; i <= 256; i++) {
582 |          if (s->cftab[i-1] > s->cftab[i]) {
583 |             RETURN(BZ_DATA_ERROR);
584 |          }
585 |       }
586 | 
587 |       s->state_out_len = 0;
588 |       s->state_out_ch  = 0;
589 |       BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
590 |       s->state = BZ_X_OUTPUT;
591 |       if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
592 | 
593 |       if (s->smallDecompress) {
594 | 
595 |          /*-- Make a copy of cftab, used in generation of T --*/
596 |          for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
597 | 
598 |          /*-- compute the T vector --*/
599 |          for (i = 0; i < nblock; i++) {
600 |             uc = (UChar)(s->ll16[i]);
601 |             SET_LL(i, s->cftabCopy[uc]);
602 |             s->cftabCopy[uc]++;
603 |          }
604 | 
605 |          /*-- Compute T^(-1) by pointer reversal on T --*/
606 |          i = s->origPtr;
607 |          j = GET_LL(i);
608 |          do {
609 |             Int32 tmp = GET_LL(j);
610 |             SET_LL(j, i);
611 |             i = j;
612 |             j = tmp;
613 |          }
614 |             while (i != s->origPtr);
615 | 
616 |          s->tPos = s->origPtr;
617 |          s->nblock_used = 0;
618 |          if (s->blockRandomised) {
619 |             BZ_RAND_INIT_MASK;
620 |             BZ_GET_SMALL(s->k0); s->nblock_used++;
621 |             BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
622 |          } else {
623 |             BZ_GET_SMALL(s->k0); s->nblock_used++;
624 |          }
625 | 
626 |       } else {
627 | 
628 |          /*-- compute the T^(-1) vector --*/
629 |          for (i = 0; i < nblock; i++) {
630 |             uc = (UChar)(s->tt[i] & 0xff);
631 |             s->tt[s->cftab[uc]] |= ((UInt32)i << 8);  /* shift as unsigned: no signed overflow */
632 |             s->cftab[uc]++;
633 |          }
634 | 
635 |          s->tPos = s->tt[s->origPtr] >> 8;
636 |          s->nblock_used = 0;
637 |          if (s->blockRandomised) {
638 |             BZ_RAND_INIT_MASK;
639 |             BZ_GET_FAST(s->k0); s->nblock_used++;
640 |             BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
641 |          } else {
642 |             BZ_GET_FAST(s->k0); s->nblock_used++;
643 |          }
644 | 
645 |       }
646 | 
647 |       #ifdef PRINT_DEBUG
648 |       printf("Before return BZ_OK\n");
649 |       #endif
650 | 
651 |       RETURN(BZ_OK);
652 | 
653 | 
654 | 
655 |     endhdr_2:
656 | 
657 |       GET_UCHAR(BZ_X_ENDHDR_2, uc);
658 |       if (uc != 0x72) RETURN(BZ_DATA_ERROR);
659 |       GET_UCHAR(BZ_X_ENDHDR_3, uc);
660 |       if (uc != 0x45) RETURN(BZ_DATA_ERROR);
661 |       GET_UCHAR(BZ_X_ENDHDR_4, uc);
662 |       if (uc != 0x38) RETURN(BZ_DATA_ERROR);
663 |       GET_UCHAR(BZ_X_ENDHDR_5, uc);
664 |       if (uc != 0x50) RETURN(BZ_DATA_ERROR);
665 |       GET_UCHAR(BZ_X_ENDHDR_6, uc);
666 |       if (uc != 0x90) RETURN(BZ_DATA_ERROR);
667 | 
668 |       s->storedCombinedCRC = 0;
669 |       GET_UCHAR(BZ_X_CCRC_1, uc);
670 |       s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
671 |       GET_UCHAR(BZ_X_CCRC_2, uc);
672 |       s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
673 |       GET_UCHAR(BZ_X_CCRC_3, uc);
674 |       s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
675 |       GET_UCHAR(BZ_X_CCRC_4, uc);
676 |       s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
677 | 
678 |       s->state = BZ_X_IDLE;
679 |       RETURN(BZ_STREAM_END);
680 | 
681 |       default: AssertH ( False, 4001 );
682 |    }
683 | 
684 |    AssertH ( False, 4002 );
685 | 
686 |    save_state_and_return:
687 | 
688 |    s->save_i           = i;
689 |    s->save_j           = j;
690 |    s->save_t           = t;
691 |    s->save_alphaSize   = alphaSize;
692 |    s->save_nGroups     = nGroups;
693 |    s->save_nSelectors  = nSelectors;
694 |    s->save_EOB         = EOB;
695 |    s->save_groupNo     = groupNo;
696 |    s->save_groupPos    = groupPos;
697 |    s->save_nextSym     = nextSym;
698 |    s->save_nblockMAX   = nblockMAX;
699 |    s->save_nblock      = nblock;
700 |    s->save_es          = es;
701 |    s->save_N           = N;
702 |    s->save_curr        = curr;
703 |    s->save_zt          = zt;
704 |    s->save_zn          = zn;
705 |    s->save_zvec        = zvec;
706 |    s->save_zj          = zj;
707 |    s->save_gSel        = gSel;
708 |    s->save_gMinlen     = gMinlen;
709 |    s->save_gLimit      = gLimit;
710 |    s->save_gBase       = gBase;
711 |    s->save_gPerm       = gPerm;
712 | 
713 |    return retVal;   
714 | }
715 | 
716 | 
717 | /*-------------------------------------------------------------*/
718 | /*--- end                                      decompress.c ---*/
719 | /*-------------------------------------------------------------*/
720 | 


--------------------------------------------------------------------------------
/dlltest.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    minibz2
  3 |       libbz2.dll test program.
  4 |       by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
  5 |       This file is Public Domain.  Welcome any email to me.
  6 | 
  7 |    usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
  8 | */
  9 | 
 10 | #define BZ_IMPORT
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include "bzlib.h"
 14 | #ifdef _WIN32
 15 | #include <io.h>
 16 | #endif
 17 | 
 18 | 
 19 | #ifdef _WIN32
 20 | 
 21 | #define BZ2_LIBNAME "libbz2-1.0.2.DLL" 
 22 | 
 23 | #include <windows.h>
 24 | static int BZ2DLLLoaded = 0;
 25 | static HINSTANCE BZ2DLLhLib;
 26 | /* Load BZ2_LIBNAME once and bind the BZ2_* entry points used by this program.
 27 |    Returns 0 on success (or when already loaded) and -1 on any failure. */
 28 | int BZ2DLLLoadLibrary(void)
 29 | {
 30 |    HINSTANCE hLib;
 31 |    if(BZ2DLLLoaded==1){return 0;}
 32 |    hLib=LoadLibrary(BZ2_LIBNAME);
 33 |    if(hLib == NULL){
 34 |       fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);
 35 |       return -1;
 36 |    }
 37 |    BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");
 38 |    BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");
 39 |    BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");
 40 |    BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");
 41 |    BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");
 42 |    BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");
 43 |    BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");
 44 |    BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");
 45 | 
 46 |    if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen || !BZ2_bzread
 47 |        || !BZ2_bzwrite || !BZ2_bzflush || !BZ2_bzclose || !BZ2_bzerror) {
 48 |       fprintf(stderr,"GetProcAddress failed.\n");
 49 |       FreeLibrary(hLib);   /* do not keep a half-bound module loaded */
 50 |       return -1;
 51 |    }
 52 |    BZ2DLLLoaded=1;
 53 |    BZ2DLLhLib=hLib;
 54 |    return 0;
 55 | }
 56 | /* Release the DLL recorded in BZ2DLLhLib, if one is loaded.  Always returns 0. */
 57 | int BZ2DLLFreeLibrary(void)
 58 | {
 59 |    if(BZ2DLLLoaded!=0){ FreeLibrary(BZ2DLLhLib); BZ2DLLLoaded=0; }
 60 |    return 0;
 61 | }
 62 | #endif /* WIN32 */
 63 | /* Print the command-line synopsis, followed by a newline, on stdout. */
 64 | void usage(void)
 65 | {
 66 |    printf("%s\n", "usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");
 67 | }
 68 | 
 69 | /* Parse switches ('-' or '/' prefix; 'd' selects decompression, '1'..'9' the
 70 |    level), then copy srcfilename (or stdin) to destfilename (or stdout) through
 71 |    libbz2.  Exits 1 on usage/open errors and failed bzread/bzwrite/fwrite calls. */
 72 | int main(int argc,char *argv[])
 73 | {
 74 |    int decompress = 0;
 75 |    int level = 9;
 76 |    char *fn_r = NULL;
 77 |    char *fn_w = NULL;
 78 | 
 79 | #ifdef _WIN32
 80 |    if(BZ2DLLLoadLibrary()<0){
 81 |       fprintf(stderr,"Loading of %s failed.  Giving up.\n", BZ2_LIBNAME);
 82 |       exit(1);
 83 |    }
 84 |    fprintf(stderr,"Loading of %s succeeded.  Library version is %s.\n",
 85 |           BZ2_LIBNAME, BZ2_bzlibVersion() );
 86 | #endif
 87 |    while(++argv,--argc){
 88 |       if(**argv =='-' || **argv=='/'){
 89 |          char *p;
 90 | 
 91 |          for(p=*argv+1;*p;p++){
 92 |             if(*p=='d'){
 93 |                decompress = 1;
 94 |             }else if('1'<=*p && *p<='9'){
 95 |                level = *p - '0';
 96 |             }else{
 97 |                usage();
 98 |                exit(1);
 99 |             }
100 |          }
101 |       }else{
102 |          break;
103 |       }
104 |    }
105 |    if(argc>=1){
106 |       fn_r = *argv;
107 |       argc--;argv++;
108 |    }
109 |    if(argc>=1){
110 |       fn_w = *argv;
111 |       argc--;argv++;
112 |    }
113 |    {
114 |       int len;
115 |       char buff[0x1000];
116 |       char mode[10];
117 | 
118 |       if(decompress){
119 |          BZFILE *BZ2fp_r = NULL;
120 |          FILE *fp_w = NULL;
121 | 
122 |          if(fn_w){
123 |             if((fp_w = fopen(fn_w,"wb"))==NULL){
124 |                fprintf(stderr,"can't open [%s]\n",fn_w);
125 |                perror("reason:");
126 |                exit(1);
127 |             }
128 |          }else{
129 |             fp_w = stdout;
130 |          }
131 |          if((fn_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)
132 |             || (fn_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){
133 |             fprintf(stderr,"can't bz2openstream\n");
134 |             exit(1);
135 |          }
136 |          while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){
137 |             if(fwrite(buff,1,len,fp_w)!=(size_t)len){ perror("write failed"); exit(1); }
138 |          }
139 |          if(len<0){ fprintf(stderr,"decompression failed\n"); exit(1); }
140 |          BZ2_bzclose(BZ2fp_r);
141 |          if(fp_w != stdout) fclose(fp_w);
142 |       }else{
143 |          BZFILE *BZ2fp_w = NULL;
144 |          FILE *fp_r = NULL;
145 | 
146 |          if(fn_r){
147 |             if((fp_r = fopen(fn_r,"rb"))==NULL){
148 |                fprintf(stderr,"can't open [%s]\n",fn_r);
149 |                perror("reason:");
150 |                exit(1);
151 |             }
152 |          }else{
153 |             fp_r = stdin;
154 |          }
155 |          mode[0]='w';
156 |          mode[1] = '0' + level;
157 |          mode[2] = '\0';
158 | 
159 |          if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)
160 |             || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){
161 |             fprintf(stderr,"can't bz2openstream\n");
162 |             exit(1);
163 |          }
164 |          while((len=fread(buff,1,0x1000,fp_r))>0){
165 |             if(BZ2_bzwrite(BZ2fp_w,buff,len)<0){ fprintf(stderr,"compression failed\n"); exit(1); }
166 |          }
167 |          BZ2_bzclose(BZ2fp_w);
168 |          if(fp_r!=stdin)fclose(fp_r);
169 |       }
170 |    }
171 | #ifdef _WIN32
172 |    BZ2DLLFreeLibrary();
173 | #endif
174 |    return 0;
175 | }
176 | 


--------------------------------------------------------------------------------
/gen.c:
--------------------------------------------------------------------------------
 1 | #include<stdio.h>
 2 | #include<malloc.h>
 3 | /* Print 1 to stdout and write it to out.txt; returns 1 if out.txt cannot be opened or closed. */
 4 | int main() {
 5 | 	int a = 1;
 6 | 	FILE *fp = fopen("out.txt","w");
 7 | 	if (fp == NULL) { perror("out.txt"); return 1; }
 8 | 	printf("%d\n",a);
 9 | 	fprintf(fp,"%d",a);
10 | 	return fclose(fp) == 0 ? 0 : 1;   /* fclose flushes; report a failed write */
11 | }


--------------------------------------------------------------------------------
/gpuBWTSort.cu:
--------------------------------------------------------------------------------
  1 | 
  2 | #define BZ_GPU
  3 | #include "bzlib_private.h"
  4 | #include <stdio.h>
  5 | #include <algorithm>
  6 | #include <fstream>
  7 | #include <iterator>
  8 | #include <iostream>
  9 | #include <thrust/host_vector.h>
 10 | #include <thrust/device_vector.h>
 11 | #include <thrust/sort.h>
 12 | #include <thrust/scan.h>
 13 | #include <thrust/scatter.h>
 14 | #include <thrust/iterator/zip_iterator.h>
 15 | #include <cuda.h>
 16 | #include <time.h>
 17 | 
 18 | #define MAX_THREADS_PER_BLOCK 1024
 19 | 
 20 | //#define __DEBUG__
 21 | 
// File-scope pointers to two per-block device buffers. gpuBlockSort()
// allocates both with cudaMalloc (one unsigned int per input byte), passes
// them to the kernels as ordinary arguments, and releases them with cudaFree
// before returning. Bar's host-side constructor also snapshots both values.
//
// global_int_original_string: for every position, the 4 bytes starting there
//                             packed big-endian into one word (filled by
//                             pack4CharsToInt).
// global_first_sort_rank:     per-position rank, zeroed by pack4CharsToInt and
//                             filled in by the eliminateSizeOne* kernels.
//
// NOTE(review): these are declared __device__ yet are only read/written from
// host code in this file (address taken for cudaMalloc, value passed to
// kernels) -- confirm the qualifier is intentional.
__device__ unsigned int *global_int_original_string = NULL; 
__device__ unsigned int *global_first_sort_rank = NULL;
 24 | 
 25 | class Bar
 26 | {
 27 | 	unsigned int *functor_string;
 28 | 	unsigned int *functor_first_sort_rank;
 29 | 	const int currentOffset;
 30 | 	const int currentLength;  
 31 | 	const int totalLength; 
 32 | 
 33 | 	public:
 34 | 		__host__
 35 | 		Bar(int _currentOffset, int _currentLength, int _totalLength):functor_string(global_int_original_string), functor_first_sort_rank(global_first_sort_rank), currentOffset(_currentOffset), currentLength(_currentLength), totalLength(_totalLength) { }
 36 | 	
 37 | 		inline __device__
 38 | 		bool operator() (thrust::tuple< unsigned int, unsigned int > t1, thrust::tuple< unsigned int, unsigned int > t2) { 
 39 | 			int seg1 = thrust::get<0>(t1); 
 40 | 			int seg2 = thrust::get<0>(t2);
 41 | 			
 42 | 			if(seg1 > seg2) return false;
 43 | 			if(seg1 < seg2) return true;
 44 | 			
 45 | 			int ind1 = thrust::get<1>(t1);
 46 | 			int ind2 = thrust::get<1>(t2);
 47 | 			int count = 0;
 48 | 			
 49 | 			while( count < currentLength) {
 50 | 				int newInd1 = ( ind1 + currentOffset + count ) %totalLength;
 51 | 				int newInd2 = ( ind2 + currentOffset + count ) %totalLength;
 52 | 				unsigned int a1 = (functor_string[newInd1]); 
 53 | 				unsigned int a2 = (functor_string[newInd2]);
 54 | 				int a = a1 - a2; 
 55 | 				count+=4;
 56 | 				if( a == 0 ) { continue; }
 57 | 				else {
 58 | 					if((a1>>24) > (a2>>24)) { return false;}
 59 | 					else if((a1>>24) < (a2>>24)) {return true;} 
 60 | 					else if((a1>>16) > (a2>>16)) { return false;}
 61 | 					else if((a1>>16) < (a2>>16)) {return true;} 
 62 | 					else if((a1>>8) > (a2>>8)) { return false;}
 63 | 					else if((a1>>8) < (a2>>8)) {return true;} 
 64 | 					else if((a1) > (a2)) { return false;}
 65 | 					else if((a1) < (a2)) {return true;} 
 66 | 				}
 67 | 			}
 68 | 			return false;
 69 | 		}
 70 | };
 71 | 
 72 | __global__ void pack4CharsToInt(unsigned char *input_string, unsigned int *static_input_string, unsigned int *output_string, unsigned int *output_index, unsigned int *d_array_first_sort_rank, int length) { 
 73 | 	
 74 | 	int threadID = (blockIdx.x * blockDim.x) +  threadIdx.x;
 75 | 
 76 | 	if(threadID > length) return;
 77 | 
 78 | 	d_array_first_sort_rank[threadID] = 0;
 79 | 	if(threadID % 3 != 0) { 
 80 | 		int mult3 =(int) ((1.0*threadID)/3);
 81 | 	
 82 | 		int newIndex = mult3*2 + ((threadID % 3) - 1);
 83 | 
 84 | 		output_index[newIndex] = threadID;
 85 | 	
 86 | 		output_string[newIndex] = (((unsigned int)input_string[threadID]) << 24) + 
 87 | 					  (((unsigned int)input_string[(threadID+1) % length]) << 16) +  
 88 | 	                                  (((unsigned int)input_string[(threadID+2) % length]) << 8) + 
 89 | 					  (((unsigned int)input_string[(threadID+3) % length]));
 90 | 	}
 91 | 
 92 | 	static_input_string[threadID] =  (((unsigned int)input_string[threadID]) << 24) + 
 93 | 					 (((unsigned int)input_string[(threadID+1) % length]) << 16) +  
 94 | 	                                 (((unsigned int)input_string[(threadID+2) % length]) << 8) + 
 95 | 					 (((unsigned int)input_string[(threadID+3) % length]));
 96 | 
 97 | }
 98 | 
 99 | __global__ void findSuccessor( unsigned int *d_array_original_string, unsigned int *d_array_string, unsigned int *d_array_index, unsigned int *d_array_segment, unsigned int *d_array_string_out, unsigned int *d_array_segment_out, int length, int originalLength, int sequenceCount) {
100 | 
101 | 	int threadID = (blockIdx.x * blockDim.x) +  threadIdx.x;
102 | 	if(threadID > length) return;
103 | 	d_array_segment_out[threadID] = 0; 
104 | 	if(threadID > 0) { 
105 | 		if(((d_array_string[threadID]!=d_array_string[threadID-1]) || (d_array_segment[threadID]!=d_array_segment[threadID-1])) ) { 
106 | 			d_array_segment_out[threadID] = 1; 
107 | 		}
108 | 	}
109 | 	int successorIndex = (d_array_index[threadID] + sequenceCount + 4)%originalLength; 
110 | 	d_array_string_out[threadID] = d_array_original_string[successorIndex];
111 | }
112 | 
113 | 
// One thread per slot of the working set. Marks which slots are still
// unresolved and records the final position of those that are resolved.
//
// d_array_map[t] == 1 means slot t starts a new group (slot 0 starts one
// implicitly). A slot is resolved when its group has exactly one member,
// i.e. it starts a group and the next slot (if any) starts another one.
//   * d_array_stencil[t] = 0 for resolved slots, 1 for slots to keep.
//   * For resolved slots, the suffix index d_array_index[t] is written to
//     d_array_final_index at the slot's original sorted position
//     (d_array_static_index[t]), and that position is stored as the rank of
//     the suffix in d_array_first_sort_rank.
// d_array_original_string, sequenceCount and originalLength are accepted for
// interface compatibility and are not used.
__global__ void  eliminateSizeOneKernel1(unsigned int *d_array_original_string, unsigned int *d_array_final_index, unsigned int *d_array_index, unsigned int *d_array_static_index, unsigned int *d_array_map, unsigned int *d_array_stencil, unsigned int *d_array_first_sort_rank, int sequenceCount, int length, int originalLength) {

	int threadID = (blockIdx.x * blockDim.x) +  threadIdx.x;

	if(threadID >= length) return;

	// The boundary tests come first in each expression so that
	// d_array_map[threadID + 1] is never read for the last slot. Previously a
	// working set of exactly one slot read d_array_map[1], which is outside
	// the live range, and could leave the lone element unresolved.
	const bool startsGroup = (threadID == 0) || (d_array_map[threadID] == 1);
	const bool endsGroup = (threadID == (length-1)) || (d_array_map[threadID + 1] == 1);
	const bool resolved = startsGroup && endsGroup;

	d_array_stencil[threadID] = resolved ? 0 : 1;

	if(resolved) {
		int finalIndex = d_array_index[threadID];
		d_array_final_index[ d_array_static_index[threadID] ] = finalIndex;
		d_array_first_sort_rank[finalIndex] = d_array_static_index[threadID];
	}
}
136 | 
// One thread per slot of the current, already sorted, working set.
// d_array_segment_out[t] is set to 1 when slot t starts a new group and to 0
// otherwise (slot 0 always gets 0). Slot t starts a new group when
//   * its segment id differs from that of slot t-1, or
//   * the two suffixes differ somewhere in the window of packed words at
//     byte offsets offset, offset+4, ... (< offset + length) from their
//     start positions, wrapping modulo originalLength.
// `size` is the number of live slots; `length` is the window width in bytes.
__global__ void updateSegments( unsigned int *d_int_array_string, unsigned int *d_array_index, unsigned int *d_array_segment, unsigned int *d_array_segment_out, int size, int offset, int length, int originalLength) {

	int threadID = (blockIdx.x * blockDim.x) +  threadIdx.x;
	// The grid is rounded up to whole thread blocks. The guard used to be
	// `>`, which let thread `size` process the element one past the live
	// range.
	if(threadID >= size) return;
	d_array_segment_out[threadID] = 0; 
	if(threadID > 0) { 
		if((d_array_segment[threadID - 1] != d_array_segment[threadID])) { 
			d_array_segment_out[threadID] = 1;
			return; 
		}
		int count = 0;
		unsigned int ind1 = d_array_index[threadID - 1]; 
		unsigned int ind2 = d_array_index[threadID]; 
		// Same segment: compare the two suffixes word by word over the window.
		while(count < length) {
			if( d_int_array_string[(ind1 + offset + count) % originalLength] != d_int_array_string[(ind2 + offset + count) % originalLength]) { 
				d_array_segment_out[threadID] = 1; 
				break;
			}
			count+=4; 
		} 
	}
	return;
}
160 | 
161 | 
// One thread per slot of the working set. Marks which slots are still
// unresolved and records the final position of those that are resolved.
//
// d_array_map[t] == 1 means slot t starts a new group (slot 0 starts one
// implicitly). A slot is resolved when its group has exactly one member,
// i.e. it starts a group and the next slot (if any) starts another one.
//   * d_array_stencil[t] = 0 for resolved slots, 1 for slots to keep.
//   * For resolved slots, the suffix index d_array_index[t] is written to
//     d_array_final_index at the slot's original sorted position
//     (d_array_static_index[t]), and that position is stored as the rank of
//     the suffix in d_array_first_sort_rank.
// originalLength is accepted for interface compatibility and is not used.
__global__ void  eliminateSizeOne(unsigned int *d_array_final_index, unsigned int *d_array_index, unsigned int *d_array_static_index, unsigned int *d_array_map, unsigned int *d_array_stencil, unsigned int *d_array_first_sort_rank, int size, int originalLength) {

	int threadID = (blockIdx.x * blockDim.x) +  threadIdx.x;

	if(threadID >= size) return;

	// The boundary tests come first in each expression so that
	// d_array_map[threadID + 1] is never read for the last slot. Previously a
	// working set of exactly one slot read d_array_map[1], which is outside
	// the live range, and could leave the lone element unresolved.
	const bool startsGroup = (threadID == 0) || (d_array_map[threadID] == 1);
	const bool endsGroup = (threadID == (size-1)) || (d_array_map[threadID + 1] == 1);
	const bool resolved = startsGroup && endsGroup;

	d_array_stencil[threadID] = resolved ? 0 : 1;

	if(resolved) {
		int finalIndex = d_array_index[threadID];
		d_array_final_index[ d_array_static_index[threadID] ] = finalIndex;
		d_array_first_sort_rank[finalIndex] = d_array_static_index[threadID]; 
	}
}
184 | 
// One thread per "second sort" suffix, i.e. per start position that is a
// multiple of 3. For thread t, with p = 3*t:
//   * d_array_second_sort[t]       = byte of the original string at p
//   * d_array_second_sort_rank[t]  = rank already computed for position p+1
//   * d_array_second_sort_index[t] = p
// Sorting by (byte, rank) afterwards orders these suffixes.
// All three outputs must hold secondSortLength elements.
__global__ void createSecondSort(unsigned char *d_original_string_in, unsigned int *d_array_first_sort_rank, unsigned char *d_array_second_sort, unsigned int *d_array_second_sort_rank, unsigned int *d_array_second_sort_index, int secondSortLength) { 

	int threadID = (blockIdx.x * blockDim.x) +  threadIdx.x;

	// The grid is rounded up to whole thread blocks. The guard used to be
	// `>`, which let thread `secondSortLength` write one element past the end
	// of the three output arrays.
	if(threadID >= secondSortLength) return;

	int mult3 = threadID*3;

	d_array_second_sort[threadID] = d_original_string_in[mult3];
	d_array_second_sort_rank[threadID] = d_array_first_sort_rank[mult3+1];
	d_array_second_sort_index[threadID] = mult3;
}
197 | 
// Select the CUDA device used by subsequent runtime calls on this host thread.
// devId is passed unchanged to cudaSetDevice. The function still returns
// nothing, but a failure is now reported on stderr instead of being silently
// discarded.
void gpuSetDevice(int devId) { 
	cudaError_t status = cudaSetDevice(devId);
	if(status != cudaSuccess) {
		fprintf(stderr, "gpuSetDevice: cudaSetDevice(%d) failed: %s\n", devId, cudaGetErrorString(status));
	}
}
// GPU part of the block sort.
//
// Splits the start positions of `block` into two sets: the "first sort" set
// (positions that are not a multiple of 3, plus the last position when
// blockSize % 3 == 1) and the "second sort" set (the remaining multiples of
// 3). The first set is sorted by repeated refinement; the second set is then
// sorted by (first byte, rank of the following position).
//
// Parameters:
//   block              host buffer holding blockSize input bytes
//   order              not referenced in this function
//   orderFirstSort     host output: suffix indices of the first set in sorted
//                      order (firstSortLength entries)
//   orderSecondSort    host output: suffix indices of the second set in
//                      sorted order (secondSortLength entries)
//   orderFirstSortRank host output: per-position rank array (blockSize entries)
//   blockSize          number of bytes in block
//   sortingDepth       written only when a refinement loop empties the
//                      working set; receives the byte offset reached
// Returns firstSortLength.
//
// NOTE(review): no CUDA runtime return code is checked here, and the kernel
// launch configurations are derived directly from the lengths -- confirm
// callers never pass a blockSize small enough to yield a zero-thread launch.
int gpuBlockSort(UChar *block, UInt32 *order, UInt32 *orderFirstSort, UInt32 *orderSecondSort, UInt32 *orderFirstSortRank, Int32 blockSize, Int32* sortingDepth) { 

	// Byte depth up to which the first (radix-style) refinement loop runs.
	int limit = 64; 
	// `length` is reused below as the size of the live working set.
	int length = blockSize;
	int originalLength = blockSize; 


	// Per-position device buffers (one unsigned int per input byte).
	cudaMalloc((unsigned int **)&global_int_original_string, sizeof(unsigned int)*originalLength);
	cudaMalloc((unsigned int **)&global_first_sort_rank, sizeof(unsigned int)*originalLength);

	// Device copy of the raw input bytes.
	unsigned char *d_original_string_in; 
	cudaMalloc((unsigned char **)&d_original_string_in, sizeof(unsigned char)*originalLength);
	cudaMemcpy(d_original_string_in, block, sizeof(unsigned char)*originalLength, cudaMemcpyHostToDevice); 
	
	// Launch shape for one thread per input byte: a single block when it
	// fits, otherwise enough MAX_THREADS_PER_BLOCK-sized blocks (rounded up).
	int numBlocks1 = 1;
	int numThreadsPerBlock1 = originalLength/numBlocks1;

	if(numThreadsPerBlock1 > MAX_THREADS_PER_BLOCK) { 
		numBlocks1 = (int)ceil(originalLength/(float)MAX_THREADS_PER_BLOCK);
		numThreadsPerBlock1 = MAX_THREADS_PER_BLOCK;
	}
	dim3 grid1(numBlocks1, 1, 1);
	dim3 threads1(numThreadsPerBlock1, 1, 1); 


	// Number of positions in [0, originalLength) that are not multiples of 3.
	int firstSortLength =  2*((originalLength-1)/3) + ((originalLength-1)%3);
	int secondSortLength = originalLength - firstSortLength;

	// When the length is 3k+1 the last position (a multiple of 3) is moved
	// from the second set into the first set.
	int includeLast = 0;
	if(originalLength % 3 == 1) { 
		includeLast = 1;
		firstSortLength++;
		secondSortLength--;
	}

	// Working arrays for the first sort. d_stencil doubles as scratch: it
	// first receives the packed key words, later the keep/resolve flags.
  	thrust::device_vector<unsigned int> d_stencil(firstSortLength, 0);
	thrust::device_vector<unsigned int> d_index(firstSortLength);
	thrust::device_vector<unsigned int> d_final_index(firstSortLength);

	unsigned int *d_array_index_out = thrust::raw_pointer_cast(&d_index[0]);
	unsigned int *d_array_string_out = thrust::raw_pointer_cast(&d_stencil[0]);

	// Pack the input into 4-byte words and build the compacted first-set
	// (index, key) arrays.
	cudaDeviceSynchronize();
	pack4CharsToInt<<<grid1, threads1, 0>>>(d_original_string_in, global_int_original_string, d_array_string_out, d_array_index_out, global_first_sort_rank, originalLength);
	cudaDeviceSynchronize();

	// The kernel skips multiples of 3, so the extra last position is appended
	// from the host, using the same packing as the kernel.
	if(includeLast == 1) { 
		int lastIndex = originalLength - 1;
		*(d_index.end() - 1) = lastIndex;
		*(d_stencil.end() - 1) =  (((unsigned int)block[lastIndex]) << 24) + 
				  (((unsigned int)block[(lastIndex+1) % length ]) << 16) +  
                                  (((unsigned int)block[(lastIndex+2) % length ]) << 8) + 
				  (((unsigned int)block[(lastIndex+3) % length ]));

	}

	// From here on `length` is the number of still-unresolved first-set slots.
	length = firstSortLength;

#ifdef __DEBUG__
	printf("First sorting problem length : %d\n", firstSortLength);
	thrust::device_vector<unsigned int>::iterator d_index_itr = d_index.begin();
	thrust::device_vector<unsigned int>::iterator d_stencil_itr = d_stencil.begin();

	for(d_index_itr = d_index.begin(); d_index_itr != d_index.end(); ++d_index_itr) { 
		std::cout << "( " << *d_index_itr << " , " << *d_stencil_itr << " ) ";
		++d_stencil_itr;
	}
	std::cout << std::endl; 
#endif


	// d_string: current key word per slot (initially the packed words).
	thrust::device_vector<unsigned int> d_string(length); 
	thrust::copy(d_stencil.begin(), d_stencil.begin() + length, d_string.begin());

	// d_static_index: sorted position each live slot occupied before any
	// compaction, used as destination in d_final_index.
	thrust::device_vector<unsigned int> d_static_index(length);
	thrust::sequence(d_static_index.begin(), d_static_index.end());

	// d_segment: group id per slot; d_map: boundary flags / scatter targets;
	// d_integer_arr: scratch destination for the compaction scatters.
	thrust::device_vector<unsigned int> d_segment(length, 0);
	thrust::device_vector<unsigned int> d_map(length, 0);
        thrust::device_vector<unsigned int> d_integer_arr(length, 0);



	int sequenceCount = 0;

	// Phase 1: refine 4 bytes per round up to `limit` bytes deep.
	for(sequenceCount=0; sequenceCount <= limit; sequenceCount+=4) { 
		//Changed to sort by key, this ignores the index sorting

		// Sort live slots by (group id, key word); suffix indices follow as values.
		thrust::sort_by_key(
				thrust::make_zip_iterator( thrust::make_tuple(d_segment.begin(), d_string.begin())),
				thrust::make_zip_iterator( thrust::make_tuple(d_segment.begin() + length, d_string.begin() + length)),
				d_index.begin()
			); 
 
 
		unsigned int *d_array_string = thrust::raw_pointer_cast(&d_string[0]); 
		unsigned int *d_array_index = thrust::raw_pointer_cast(&d_index[0]);
		unsigned int *d_array_segment = thrust::raw_pointer_cast(&d_segment[0]); 
 	  	unsigned int *d_array_static_index = thrust::raw_pointer_cast(&d_static_index[0]); 
          	unsigned int *d_array_map = thrust::raw_pointer_cast(&d_map[0]); 
		unsigned int *d_array_stencil = thrust::raw_pointer_cast(&d_stencil[0]); 
		unsigned int *d_array_final_index = thrust::raw_pointer_cast(&d_final_index[0]);

		// One thread per live slot (rounded up to whole blocks when large).
		int numBlocks = 1;
		int numThreadsPerBlock = length/numBlocks;

		if(numThreadsPerBlock > MAX_THREADS_PER_BLOCK) { 
			numBlocks = (int)ceil(numThreadsPerBlock/(float)MAX_THREADS_PER_BLOCK);
			numThreadsPerBlock = MAX_THREADS_PER_BLOCK;
		}
		dim3 grid(numBlocks, 1, 1);
		dim3 threads(numThreadsPerBlock, 1, 1); 

          	cudaDeviceSynchronize();
	
		// Writes group-boundary flags into d_map and the next key word of
		// every slot into d_stencil.
		findSuccessor<<<grid, threads, 0>>>(global_int_original_string, d_array_string, d_array_index, d_array_segment, d_array_stencil, d_array_map, length, originalLength, sequenceCount);
	
	        cudaDeviceSynchronize();


		// Adopt the freshly fetched words as the keys for the next round.
	        thrust::copy(d_stencil.begin(), d_stencil.begin() + length, d_string.begin());

		// Prefix sum of the boundary flags yields the new group ids.
	        thrust::inclusive_scan(d_map.begin(),d_map.begin() + length, d_segment.begin());

	        cudaDeviceSynchronize();
 
		// Records groups of size one as final and sets d_stencil to
		// 0 (resolved) / 1 (keep).
	        eliminateSizeOneKernel1<<<grid, threads, 0>>>( global_int_original_string, d_array_final_index, d_array_index, d_array_static_index, d_array_map, d_array_stencil, global_first_sort_rank, sequenceCount, length, originalLength);
 
                cudaDeviceSynchronize();
		
		// Exclusive prefix sum of the keep flags = destination slot of each
		// kept element after compaction.
		thrust::exclusive_scan(d_stencil.begin(), d_stencil.begin() + length, d_map.begin());

		// Compact every per-slot array: scatter the kept elements into the
		// scratch array, then copy the scratch array back.
		thrust::scatter_if(d_segment.begin(), d_segment.begin() + length, d_map.begin(), d_stencil.begin(), d_integer_arr.begin());
		thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + length, d_segment.begin()); 

		thrust::scatter_if(d_string.begin(), d_string.begin() + length, d_map.begin(), d_stencil.begin(), d_integer_arr.begin()); 
		thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + length, d_string.begin());


		thrust::scatter_if(d_index.begin(), d_index.begin() + length, d_map.begin(), d_stencil.begin(), d_integer_arr.begin()); 
		thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + length, d_index.begin()); 

		thrust::scatter_if(d_static_index.begin(), d_static_index.begin() + length, d_map.begin(), d_stencil.begin(), d_integer_arr.begin()); 
		thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + length, d_static_index.begin()); 

		// Last exclusive-scan value plus last flag = number of kept slots.
		length = *(d_map.begin() + length - 1) + *(d_stencil.begin() + length - 1); 
	        if(length == 0) {
			 *sortingDepth = sequenceCount;  
             	         break;
	  	} 

	}


	// Phase 2: slots still tied after `limit` bytes are refined with a
	// comparison sort over windows whose width doubles every round.
	if(length!=0) { 

		int size = length; 
		length = limit*2;
		int offset = limit;  
		// `length` is doubled at the end of the body, so this increment
		// advances offset by the width of the window just compared.
		for(offset = limit; offset < originalLength; offset+=(length/2)) {  

			// Order by group id, then by the string contents inside the window.
			thrust::sort(
					thrust::make_zip_iterator( thrust::make_tuple(d_segment.begin(), d_index.begin())),
					thrust::make_zip_iterator( thrust::make_tuple(d_segment.begin() + size, d_index.begin() + size)),
					Bar(offset, length, originalLength)
				    );

			unsigned int *d_array_index = thrust::raw_pointer_cast(&d_index[0]);
			unsigned int *d_array_segment = thrust::raw_pointer_cast(&d_segment[0]); 
			unsigned int *d_array_map = thrust::raw_pointer_cast(&d_map[0]);
			unsigned int *d_array_static_index = thrust::raw_pointer_cast(&d_static_index[0]);  
			unsigned int *d_array_stencil = thrust::raw_pointer_cast(&d_stencil[0]); 
			unsigned int *d_array_final_index = thrust::raw_pointer_cast(&d_final_index[0]);

			int numBlocks = 1;
			int numThreadsPerBlock = size/numBlocks;

			if(numThreadsPerBlock > MAX_THREADS_PER_BLOCK) { 
				numBlocks = (int)ceil(size/(float)MAX_THREADS_PER_BLOCK);
				numThreadsPerBlock = MAX_THREADS_PER_BLOCK;
			}
			dim3 grid(numBlocks, 1, 1);
			dim3 threads(numThreadsPerBlock, 1, 1); 

			cudaDeviceSynchronize();

			// Boundary flags for the new grouping go into d_map.
			updateSegments<<<grid, threads, 0>>>(global_int_original_string, d_array_index, d_array_segment, d_array_map, size, offset, length, originalLength);

			cudaDeviceSynchronize();

			// Prefix sum of the flags yields the new group ids.
			thrust::inclusive_scan(d_map.begin(), d_map.begin() + size, d_segment.begin());

			cudaDeviceSynchronize();

			// Record singleton groups as final; d_stencil = 0 resolved / 1 keep.
			eliminateSizeOne<<<grid, threads, 0>>>( d_array_final_index, d_array_index, d_array_static_index, d_array_map, d_array_stencil, global_first_sort_rank, size, originalLength);
			cudaDeviceSynchronize();

			// Same compaction as in phase 1 (no key-word array in this phase).
			thrust::exclusive_scan(d_stencil.begin(), d_stencil.begin() + size, d_map.begin());

			thrust::scatter_if(d_segment.begin(), d_segment.begin() + size, d_map.begin(), d_stencil.begin(), d_integer_arr.begin());
			thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + size, d_segment.begin()); 

			thrust::scatter_if(d_index.begin(), d_index.begin() + size, d_map.begin(), d_stencil.begin(), d_integer_arr.begin()); 
			thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + size, d_index.begin()); 

			thrust::scatter_if(d_static_index.begin(), d_static_index.begin() + size, d_map.begin(), d_stencil.begin(), d_integer_arr.begin()); 
			thrust::copy(d_integer_arr.begin(), d_integer_arr.begin() + size, d_static_index.begin()); 

			size = *(d_map.begin() + size - 1) + *(d_stencil.begin() + size - 1); 

			if(size == 0) {
				*sortingDepth = offset;
				break;
			}

			length*=2; 
		}

	}

#ifdef __DEBUG__

	printf("First Sort\n");
	thrust::device_vector<unsigned int>::iterator d_final_index_itr = d_final_index.begin();
	for(d_final_index_itr = d_final_index.begin(); d_final_index_itr != d_final_index.end(); ++d_final_index_itr) { 
		std::cout << *d_final_index_itr << " ";
	}
	std::cout << std::endl;
	std::cout << "First Sort Length " << firstSortLength << std::endl;
#endif

	// Second sort: one thread per multiple-of-3 position.
	int numBlocks = 1;
	int numThreadsPerBlock = secondSortLength/numBlocks;

	if(numThreadsPerBlock > MAX_THREADS_PER_BLOCK) { 
		numBlocks = (int)ceil(numThreadsPerBlock/(float)MAX_THREADS_PER_BLOCK);
		numThreadsPerBlock = MAX_THREADS_PER_BLOCK;
	}
	dim3 grid(numBlocks, 1, 1);
	dim3 threads(numThreadsPerBlock, 1, 1); 

	thrust::device_vector<unsigned char> d_second_sort(secondSortLength, 0);
	thrust::device_vector<unsigned int> d_second_sort_rank(secondSortLength, 0);
	thrust::device_vector<unsigned int> d_second_sort_index(secondSortLength);

	unsigned char *d_array_second_sort = thrust::raw_pointer_cast(&d_second_sort[0]);
	unsigned int *d_array_second_sort_rank = thrust::raw_pointer_cast(&d_second_sort_rank[0]); 
	unsigned int *d_array_second_sort_index = thrust::raw_pointer_cast(&d_second_sort_index[0]); 

	// Build (first byte, rank of next position, position) per second-set suffix.
	cudaDeviceSynchronize();
	createSecondSort<<<grid, threads, 0>>>(d_original_string_in, global_first_sort_rank, d_array_second_sort, d_array_second_sort_rank, d_array_second_sort_index, secondSortLength);
	cudaDeviceSynchronize();

	// Sort the second set by (byte, rank); positions follow as values.
	thrust::sort_by_key(
		thrust::make_zip_iterator( thrust::make_tuple(d_second_sort.begin(), d_second_sort_rank.begin())),
		thrust::make_zip_iterator( thrust::make_tuple(d_second_sort.begin() + secondSortLength, d_second_sort_rank.begin() + secondSortLength)),
		d_second_sort_index.begin()
	);  


#ifdef __DEBUG__
	printf("Second Sort\n");
	thrust::device_vector<unsigned char>::iterator d_second_sort_itr = d_second_sort.begin();
	thrust::device_vector<unsigned int>::iterator d_second_sort_rank_itr = d_second_sort_rank.begin();
	thrust::device_vector<unsigned int>::iterator d_second_sort_index_itr = d_second_sort_index.begin();
	for(d_second_sort_itr = d_second_sort.begin(); d_second_sort_itr != d_second_sort.end(); ++d_second_sort_itr) { 
		std::cout << *d_second_sort_index_itr << " ";
		++d_second_sort_rank_itr;
		++d_second_sort_index_itr;
	}
	std::cout << std::endl;
	std::cout << "Second Sort Length " << secondSortLength << std::endl;
#endif

	// Copy the three results back to the caller's host buffers.
	thrust::copy(d_final_index.begin(), d_final_index.end(), orderFirstSort);
	thrust::copy(d_second_sort_index.begin(), d_second_sort_index.end(), orderSecondSort);
	cudaMemcpy(orderFirstSortRank, global_first_sort_rank, sizeof(unsigned int)*originalLength, cudaMemcpyDeviceToHost); 
	
	// Release the raw device buffers (the thrust vectors free themselves).
	cudaFree(d_original_string_in); 
	cudaFree(global_int_original_string);
	cudaFree(global_first_sort_rank);
	return firstSortLength;
}
485 | 


--------------------------------------------------------------------------------
/huffman.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*-------------------------------------------------------------*/
  3 | /*--- Huffman coding low-level stuff                        ---*/
  4 | /*---                                             huffman.c ---*/
  5 | /*-------------------------------------------------------------*/
  6 | 
  7 | /* ------------------------------------------------------------------
  8 |    This file is part of bzip2/libbzip2, a program and library for
  9 |    lossless, block-sorting data compression.
 10 | 
 11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 13 | 
 14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 15 |    README file.
 16 | 
 17 |    This program is released under the terms of the license contained
 18 |    in the file LICENSE.
 19 |    ------------------------------------------------------------------ */
 20 | 
 21 | 
 22 | #include "bzlib_private.h"
 23 | 
 24 | /*---------------------------------------------------*/
 25 | #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
 26 | #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
 27 | #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
 28 | 
 29 | #define ADDWEIGHTS(zw1,zw2)                           \
 30 |    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
 31 |    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
 32 | 
 33 | #define UPHEAP(z)                                     \
 34 | {                                                     \
 35 |    Int32 zz, tmp;                                     \
 36 |    zz = z; tmp = heap[zz];                            \
 37 |    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
 38 |       heap[zz] = heap[zz >> 1];                       \
 39 |       zz >>= 1;                                       \
 40 |    }                                                  \
 41 |    heap[zz] = tmp;                                    \
 42 | }
 43 | 
 44 | #define DOWNHEAP(z)                                   \
 45 | {                                                     \
 46 |    Int32 zz, yy, tmp;                                 \
 47 |    zz = z; tmp = heap[zz];                            \
 48 |    while (True) {                                     \
 49 |       yy = zz << 1;                                   \
 50 |       if (yy > nHeap) break;                          \
 51 |       if (yy < nHeap &&                               \
 52 |           weight[heap[yy+1]] < weight[heap[yy]])      \
 53 |          yy++;                                        \
 54 |       if (weight[tmp] < weight[heap[yy]]) break;      \
 55 |       heap[zz] = heap[yy];                            \
 56 |       zz = yy;                                        \
 57 |    }                                                  \
 58 |    heap[zz] = tmp;                                    \
 59 | }
 60 | 
 61 | 
 62 | /*---------------------------------------------------*/
 63 | void BZ2_hbMakeCodeLengths ( UChar *len, 
 64 |                              Int32 *freq,
 65 |                              Int32 alphaSize,
 66 |                              Int32 maxLen )
 67 | {
 68 |    /*--
 69 |       Nodes and heap entries run from 1.  Entry 0
 70 |       for both the heap and nodes is a sentinel.
 71 |    --*/
 72 |    Int32 nNodes, nHeap, n1, n2, i, j, k;
 73 |    Bool  tooLong;
 74 | 
 75 |    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
 76 |    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
 77 |    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 
 78 | 
 79 |    for (i = 0; i < alphaSize; i++)
 80 |       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
 81 | 
 82 |    while (True) {
 83 | 
 84 |       nNodes = alphaSize;
 85 |       nHeap = 0;
 86 | 
 87 |       heap[0] = 0;
 88 |       weight[0] = 0;
 89 |       parent[0] = -2;
 90 | 
 91 |       for (i = 1; i <= alphaSize; i++) {
 92 |          parent[i] = -1;
 93 |          nHeap++;
 94 |          heap[nHeap] = i;
 95 |          UPHEAP(nHeap);
 96 |       }
 97 | 
 98 |       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
 99 |    
100 |       while (nHeap > 1) {
101 |          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102 |          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103 |          nNodes++;
104 |          parent[n1] = parent[n2] = nNodes;
105 |          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106 |          parent[nNodes] = -1;
107 |          nHeap++;
108 |          heap[nHeap] = nNodes;
109 |          UPHEAP(nHeap);
110 |       }
111 | 
112 |       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113 | 
114 |       tooLong = False;
115 |       for (i = 1; i <= alphaSize; i++) {
116 |          j = 0;
117 |          k = i;
118 |          while (parent[k] >= 0) { k = parent[k]; j++; }
119 |          len[i-1] = j;
120 |          if (j > maxLen) tooLong = True;
121 |       }
122 |       
123 |       if (! tooLong) break;
124 | 
125 |       /* 17 Oct 04: keep-going condition for the following loop used
126 |          to be 'i < alphaSize', which missed the last element,
127 |          theoretically leading to the possibility of the compressor
128 |          looping.  However, this count-scaling step is only needed if
129 |          one of the generated Huffman code words is longer than
130 |          maxLen, which up to and including version 1.0.2 was 20 bits,
131 |          which is extremely unlikely.  In version 1.0.3 maxLen was
132 |          changed to 17 bits, which has minimal effect on compression
133 |          ratio, but does mean this scaling step is used from time to
134 |          time, enough to verify that it works.
135 | 
136 |          This means that bzip2-1.0.3 and later will only produce
137 |          Huffman codes with a maximum length of 17 bits.  However, in
138 |          order to preserve backwards compatibility with bitstreams
139 |          produced by versions pre-1.0.3, the decompressor must still
140 |          handle lengths of up to 20. */
141 | 
142 |       for (i = 1; i <= alphaSize; i++) {
143 |          j = weight[i] >> 8;
144 |          j = 1 + (j / 2);
145 |          weight[i] = j << 8;
146 |       }
147 |    }
148 | }
149 | 
150 | 
151 | /*---------------------------------------------------*/
152 | void BZ2_hbAssignCodes ( Int32 *code,
153 |                          UChar *length,
154 |                          Int32 minLen,
155 |                          Int32 maxLen,
156 |                          Int32 alphaSize )
157 | {
158 |    Int32 n, vec, i;
159 | 
160 |    vec = 0;
161 |    for (n = minLen; n <= maxLen; n++) {
162 |       for (i = 0; i < alphaSize; i++)
163 |          if (length[i] == n) { code[i] = vec; vec++; };
164 |       vec <<= 1;
165 |    }
166 | }
167 | 
168 | 
169 | /*---------------------------------------------------*/
170 | void BZ2_hbCreateDecodeTables ( Int32 *limit,
171 |                                 Int32 *base,
172 |                                 Int32 *perm,
173 |                                 UChar *length,
174 |                                 Int32 minLen,
175 |                                 Int32 maxLen,
176 |                                 Int32 alphaSize )
177 | {
178 |    Int32 pp, i, j, vec;
179 | 
180 |    pp = 0;
181 |    for (i = minLen; i <= maxLen; i++)
182 |       for (j = 0; j < alphaSize; j++)
183 |          if (length[j] == i) { perm[pp] = j; pp++; };
184 | 
185 |    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186 |    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187 | 
188 |    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189 | 
190 |    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191 |    vec = 0;
192 | 
193 |    for (i = minLen; i <= maxLen; i++) {
194 |       vec += (base[i+1] - base[i]);
195 |       limit[i] = vec-1;
196 |       vec <<= 1;
197 |    }
198 |    for (i = minLen + 1; i <= maxLen; i++)
199 |       base[i] = ((limit[i-1] + 1) << 1) - base[i];
200 | }
201 | 
202 | 
203 | /*-------------------------------------------------------------*/
204 | /*--- end                                         huffman.c ---*/
205 | /*-------------------------------------------------------------*/
206 | 


--------------------------------------------------------------------------------
/mk251.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /* Spew out a long sequence of the byte 251.  When fed to bzip2
 3 |    versions 1.0.0 or 1.0.1, causes it to die with internal error
 4 |    1007 in blocksort.c.  This assertion misses an extremely rare
 5 |    case, which is fixed in this version (1.0.2) and above.
 6 | */
 7 | 
 8 | /* ------------------------------------------------------------------
 9 |    This file is part of bzip2/libbzip2, a program and library for
10 |    lossless, block-sorting data compression.
11 | 
12 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
13 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 | 
15 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
16 |    README file.
17 | 
18 |    This program is released under the terms of the license contained
19 |    in the file LICENSE.
20 |    ------------------------------------------------------------------ */
21 | 
22 | 
23 | #include <stdio.h>
24 | 
/* Write 48500000 copies of the byte value 251 to standard output. */
int main ()
{
   int remaining = 48500000;
   while (remaining-- > 0)
     putchar(251);
   return 0;
}
32 | 


--------------------------------------------------------------------------------
/randtable.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*-------------------------------------------------------------*/
 3 | /*--- Table for randomising repetitive blocks               ---*/
 4 | /*---                                           randtable.c ---*/
 5 | /*-------------------------------------------------------------*/
 6 | 
 7 | /* ------------------------------------------------------------------
 8 |    This file is part of bzip2/libbzip2, a program and library for
 9 |    lossless, block-sorting data compression.
10 | 
11 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
13 | 
14 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
15 |    README file.
16 | 
17 |    This program is released under the terms of the license contained
18 |    in the file LICENSE.
19 |    ------------------------------------------------------------------ */
20 | 
21 | 
22 | #include "bzlib_private.h"
23 | 
24 | 
25 | /*---------------------------------------------*/
26 | Int32 BZ2_rNums[512] = { 
27 |    619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 
28 |    985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 
29 |    733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 
30 |    419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 
31 |    878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 
32 |    862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 
33 |    150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 
34 |    170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 
35 |    73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 
36 |    909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 
37 |    641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 
38 |    161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 
39 |    382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 
40 |    98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 
41 |    227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 
42 |    469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 
43 |    184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 
44 |    715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 
45 |    951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 
46 |    652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 
47 |    645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 
48 |    609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 
49 |    653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 
50 |    411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 
51 |    170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 
52 |    857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 
53 |    669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 
54 |    944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 
55 |    344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 
56 |    897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 
57 |    433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 
58 |    686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 
59 |    946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 
60 |    978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 
61 |    680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 
62 |    707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 
63 |    297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 
64 |    134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 
65 |    343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 
66 |    140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 
67 |    170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 
68 |    369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 
69 |    804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 
70 |    896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 
71 |    661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 
72 |    768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 
73 |    61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 
74 |    372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 
75 |    780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 
76 |    920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 
77 |    645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 
78 |    936, 638
79 | };
80 | 
81 | 
82 | /*-------------------------------------------------------------*/
83 | /*--- end                                       randtable.c ---*/
84 | /*-------------------------------------------------------------*/
85 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | domake=0
 4 | docheck=0
 5 | 
 6 | while getopts "mc" opt
 7 | do
 8 | 
 9 | case $opt in
10 | 
11 | m)
12 | 
13 | domake=1
14 | ;;
15 | 
16 | c)
17 | 
18 | docheck=1
19 | ;;
20 | 
21 | \?)
22 | exit
23 | ;;
24 | 
25 | esac 
26 | done
27 | 
28 | shift $(( OPTIND-1 ))
29 | 
30 | if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]
31 | then
32 | echo "usage: ./run.sh [-m] [-c] <blocksize> <filename>"
33 | echo "-m: make, -c: check"
34 | exit
35 | fi
36 | 
37 | blocksize=$1
38 | threads=$2
39 | filename=$3
40 | 
41 | 
42 | if [ $domake -eq 1 ]
43 | then
44 | make clean
45 | make bzip2
46 | echo "*****************"
47 | fi
48 | 
49 | echo "*****************"
50 | echo "Compressing file "$filename
51 | rm -f $filename".bz2"
52 | ./bzip2 -kf -$blocksize -n$threads $filename
53 | ls -lh $filename".bz2"
54 | ls -l $filename".bz2"
55 | sizeVal=`ls -l $filename.bz2 | cut -d' ' -f5`
56 | echo $sizeVal
57 | echo $sizeVal"/(1024*1024)" | bc -l
58 | echo "Done compressing"
59 | echo "*****************"
60 | 
61 | if [ $docheck -eq 1 ]
62 | then
63 | mv $filename diff.txt
64 | ./bzip2 -d $filename".bz2"
65 | echo -n "Performing diff .... "
66 | diff diff.txt $filename
67 | if [ $? -eq 0 ]
68 | then
69 | echo "Success"
70 | else
71 | echo "Failed"
72 | mv diff.txt $filename
73 | fi
74 | fi
75 | echo "***************"
76 | 


--------------------------------------------------------------------------------
/spewG.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /* spew out a thoroughly gigantic file designed so that bzip2
 3 |    can compress it reasonably rapidly.  This is to help test
 4 |    support for large files (> 2GB) in a reasonable amount of time.
 5 |    I suggest you use the undocumented --exponential option to
 6 |    bzip2 when compressing the resulting file; this saves a bit of
 7 |    time.  Note: *don't* bother with --exponential when compressing 
 8 |    Real Files; it'll just waste a lot of CPU time :-)
 9 |    (but is otherwise harmless).
10 | */
11 | 
12 | /* ------------------------------------------------------------------
13 |    This file is part of bzip2/libbzip2, a program and library for
14 |    lossless, block-sorting data compression.
15 | 
16 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
17 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
18 | 
19 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
20 |    README file.
21 | 
22 |    This program is released under the terms of the license contained
23 |    in the file LICENSE.
24 | 	 ------------------------------------------------------------------ */
25 | 
26 | 
27 | #define _FILE_OFFSET_BITS 64
28 | 
29 | #include <stdio.h>
30 | #include <stdlib.h>
31 | 
/* The number of megabytes of junk to spew out (roughly) */
#define MEGABYTES 5000

/* Size, in bytes, of the buffer installed on stdout. */
#define N_BUF 1000000
char buf[N_BUF];

/* Write `count` consecutive copies of `chunk` to stdout. */
static void spew_run ( const char* chunk, int count )
{
   int ii;
   for (ii = 0; ii < count; ii++)
      fputs ( chunk, stdout );
}

/* Generate the test file on stdout.  Each round draws p in 25..74 from
   random() (seeded with 1, so the output is reproducible) and writes
   p copies of the 'a' chunk, p-1 copies of the 'b' chunk and p+1 copies
   of the 'c' chunk.  Command-line arguments are ignored.  Always
   returns 0. */
int main ( int argc, char** argv )
{
   int kk, p;

   (void)argc;
   (void)argv;

   srandom(1);
   /* Fully buffer stdout through buf.  setvbuf is the ISO C spelling of
      the BSD-only setbuffer and must likewise precede any output. */
   setvbuf ( stdout, buf, _IOFBF, N_BUF );
   for (kk = 0; kk < MEGABYTES * 515; kk+=3) {
      p = 25+random()%50;
      spew_run ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", p );
      spew_run ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", p-1 );
      spew_run ( "ccccccccccccccccccccccccccccccccccccc", p+1 );
   }
   fflush(stdout);
   return 0;
}
55 | 


--------------------------------------------------------------------------------
/unzcrash.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* A test program written to test robustness to decompression of
  3 |    corrupted data.  Usage is 
  4 |        unzcrash filename
  5 |    and the program will read the specified file, compress it (in memory),
  6 |    and then repeatedly decompress it, each time with a different bit of
  7 |    the compressed data inverted, so as to test all possible one-bit errors.
  8 |    This should not cause any invalid memory accesses.  If it does, 
  9 |    I want to know about it!
 10 | 
 11 |    PS.  As you can see from the above description, the process is
 12 |    incredibly slow.  A file of size eg 5KB will cause it to run for
 13 |    many hours.
 14 | */
 15 | 
 16 | /* ------------------------------------------------------------------
 17 |    This file is part of bzip2/libbzip2, a program and library for
 18 |    lossless, block-sorting data compression.
 19 | 
 20 |    bzip2/libbzip2 version 1.0.6 of 6 September 2010
 21 |    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
 22 | 
 23 |    Please read the WARNING, DISCLAIMER and PATENTS sections in the 
 24 |    README file.
 25 | 
 26 |    This program is released under the terms of the license contained
 27 |    in the file LICENSE.
 28 |    ------------------------------------------------------------------ */
 29 | 
 30 | 
 31 | #include <stdio.h>
 32 | #include <assert.h>
 33 | #include "bzlib.h"
 34 | 
 35 | #define M_BLOCK 1000000
 36 | 
 37 | typedef unsigned char uchar;
 38 | 
 39 | #define M_BLOCK_OUT (M_BLOCK + 1000000)
 40 | uchar inbuf[M_BLOCK];
 41 | uchar outbuf[M_BLOCK_OUT];
 42 | uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];
 43 | 
 44 | int nIn, nOut, nZ;
 45 | 
 46 | static char *bzerrorstrings[] = {
 47 |        "OK"
 48 |       ,"SEQUENCE_ERROR"
 49 |       ,"PARAM_ERROR"
 50 |       ,"MEM_ERROR"
 51 |       ,"DATA_ERROR"
 52 |       ,"DATA_ERROR_MAGIC"
 53 |       ,"IO_ERROR"
 54 |       ,"UNEXPECTED_EOF"
 55 |       ,"OUTBUFF_FULL"
 56 |       ,"???"   /* for future */
 57 |       ,"???"   /* for future */
 58 |       ,"???"   /* for future */
 59 |       ,"???"   /* for future */
 60 |       ,"???"   /* for future */
 61 |       ,"???"   /* for future */
 62 | };
 63 | 
 64 | void flip_bit ( int bit )
 65 | {
 66 |    int byteno = bit / 8;
 67 |    int bitno  = bit % 8;
 68 |    uchar mask = 1 << bitno;
 69 |    //fprintf ( stderr, "(byte %d  bit %d  mask %d)",
 70 |    //          byteno, bitno, (int)mask );
 71 |    zbuf[byteno] ^= mask;
 72 | }
 73 | 
 74 | int main ( int argc, char** argv )
 75 | {
 76 |    FILE* f;
 77 |    int   r;
 78 |    int   bit;
 79 |    int   i;
 80 | 
 81 |    if (argc != 2) {
 82 |       fprintf ( stderr, "usage: unzcrash filename\n" );
 83 |       return 1;
 84 |    }
 85 | 
 86 |    f = fopen ( argv[1], "r" );
 87 |    if (!f) {
 88 |       fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] );
 89 |       return 1;
 90 |    }
 91 | 
 92 |    nIn = fread ( inbuf, 1, M_BLOCK, f );
 93 |    fprintf ( stderr, "%d bytes read\n", nIn );
 94 | 
 95 |    nZ = M_BLOCK;
 96 |    r = BZ2_bzBuffToBuffCompress (
 97 |          zbuf, &nZ, inbuf, nIn, 9, 0, 30 );
 98 | 
 99 |    assert (r == BZ_OK);
100 |    fprintf ( stderr, "%d after compression\n", nZ );
101 | 
102 |    for (bit = 0; bit < nZ*8; bit++) {
103 |       fprintf ( stderr, "bit %d  ", bit );
104 |       flip_bit ( bit );
105 |       nOut = M_BLOCK_OUT;
106 |       r = BZ2_bzBuffToBuffDecompress (
107 |             outbuf, &nOut, zbuf, nZ, 0, 0 );
108 |       fprintf ( stderr, " %d  %s ", r, bzerrorstrings[-r] );
109 | 
110 |       if (r != BZ_OK) {
111 |          fprintf ( stderr, "\n" );
112 |       } else {
113 |          if (nOut != nIn) {
114 |            fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut );
115 |            return 1;
116 |          } else {
117 |            for (i = 0; i < nOut; i++)
118 |              if (inbuf[i] != outbuf[i]) { 
119 |                 fprintf(stderr, "mismatch at %d\n", i ); 
120 |                 return 1; 
121 |            }
122 |            if (i == nOut) fprintf(stderr, "really ok!\n" );
123 |          }
124 |       }
125 | 
126 |       flip_bit ( bit );
127 |    }
128 | 
129 | #if 0
130 |    assert (nOut == nIn);
131 |    for (i = 0; i < nOut; i++) {
132 |      if (inbuf[i] != outbuf[i]) {
133 |         fprintf ( stderr, "difference at %d !\n", i );
134 |         return 1;
135 |      }
136 |    }
137 | #endif
138 | 
139 |    fprintf ( stderr, "all ok\n" );
140 |    return 0;
141 | }
142 | 


--------------------------------------------------------------------------------