├── LICENSE
├── README.md
├── benchmark.png
├── bin
    ├── darwin_arm64
    │   └── tscrunch
    ├── linux_amd64
    │   └── tscrunch
    ├── windows_amd64
    │   └── tscrunch.exe
    └── windows_arm64
    │   └── tscrunch.exe
├── decrunch.asm
├── decrunch_extreme.asm
├── decrunch_small.asm
├── readme.txt
├── tscrunch.go
└── tscrunch.py


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # TSCrunch
  2 | 
  3 | About
  4 | =====
  5 | 
  6 | TSCrunch is an optimal, byte-aligned, LZ+RLE hybrid encoder, designed to maximize decoding speed on NMOS 6502 and derived CPUs, while keeping decent compression performance (for a bytecruncher, that is).
  7 | TSCrunch was designed as the default asset cruncher for the game A Pig Quest, and, as such, it's optimized for in-memory level compression, but as of version 1.0 it can also create SFX executables for off-line prg crunching.
  8 | 
  9 | Requirements
 10 | ============
 11 | 
 12 | TSCrunch is written in Go and therefore should run on any machine for which a Go compiler is available.
 13 | Precompiled binaries are available for the following platforms:
 14 | - windows x64
 15 | - windows arm64
 16 | - linux x64
 17 | - mac / darwin arm64
 18 | 
 19 | A Python version is also supplied as a reference encoder, but use of the Go version is recommended for speed.
 20 | The memory decrunchers require Kick Assembler, but it should be quite easy to port them to your assembler of choice.
 21 | 
 22 | Usage
 23 | =====
 24 | 
 25 | tscrunch [option] infile outfile
 26 | 
 27 | Crunching examples:
 28 | 
 29 | 	tscrunch -x $0820 game.prg crunched.prg
 30 | 	
 31 | Crunches the file game.prg and generates a self executable crunched.prg, using $0820 as post-decrunch jmp address
 32 | 
 33 | 	tscrunch -p game.prg crunched.bin
 34 | 
 35 | Mem-crunches the file game.prg, stripping the 2-byte header and generates a binary file crunched.bin
 36 | 
 37 | 	tscrunch data.bin crunched.bin
 38 | Mem-crunches the file data.bin and generates a binary file crunched.bin
 39 | 
 40 | 	tscrunch -i data.prg crunched.prg
 41 | Mem-crunches the file data.prg for in-place use, and generates a prg file crunched.prg with the appropriate load address
 42 | 
 43 | 	tscrunch -x2 49152 game.prg crunched.prg
 44 | 	
 45 | Crunches the file game.prg and generates a self executable crunched.prg with alternative decrunching code, using $c000 (49152) as post-decrunch jmp address. The alternative decrunching code runs from stack instead of zero-page
 46 | 
 47 | 	tscrunch -x 0x1000 -b game.prg crunched.prg
 48 | 	
 49 | Crunches the file game.prg and generates a self executable crunched.prg that blanks the screen while decrunching, using $1000 (0x1000) as post-decrunch jmp address.
 50 | 
 51 | 
 52 | Please refer to the inline help (tscrunch -h) for a detailed description of the different crunching options.
 53 | Note that with the exception of self executables and in-place, all the files generated by TSCrunch are headless binaries, that is they don't come with a 2 byte loader offset.
 54 | 
 55 | Decrunching files from code
 56 | ===========================
 57 | 
 58 | For memory decrunching, please #include decrunch.asm and include the crunched binaries in your code, then use the macro TS_DECRUNCH, as explained by the following code fragment 
 59 | 
 60 | 		.pc = $1000 "test"
 61 | 		//decrunches data to $4000
 62 | 		:TS_DECRUNCH(compressed_data,$4000) 
 63 | 		jmp *
 64 | 
 65 | 		.align $100
 66 | 		#include "decrunch.asm"
 67 | 		
 68 | 		compressed_data:
 69 | 		.import binary "data.bin"
 70 | 		
 71 | 
 72 | For inplace decrunching, please #define INPLACE before including the decruncher code, as explained by the following code fragment
 73 | 
 74 | 		#define INPLACE
 75 | 
 76 | 		.pc = $1000 "test"
 77 | 		//decrunches data inplace
 78 | 		:TS_DECRUNCH(compressed_data) 
 79 | 		jmp *
 80 | 
 81 | 		.align $100
 82 | 		#include "decrunch.asm"
 83 | 		
 84 | 		.pc = LoadAddress //as provided by the cruncher
 85 | 		compressed_data:
 86 | 		.import c64 "data.bin"
 87 | 
 88 | 
 89 | decrunch.asm is the recommended decruncher for the general case, but two alternative decrunchers are also supplied: a small version (decrunch_small.asm), which saves some bytes at the cost of speed, and an extreme version (decrunch_extreme.asm), which is generally marginally faster, but comes with a larger footprint.
 90 | 
 91 | 
 92 | Performance
 93 | ===========
 94 | 
 95 | TSCrunch is designed for ultra-fast decrunching while keeping a decent compression ratio. Being a byte-cruncher, it falls short of popular bit-crunchers, such as exomizer or B2, when comparing compression efficiency, but it is usually much faster at decoding. Furthermore, you can expect a 20% to 40% speed bump compared to popular byte-crunchers with similar compression efficiency.
 96 | The following benchmark compares TSCrunch's performance with that of a fast byte-cruncher, TinyCrunch, and two fast bit-crunchers, B2 and dali, on a real-case compression scenario: Chopper Command, from the same author.
 97 | 
 98 | ![benchmark](https://user-images.githubusercontent.com/52791690/161444947-1e01a5b1-f89d-4ef1-bd17-54d563cdd670.png)
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysavon/TSCrunch/76d5a706bf6f442c0b3fbdc13c335175191f4c2e/benchmark.png


--------------------------------------------------------------------------------
/bin/darwin_arm64/tscrunch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysavon/TSCrunch/76d5a706bf6f442c0b3fbdc13c335175191f4c2e/bin/darwin_arm64/tscrunch


--------------------------------------------------------------------------------
/bin/linux_amd64/tscrunch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysavon/TSCrunch/76d5a706bf6f442c0b3fbdc13c335175191f4c2e/bin/linux_amd64/tscrunch


--------------------------------------------------------------------------------
/bin/windows_amd64/tscrunch.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysavon/TSCrunch/76d5a706bf6f442c0b3fbdc13c335175191f4c2e/bin/windows_amd64/tscrunch.exe


--------------------------------------------------------------------------------
/bin/windows_arm64/tscrunch.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonysavon/TSCrunch/76d5a706bf6f442c0b3fbdc13c335175191f4c2e/bin/windows_arm64/tscrunch.exe


--------------------------------------------------------------------------------
/decrunch.asm:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | decrunch.asm
  4 | 
  5 | NMOS 6502 decompressor for data stored in TSCrunch format.
  6 | 
  7 | This code is written for the KickAssembler assembler.
  8 | 
  9 | Copyright Antonio Savona 2022.
 10 | 
 11 | */
 12 | 
 13 | 
 14 | //#define INPLACE 		//Enables inplace decrunching. Use -i switch when crunching. 
 15 | 
 16 | .label tsget 	= $f8	//2 bytes, zero-page pointer to the crunched data being read
 17 | .label tstemp	= $fa	//1 byte, holds the RLE run count while a run is being written
 18 | .label tsput 	= $fb	//2 bytes, zero-page pointer to the decrunched output
 19 | .label lzput 	= $fd	//2 bytes, zero-page pointer to the source of an LZ copy
 20 | 
 21 | 
 22 | #if INPLACE
 23 | 
 24 | .macro TS_DECRUNCH(src)	//INPLACE variant: src = address of the crunched data; tsdecrunch reads the destination from the stream header
 25 | {
 26 | 		lda #<src	//low byte of the crunched data address
 27 | 		sta.zp tsget
 28 | 		lda #>src	//high byte of the crunched data address
 29 | 		sta.zp tsget + 1
 30 | 		jsr tsdecrunch	//returns once the end-of-stream token has been reached
 31 | }
 32 | 
 33 | #else
 34 | 
 35 | .macro TS_DECRUNCH(src,dst)	//src = address of the crunched data, dst = address the data is decrunched to
 36 | {
 37 | 		lda #<src	//tsget = src
 38 | 		sta.zp tsget
 39 | 		lda #>src
 40 | 		sta.zp tsget + 1
 41 | 		lda #<dst	//tsput = dst
 42 | 		sta.zp tsput
 43 | 		lda #>dst
 44 | 		sta.zp tsput + 1
 45 | 		jsr tsdecrunch	//returns once the end-of-stream token has been reached
 46 | }
 47 | 
 48 | #endif
 49 | 
 50 | 
 51 | tsdecrunch:	//decrunches the stream at (tsget) to (tsput); INPLACE builds read tsput from the stream header instead
 52 | {
 53 | 	decrunch:
 54 | 
 55 | 	#if INPLACE
 56 | 			ldy #$ff
 57 | 		!:	iny
 58 | 			lda (tsget),y
 59 | 			sta tsput , y	//copies 4 header bytes to tsput..lzput+1; the last one lands in lzput+1, with no effect.
 60 | 			cpy #3
 61 | 			bne !- 
 62 | 			
 63 | 			pha	//a = header byte 3; pulled back and stored to the output at done
 64 | 			
 65 | 			lda lzput	//header byte 2
 66 | 			sta optRun + 1	//self-modifying: patches the operand of ldy at optRun
 67 | 			
 68 | 			tya	//a = 3 and cpy left carry set, so update_getonly advances tsget by 4
 69 | 			ldy #0
 70 | 			beq update_getonly
 71 | 	#else 
 72 | 			ldy #0			
 73 | 	
 74 | 
 75 | 			lda (tsget),y	//first byte of the stream
 76 | 			sta optRun + 1	//self-modifying: patches the operand of ldy at optRun
 77 | 
 78 | 			inc tsget	//step past the header byte
 79 | 			bne entry2
 80 | 			inc tsget + 1
 81 | 	#endif
 82 | 	
 83 | 	entry2:		
 84 | 			lax (tsget),y	//token byte into both a and x (y = 0 here)
 85 | 			
 86 | 			bmi rleorlz	//bit 7 set: RLE or LZ token
 87 | 			
 88 | 			cmp #$20
 89 | 			bcs lz2	//token >= $20: 2-byte LZ, or end of stream when token == $20
 90 | 
 91 | 	//literal: token < $20 is the number of bytes to copy
 92 | 			
 93 | 	#if INPLACE
 94 | 			
 95 | 			inc tsget	//step past the token so the copy can run forward from y = 0
 96 | 			beq updatelit_hi
 97 | 		return_from_updatelit:
 98 | 		
 99 | 		ts_delit_loop:
100 | 
101 | 			lda (tsget),y
102 | 			sta (tsput),y
103 | 			iny
104 | 			dex	//x counts the literals still to copy
105 | 			
106 | 			bne ts_delit_loop	
107 | 			
108 | 			tya	//a = x = number of bytes copied
109 | 			tax
110 | 			//carry is clear
111 | 	updatezp:
112 | 			ldy #0
113 | 	#else	//not inplace
114 | 			tay	//y = literal count; the copy runs backward from the last byte
115 | 			
116 | 		ts_delit_loop:
117 | 			
118 | 			lda (tsget),y	//source index is one higher than the destination because tsget still points at the token
119 | 			dey
120 | 			sta (tsput),y
121 | 			
122 | 			bne ts_delit_loop
123 | 			
124 | 			txa	//a = count, added to tsput (carry is clear after cmp #$20)
125 | 			inx	//x = count + 1, added to tsget (token byte included)
126 | 	#endif
127 | 			
128 | 	updatezp_noclc:	//tsput += a + carry, then tsget += x; every path arrives with y = 0 for the next token fetch
129 | 			adc tsput
130 | 			sta tsput
131 | 			bcs updateput_hi
132 | 		putnoof:
133 | 			txa
134 | 		update_getonly:	//tsget += a + carry
135 | 			adc tsget
136 | 			sta tsget
137 | 			bcc entry2
138 | 			inc tsget+1
139 | 			bcs entry2	//always taken: carry is still set
140 | 	
141 | 	#if INPLACE		
142 | 	updatelit_hi:
143 | 			inc tsget+1
144 | 			bcc return_from_updatelit	//always taken: carry is clear after cmp #$20 on a literal token
145 | 	#endif		
146 | 	updateput_hi:
147 | 			inc tsput+1
148 | 			clc
149 | 			bcc putnoof	//always taken
150 | 						
151 | 	rleorlz:
152 | 			
153 | 			alr #$7f	//a = (token & $7f) >> 1, carry = token bit 0
154 | 			bcc ts_delz		//bit 0 clear: LZ
155 | 
156 | 		//RLE
157 | 			beq optRun	//remaining bits all zero: run of zeros with the count preset from the header
158 | 				
159 | 		plain:
160 | 			ldx #2	//token + run byte
161 | 			iny
162 | 			sta tstemp		//run count minus one: offsets tstemp..0 get written		
163 | 
164 | 			lda (tsget),y	//fetch rle byte
165 | 			ldy tstemp
166 | 		!runStart:
167 | 			sta (tsput),y
168 | 			
169 | 		ts_derle_loop:
170 | 			
171 | 			dey
172 | 			sta (tsput),y
173 | 
174 | 			bne ts_derle_loop
175 | 			
176 | 			//update zero page with a = runlen, x = 2 (x = 1 when coming from optRun), y = 0 
177 | 			lda tstemp		
178 | 
179 | 			bcs updatezp_noclc	//always taken: carry is still set from alr and supplies the +1 for tsput
180 | 			
181 | 	   done:
182 | #if INPLACE	   
183 | 	   		pla	//header byte 3, saved at entry
184 | 	   		sta (tsput),y
185 | #endif	   		
186 | 			rts	
187 | 	//LZ2	
188 | 		lz2:
189 | 			beq done	//token == $20 marks the end of the stream
190 | 			
191 | 			ora #$80	//carry is set from cmp #$20; the token byte itself encodes the backward offset
192 | 			adc tsput
193 | 			sta lzput
194 | 			lda tsput + 1
195 | 			sbc #$00
196 | 			sta lzput + 1 		
197 | 	
198 | 			//y already zero			
199 | 			lda (lzput),y	//copy exactly two bytes
200 | 			sta (tsput),y
201 | 			iny		
202 | 			lda (lzput),y
203 | 			sta (tsput),y
204 | 					
205 | 			tya	//a = 1; tsput advances by 2 provided carry is still set after the sbc above
206 | 			dey
207 | 			
208 | 			adc tsput
209 | 			sta tsput
210 | 			bcs lz2_put_hi
211 | 		!skp:	
212 | 			inc tsget	//LZ2 tokens are a single byte
213 | 			bne entry2
214 | 			inc tsget + 1
215 | 			bne entry2			
216 | 
217 | 		lz2_put_hi:
218 | 			inc tsput + 1
219 | 			bcs !skp-	
220 | 
221 | 	//LZ
222 | 	ts_delz:
223 | 			
224 | 			lsr 	//carry = token bit 1: set for a 1-byte offset, clear for the long form
225 | 			sta lzto + 1	//self-modifying: patches the operand of cpy at lzto
226 | 			
227 | 			iny
228 | 			
229 | 			lda tsput
230 | 			bcc long
231 | 			
232 | 			sbc (tsget),y	//lzput = tsput - offset byte
233 | 			sta lzput
234 | 			lda tsput+1
235 | 	
236 | 			sbc #$00
237 | 		
238 | 			ldx #2	//short LZ tokens are 2 bytes long
239 | 			//lz MUST decrunch forward	
240 | 	lz_put:
241 | 			sta lzput+1
242 | 			
243 | 			ldy #0
244 | 	
245 | 			lda (lzput),y
246 | 			sta (tsput),y
247 | 			
248 | 			iny
249 | 			lda (lzput),y
250 | 			sta (tsput),y
251 | 	
252 | 	ts_delz_loop:
253 | 	
254 | 			iny
255 | 		
256 | 			lda (lzput),y
257 | 			sta (tsput),y
258 | 			
259 | 	lzto:	cpy #0	//operand is patched with the offset of the last byte to copy
260 | 			bne ts_delz_loop 
261 | 			
262 | 			tya
263 | #if INPLACE
264 | 			//update zero page with a = runlen, x = 2, y = 0
265 | 			//clc not needed as we have len - 1 in A (from the encoder) and C = 1
266 | 			jmp updatezp
267 | #else			
268 | 			//update zero page with a = runlen, x = 2, y = 0
269 | 			ldy #0
270 | 			//clc not needed as we have len - 1 in A (from the encoder) and C = 1
271 | 			jmp updatezp_noclc
272 | #endif
273 | 	
274 | 	optRun:	
275 | 			ldy #255	//operand is patched at entry with the header byte
276 | 			sty tstemp
277 | 
278 | 			ldx #1	//token is a single byte; also clears Z so the bne below is always taken
279 | 			//A is zero
280 | 			
281 | 			bne !runStart-		
282 | 
283 | 	long:
284 | 			//carry is clear and compensated for from the encoder
285 | 			adc (tsget),y	//low byte of lzput from token byte 1
286 | 			sta lzput
287 | 			iny
288 | 			lax (tsget),y	//token byte 2 into a and x
289 | 			ora #$80
290 | 			adc tsput + 1	//high byte of lzput, left in a for lz_put
291 | 				
292 | 			cpx #$80	//carry = bit 7 of token byte 2
293 | 			rol lzto + 1	//shifted in as the low bit of the patched copy length
294 | 			ldx #3	//long LZ tokens are 3 bytes long
295 | 	
296 | 			bne lz_put	//always taken
297 | 	
298 | }


--------------------------------------------------------------------------------
/decrunch_extreme.asm:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | decrunch_extreme.asm
  4 | 
  5 | NMOS 6502 decompressor for data stored in TSCrunch format.
  6 | 
  7 | This code is written for the KickAssembler assembler.
  8 | 
  9 | Copyright Antonio Savona 2022.
 10 | 
 11 | */
 12 | 
 13 | 
 14 | //#define INPLACE 		//Enables inplace decrunching. Use -i switch when crunching. 
 15 | 
 16 | .label tsget 	= $f8	//2 bytes, zero-page pointer to the crunched data being read
 17 | .label tstemp	= $fa	//1 byte, holds the RLE run count while a run is being written
 18 | .label tsput 	= $fb	//2 bytes, zero-page pointer to the decrunched output
 19 | .label lzput 	= $fd	//2 bytes, zero-page pointer to the source of an LZ copy
 20 | 
 21 | 
 22 | #if INPLACE
 23 | 
 24 | .macro TS_DECRUNCH(src)	//INPLACE variant: src = address of the crunched data; tsdecrunch reads the destination from the stream header
 25 | {
 26 | 		lda #<src	//low byte of the crunched data address
 27 | 		sta.zp tsget
 28 | 		lda #>src	//high byte of the crunched data address
 29 | 		sta.zp tsget + 1
 30 | 		jsr tsdecrunch	//returns once the end-of-stream token has been reached
 31 | }
 32 | 
 33 | #else
 34 | 
 35 | .macro TS_DECRUNCH(src,dst)	//src = address of the crunched data, dst = address the data is decrunched to
 36 | {
 37 | 		lda #<src	//tsget = src
 38 | 		sta.zp tsget
 39 | 		lda #>src
 40 | 		sta.zp tsget + 1
 41 | 		lda #<dst	//tsput = dst
 42 | 		sta.zp tsput
 43 | 		lda #>dst
 44 | 		sta.zp tsput + 1
 45 | 		jsr tsdecrunch	//returns once the end-of-stream token has been reached
 46 | }
 47 | 
 48 | #endif
 49 | 
 50 | 
 51 | tsdecrunch:	//decrunches the stream at (tsget) to (tsput); copy loops are unrolled 2x and entered according to the parity of the length
 52 | {
 53 | 	decrunch:
 54 | 
 55 | 	#if INPLACE
 56 | 			ldy #$ff
 57 | 		!:	iny
 58 | 			lda (tsget),y
 59 | 			sta tsput , y	//copies 4 header bytes to tsput..lzput+1; the last one lands in lzput+1, with no effect.
 60 | 			cpy #3
 61 | 			bne !- 
 62 | 			
 63 | 			pha	//a = header byte 3; pulled back and stored to the output at done
 64 | 			
 65 | 			lda lzput	//header byte 2
 66 | 			sta optRun + 1	//self-modifying: patches the operand of ldy at optRun
 67 | 			
 68 | 			ldx #$d0 //bne opcode, kept when the patched run count is odd
 69 | 			and #1
 70 | 			bne !skp+
 71 | 			ldx #$29 //and immediate opcode, used when the count is even so optOdd falls through
 72 | 		!skp:
 73 | 			stx optOdd 
 74 | 
 75 | 			tya	//a = 3 and cpy left carry set, so update_getonly advances tsget by 4
 76 | 			ldy #0
 77 | 			beq update_getonly
 78 | 	#else 
 79 | 			ldy #0			
 80 | 	
 81 | 			lda (tsget),y	//first byte of the stream
 82 | 			sta optRun + 1	//self-modifying: patches the operand of ldy at optRun
 83 | 
 84 | 			ldx #$d0 //bne opcode, kept when the patched run count is odd
 85 | 			and #1
 86 | 			bne !skp+
 87 | 			ldx #$29 //and immediate opcode, used when the count is even so optOdd falls through
 88 | 		!skp:
 89 | 			stx optOdd 
 90 | 			
 91 | 			inc tsget	//step past the header byte
 92 | 			bne entry2
 93 | 			inc tsget + 1
 94 | 	#endif
 95 | 	
 96 | 	entry2:		
 97 | 			lax (tsget),y	//token byte into both a and x (y = 0 here)
 98 | 			
 99 | 			bmi rleorlz	//bit 7 set: RLE or LZ token
100 | 			
101 | 			cmp #$20
102 | 			bcs lz2	//token >= $20: 2-byte LZ, or end of stream when token == $20
103 | 	//literal: token < $20 is the number of bytes to copy
104 | 			
105 | 	#if INPLACE
106 | 			
107 | 			inc tsget	//step past the token so the copy can run forward from y = 0
108 | 			beq updatelit_hi
109 | 		return_from_updatelit:
110 | 			and #1	//odd count: enter the unrolled loop at its second half
111 | 			bne !odd+
112 | 		
113 | 		ts_delit_loop:
114 | 
115 | 			lda (tsget),y
116 | 			sta (tsput),y
117 | 			iny
118 | 			dex
119 | 		!odd:	
120 | 			lda (tsget),y
121 | 			sta (tsput),y
122 | 			iny
123 | 			dex	//x counts the literals still to copy
124 | 			
125 | 			bne ts_delit_loop	
126 | 			
127 | 			tya	//a = x = number of bytes copied
128 | 			tax
129 | 			//carry is clear
130 | 	updatezp:	
131 | 			ldy #0
132 | 	#else	//not inplace
133 | 			tay	//y = literal count; the copy runs backward from the last byte
134 | 		
135 | 			and #1	//odd count: enter the unrolled loop at its second half
136 | 			bne !odd+
137 | 				
138 | 		ts_delit_loop:
139 | 			
140 | 			lda (tsget),y	//source index is one higher than the destination because tsget still points at the token
141 | 			dey
142 | 			sta (tsput),y
143 | 		!odd:	
144 | 			lda (tsget),y
145 | 			dey
146 | 			sta (tsput),y
147 | 			
148 | 			bne ts_delit_loop
149 | 			
150 | 			txa	//a = count, added to tsput (carry is clear after cmp #$20)
151 | 			inx	//x = count + 1, added to tsget (token byte included)
152 | 	#endif
153 | 			
154 | 	updatezp_noclc:	//tsput += a + carry, then tsget += x
155 | 			adc tsput
156 | 			sta tsput
157 | 			bcs updateput_hi
158 | 		putnoof:
159 | 			txa
160 | 		update_getonly:	//tsget += a + carry
161 | 			adc tsget
162 | 			sta tsget
163 | 			bcc entry2
164 | 			inc tsget+1
165 | 			bcs entry2	//always taken: carry is still set
166 | 	
167 | 	#if INPLACE		
168 | 	updatelit_hi:
169 | 			inc tsget+1
170 | 			bcc return_from_updatelit	//always taken: carry is clear after cmp #$20 on a literal token
171 | 	#endif		
172 | 	updateput_hi:
173 | 			inc tsput+1
174 | 			clc
175 | 			bcc putnoof	//always taken
176 | 	
177 | 	//LZ2	
178 | 		lz2:
179 | 			beq done	//token == $20 marks the end of the stream
180 | 			
181 | 			ora #$80	//carry is set from cmp #$20; the token byte itself encodes the backward offset
182 | 			adc tsput
183 | 			sta lzput
184 | 			lda tsput + 1
185 | 			sbc #$00
186 | 			sta lzput + 1 		
187 | 	
188 | 			//y already zero			
189 | 			lda (lzput),y	//copy exactly two bytes
190 | 			sta (tsput),y
191 | 			iny		
192 | 			lda (lzput),y
193 | 			sta (tsput),y
194 | 					
195 | 			tya	//a = 1; tsput advances by 2 provided carry is still set after the sbc above
196 | 			dey
197 | 			
198 | 			adc tsput
199 | 			sta tsput
200 | 			bcs lz2_put_hi
201 | 		!skp:	
202 | 			inc tsget	//LZ2 tokens are a single byte
203 | 			bne entry2
204 | 			inc tsget + 1
205 | 			bne entry2			
206 | 
207 | 		lz2_put_hi:
208 | 			inc tsput + 1
209 | 			bcs !skp-		
210 | 										
211 | 	rleorlz:
212 | 			
213 | 			alr #$7f	//a = (token & $7f) >> 1, carry = token bit 0
214 | 			bcc ts_delz		//bit 0 clear: LZ
215 | 
216 | 		//RLE
217 | 			beq zeroRun	//remaining bits all zero: run of zeros with the count preset from the header
218 | 				
219 | 		plain:
220 | 			
221 | 			iny
222 | 			sta tstemp		//run count minus one: offsets tstemp..0 get written		
223 | 
224 | 			lsr				//c = test parity
225 | 		
226 | 			lda (tsget),y	//fetch rle byte
227 | 			ldy tstemp
228 | 		runStart:
229 | 			sta (tsput),y
230 | 
231 | 			bcs !odd+	//odd count left: enter the unrolled loop at its second half
232 | 			sec	//restore carry = 1, which the bcs and adc after the loop rely on
233 | 			
234 | 		ts_derle_loop:
235 | 			dey
236 | 			sta (tsput),y
237 | 		!odd:
238 | 	
239 | 			dey
240 | 			sta (tsput),y
241 | 			
242 | 			bne ts_derle_loop
243 | 			
244 | 			//update zero page with a = runlen, x = 2 , y = 0 
245 | 			lda tstemp		
246 | 			ldx #2	//token + run byte
247 | 			bcs updatezp_noclc	//always taken: carry is set on both parity paths
248 | 	
249 | 					
250 | 	   done:
251 | #if INPLACE	   
252 | 	   		pla	//header byte 3, saved at entry
253 | 	   		sta (tsput),y
254 | #endif	   		
255 | 			rts	
256 | 	
257 | 
258 | 	//LZ
259 | 	ts_delz:
260 | 			
261 | 			lsr 	//carry = token bit 1: set for a 1-byte offset, clear for the long form
262 | 			sta lzto + 1	//self-modifying: patches the operand of cpy at lzto
263 | 			
264 | 			iny
265 | 			
266 | 			lda tsput
267 | 			bcc long
268 | 			
269 | 			sbc (tsget),y	//lzput = tsput - offset byte
270 | 			sta lzput
271 | 			lda tsput+1
272 | 	
273 | 			sbc #$00
274 | 		
275 | 			ldx #2	//short LZ tokens are 2 bytes long
276 | 			//lz MUST decrunch forward	
277 | 	lz_put:
278 | 			sta lzput+1
279 | 			
280 | 			ldy #0
281 | 		
282 | 			lda lzto + 1	//parity of the patched end offset selects the loop entry point
283 | 			lsr
284 | 			bcs !odd+
285 | 
286 | 			lda (lzput),y
287 | 			sta (tsput),y
288 | 	ts_delz_loop:			
289 | 			iny
290 | 
291 | 	!odd:		
292 | 
293 | 			lda (lzput),y
294 | 			sta (tsput),y
295 | 
296 | 			iny
297 | 		
298 | 			lda (lzput),y
299 | 			sta (tsput),y
300 | 			
301 | 	lzto:	cpy #0	//operand is patched with the offset of the last byte to copy
302 | 			bne ts_delz_loop 
303 | 			
304 | 			tya
305 | #if INPLACE
306 | 			//update zero page with a = runlen, x = 2, y = 0
307 | 			//clc not needed as we have len - 1 in A (from the encoder) and C = 1
308 | 			jmp updatezp
309 | #else			
310 | 			//update zero page with a = runlen, x = 2, y = 0
311 | 			ldy #0
312 | 			//clc not needed as we have len - 1 in A (from the encoder) and C = 1
313 | 			jmp updatezp_noclc
314 | #endif
315 | 	
316 | 	zeroRun:		
317 | 	optRun:	ldy #255	//operand is patched at entry with the header byte; a is zero here
318 | 			sta (tsput),y
319 | 	optOdd:	bne !odd+	//opcode is patched at entry: bne for an odd count, and #imm (a stays 0, falls through) for an even one
320 | 	ts_dezero_loop:
321 | 			dey
322 | 			sta (tsput),y
323 | 		!odd:
324 | 			dey
325 | 			sta (tsput),y
326 | 			bne ts_dezero_loop
327 | 			
328 | 		    lda optRun + 1	//a = patched run count; carry is still set from alr and supplies the +1 for tsput
329 | 		    
330 | 			ldx #1	//token is a single byte
331 | 			jmp updatezp_noclc		
332 | 
333 | 	long:
334 | 			//carry is clear and compensated for from the encoder
335 | 			adc (tsget),y	//low byte of lzput from token byte 1
336 | 			sta lzput
337 | 			iny
338 | 			lax (tsget),y	//token byte 2 into a and x
339 | 			ora #$80
340 | 			adc tsput + 1	//high byte of lzput, left in a for lz_put
341 | 				
342 | 			cpx #$80	//carry = bit 7 of token byte 2
343 | 			rol lzto + 1	//shifted in as the low bit of the patched copy length
344 | 			ldx #3	//long LZ tokens are 3 bytes long
345 | 	
346 | 			bne lz_put	//always taken
347 | 	
348 | }


--------------------------------------------------------------------------------
/decrunch_small.asm:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | decrunch_small.asm
  4 | 
  5 | NMOS 6502 decompressor for data stored in TSCrunch format.
  6 | 
  7 | This code is written for the KickAssembler assembler.
  8 | 
  9 | Copyright Antonio Savona 2022.
 10 | 
 11 | */
 12 | 
 13 | 
 14 | //#define INPLACE 		//Enables inplace decrunching. Use -i switch when crunching. 
 15 | 
 16 | .label tsget 	= $f8	//2 bytes: pointer into the crunched data, read via (tsget),y
 17 | .label tstemp	= $fa	//1 byte: run length scratch used by the RLE / zero-run code
 18 | .label tsput 	= $fb	//2 bytes: pointer to the decrunched output, written via (tsput),y
 19 | .label lzput 	= $fd	//2 bytes: source pointer for LZ copies, read via (lzput),y
 20 | 
 21 | 
 22 | #if INPLACE
 23 | 
 24 | .macro TS_DECRUNCH(src)	//in-place variant: only the address of the crunched data is passed
 25 | {
 26 | 		lda #<src	//tsget = src
 27 | 		sta.zp tsget
 28 | 		lda #>src
 29 | 		sta.zp tsget + 1
 30 | 		jsr tsdecrunch	//tsput is loaded by tsdecrunch from the first two bytes of the stream
 31 | }
 32 | 
 33 | #else
 34 | 
 35 | .macro TS_DECRUNCH(src,dst)	//decrunches the crunched data at src to dst
 36 | {
 37 | 		lda #<src	//tsget = src
 38 | 		sta.zp tsget
 39 | 		lda #>src
 40 | 		sta.zp tsget + 1
 41 | 		lda #<dst	//tsput = dst
 42 | 		sta.zp tsput
 43 | 		lda #>dst
 44 | 		sta.zp tsput + 1
 45 | 		jsr tsdecrunch
 46 | }
 47 | 
 48 | #endif
 49 | 
 50 | 
 51 | tsdecrunch:	//decrunch routine; expects tsget (and tsput unless INPLACE) to be set, see TS_DECRUNCH
 52 | {
 53 | 	decrunch:
 54 | 
 55 | 	#if INPLACE
 56 | 			ldy #$ff
 57 | 		!:	iny	//copy the first 4 stream bytes (y = 0..3) to tsput, tsput+1, lzput, lzput+1
 58 | 			lda (tsget),y
 59 | 			sta tsput , y	//last iteration trashes lzput, with no effect.
 60 | 			cpy #3
 61 | 			bne !- 
 62 | 			
 63 | 			pha	//keep the 4th header byte on the stack; it is pulled and stored at 'done'
 64 | 			
 65 | 			lda lzput	//3rd header byte ...
 66 | 			sta optRun + 1	//... is patched into the operand of 'ldy #255' at optRun
 67 | 			
 68 | 			tya	//a = 3 and c = 1 (from cpy #3), so the adc at update_getonly skips the 4 header bytes
 69 | 			ldy #0
 70 | 			beq update_getonly	//always taken: ldy #0 sets z
 71 | 	#else 
 72 | 			ldy #0			
 73 | 
 74 | 			lda (tsget),y	//first stream byte ...
 75 | 			sta optRun + 1	//... is patched into the operand of 'ldy #255' at optRun
 76 | 
 77 | 			inc tsget	//step over that header byte
 78 | 			bne entry2
 79 | 			inc tsget + 1
 80 | 	#endif
 81 | 	
 82 | 	entry2:		//main loop: decode one token, y = 0 on entry
 83 | 			lax (tsget),y	//a = x = token byte (lax: undocumented opcode, lda + ldx)
 84 | 			
 85 | 			bmi rleorlz	//bit 7 set: rle or lz token
 86 | 			
 87 | 			cmp #$20
 88 | 			bcs lz2	//token >= $20: lz2 token, or end of stream when token = $20
 89 | 
 90 | 	//literal
 91 | 			
 92 | 	#if INPLACE
 93 | 			
 94 | 			inc tsget	//step over the token so the literal bytes start at (tsget),0
 95 | 			beq updatelit_hi
 96 | 		return_from_updatelit:
 97 | 		
 98 | 		ts_delit_loop:	//forward copy of x literal bytes
 99 | 
100 | 			lda (tsget),y
101 | 			sta (tsput),y
102 | 			iny
103 | 			dex
104 | 			
105 | 			bne ts_delit_loop	
106 | 			
107 | 			tya	//a = x = number of bytes copied
108 | 			tax
109 | 			//carry is clear
110 | 		updatezp:		
111 | 			ldy #0
112 | 	#else	//not inplace
113 | 			tay	//y = literal length
114 | 			
115 | 		ts_delit_loop:	//backward copy: byte at (tsget),y goes to (tsput),y-1 because the token sits at (tsget),0
116 | 			
117 | 			lda (tsget),y
118 | 			dey
119 | 			sta (tsput),y
120 | 			
121 | 			bne ts_delit_loop
122 | 			
123 | 			txa	//a = literal length, added to tsput below
124 | 			inx	//x = literal length + 1 (token + literal bytes), added to tsget below
125 | 	#endif
126 | 			
127 | 	updatezp_noclc:	//tsput += a + c, then tsget += x, then decode the next token
128 | 			adc tsput
129 | 			sta tsput
130 | 			bcs updateput_hi
131 | 		putnoof:
132 | 			txa	//c is clear here: either no overflow above or clc in updateput_hi
133 | 		update_getonly:
134 | 			adc tsget
135 | 			sta tsget
136 | 			bcc entry2
137 | 			inc tsget+1
138 | 			bcs entry2	//always taken: c is still set
139 | 	
140 | 	#if INPLACE		
141 | 	updatelit_hi:
142 | 			inc tsget+1
143 | 			bcc return_from_updatelit	//always taken: c is clear after cmp #$20 on a literal token
144 | 	#endif		
145 | 	updateput_hi:
146 | 			inc tsput+1
147 | 			clc
148 | 			bcc putnoof	//always taken
149 | 				
150 | 											
151 | 	rleorlz:
152 | 			alr #$7f	//a = (token & $7f) >> 1, c = token bit 0 (alr: undocumented opcode, and + lsr)
153 | 			bcc ts_delz		//bit 0 clear: lz token
154 | 
155 | 		//RLE
156 | 			beq optRun	//no length bits set: zero run of the preset length
157 | 				
158 | 		plain:
159 | 			ldx #2	//a plain rle token occupies 2 stream bytes
160 | 			iny
161 | 			sta tstemp		//number of bytes to de-rle		
162 | 
163 | 			lda (tsget),y	//fetch rle byte
164 | 			ldy tstemp
165 | 		!runStart:
166 | 			sta (tsput),y
167 | 			
168 | 		ts_derle_loop:	//fill downwards from offset tstemp to offset 0, i.e. tstemp + 1 bytes
169 | 			
170 | 			dey
171 | 			sta (tsput),y
172 | 
173 | 			bne ts_derle_loop
174 | 			
175 | 			//update zero page with a = runlen, x = 2 , y = 0 
176 | 			lda tstemp		
177 | 
178 | 			bcs updatezp_noclc	//always taken: c is still set from alr, so tsput += tstemp + 1
179 | 			
180 | 	   done:
181 | #if INPLACE	   
182 | 	   		pla	//byte saved from the header at start-up ...
183 | 	   		sta (tsput),y	//... is written at the current output position (y = 0)
184 | #endif	   		
185 | 			rts	
186 | 	//LZ2	
187 | 		lz2:
188 | 			beq done	//token = $20 (z from cmp #$20): end of stream
189 | 			
190 | 			ora #$80
191 | 			adc tsput	//c = 1 from cmp #$20; low byte of the match address
192 | 			sta lzput
193 | 			lda tsput + 1
194 | 			sbc #$00	//borrow into the high byte when the add above did not carry
195 | 			sta lzput + 1 		
196 | 	
197 | 			//y already zero			
198 | 			lda (lzput),y	//copy 2 bytes from (lzput) to (tsput)
199 | 			sta (tsput),y
200 | 			iny		
201 | 			lda (lzput),y
202 | 			sta (tsput),y
203 | 					
204 | 			tya //y = a = 1. 
205 | 			tax //y = a = x = 1. a + carry = 2
206 | 	#if INPLACE
207 | 			bne updatezp	//always taken: a = 1
208 | 	#else
209 | 			dey //ldy #0			
210 | 			beq updatezp_noclc	//always taken: y = 0
211 | 	#endif
212 | 	//LZ
213 | 	ts_delz:
214 | 			
215 | 			lsr 	//c = token bit 1 (set: short match), a = remaining length bits
216 | 			sta lzto + 1	//patch the end index into the operand of 'cpy #0' at lzto
217 | 			
218 | 			iny
219 | 			
220 | 			lda tsput
221 | 			bcc long
222 | 			
223 | 			sbc (tsget),y	//c = 1 here: lzput = tsput - offset byte
224 | 			sta lzput
225 | 			lda tsput+1
226 | 	
227 | 			sbc #$00
228 | 		
229 | 			ldx #2	//a short lz token occupies 2 stream bytes
230 | 			//lz MUST decrunch forward	
231 | 	lz_put:
232 | 			sta lzput+1
233 | 			
234 | 			ldy #0
235 | 	
236 | 			lda (lzput),y
237 | 			sta (tsput),y
238 | 	
239 | 	ts_delz_loop:	//forward copy until y equals the patched end index
240 | 	
241 | 			iny
242 | 		
243 | 			lda (lzput),y
244 | 			sta (tsput),y
245 | 			
246 | 	lzto:	cpy #0	//operand is rewritten for every lz token (see ts_delz / long)
247 | 			bne ts_delz_loop 
248 | 			
249 | 			tya
250 | 	#if INPLACE
251 | 			//update zero page with a = runlen, x = 2, y = 0
252 | 			//clc not needed as we have len - 1 in A (from the encoder) and C = 1			
253 | 			bcs updatezp
254 | 	#else		
255 | 			//update zero page with a = runlen, x = 2, y = 0
256 | 			ldy #0
257 | 			//clc not needed as we have len - 1 in A (from the encoder) and C = 1			
258 | 			bcs updatezp_noclc
259 | 	#endif
260 | 		
261 | 	optRun:	
262 | 			ldy #255	//operand is patched at start-up with the run byte from the stream header
263 | 			sty tstemp
264 | 
265 | 			ldx #1	//a zero-run token occupies 1 stream byte; x = 1 also clears z for the bne below
266 | 			//A is zero		
267 | 			bne !runStart-		
268 | 
269 | 	long:
270 | 			//carry is clear and compensated for from the encoder
271 | 			adc (tsget),y	//lzput low = tsput low + 2nd token byte
272 | 			sta lzput
273 | 			iny
274 | 			lax (tsget),y	//a = x = 3rd token byte
275 | 			ora #$80
276 | 			adc tsput + 1
277 | 				
278 | 			cpx #$80	//c = bit 7 of the 3rd token byte ...
279 | 			rol lzto + 1	//... shifted in as the lowest bit of the copy end index
280 | 			ldx #3	//a long lz token occupies 3 stream bytes
281 | 	
282 | 			bne lz_put	//always taken: x = 3
283 | 	
284 | }


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
  1 | TSCrunch V1.3.1
  2 | 
  3 | by Antonio Savona
  4 | 
  5 | March 2025
  6 | 
  7 | 
  8 | About
  9 | =====
 10 | 
 11 | TSCrunch is an optimal, byte-aligned, LZ+RLE hybrid encoder, designed to maximize decoding speed on NMOS 6502 and derived CPUs, while keeping decent compression performance (for a bytecruncher, that is).
 12 | TSCrunch was designed as the default asset cruncher for the game A Pig Quest, and, as such, it's optimized for in-memory level compression, but as of version 1.0 it can also create SFX executables for off-line prg crunching.
 13 | 
 14 | Requirements
 15 | ============
 16 | 
 17 | TSCrunch is written in Go and therefore should run on any machine that supports a Go compiler.
 18 | Precompiled binaries are available for the following platforms:
 19 | - windows x64
 20 | - windows arm64
 21 | - linux x64
 22 | - mac / darwin arm64
 23 | 
 24 | A Python version is also supplied as a reference encoder, but use of the Go version is recommended for speed.
 25 | The memory decrunchers require Kick Assembler, but it should be quite easy to port them to your assembler of choice.
 26 | 
 27 | Usage
 28 | =====
 29 | 
 30 | tscrunch [option] infile outfile
 31 | 
 32 | Crunching examples:
 33 | 
 34 | 	tscrunch -x $0820 game.prg crunched.prg
 35 | 	
 36 | Crunches the file game.prg and generates a self executable crunched.prg, using $0820 as post-decrunch jmp address
 37 | 
 38 | 	tscrunch -p game.prg crunched.bin
 39 | 
 40 | Mem-crunches the file game.prg, stripping the 2-byte header and generates a binary file crunched.bin
 41 | 
 42 | 	tscrunch data.bin crunched.bin
 43 | Mem-crunches the file data.bin and generates a binary file crunched.bin
 44 | 
 45 | 	tscrunch -i data.prg crunched.prg
 46 | Mem-crunches the file data.prg for in-place use, and generates a prg file crunched.prg with the appropriate load address
 47 | 
 48 | 	tscrunch -x2 49152 game.prg crunched.prg
 49 | 	
 50 | Crunches the file game.prg and generates a self executable crunched.prg with alternative decrunching code, using $c000 (49152) as post-decrunch jmp address. The alternative decrunching code runs from stack instead of zero-page
 51 | 
 52 | 	tscrunch -x 0x1000 -b game.prg crunched.prg
 53 | 	
 54 | Crunches the file game.prg and generates a self executable crunched.prg that blanks the screen while decrunching, using $1000 (0x1000) as post-decrunch jmp address.
 55 | 
 56 | 
 57 | Please refer to the inline help (tscrunch -h) for a detailed description of the different crunching options.
 58 | Note that with the exception of self executables and in-place, all the files generated by TSCrunch are headless binaries, that is they don't come with a 2 byte loader offset.
 59 | 
 60 | Decrunching files from code
 61 | ===========================
 62 | 
 63 | For memory decrunching, please #include decrunch.asm and include the crunched binaries in your code, then use the macro TS_DECRUNCH, as explained by the following code fragment 
 64 | 
 65 | 		.pc = $1000 "test"
 66 | 		//decrunches data to $4000
 67 | 		:TS_DECRUNCH(compressed_data,$4000) 
 68 | 		jmp *
 69 | 
 70 | 		.align $100
 71 | 		#include "decrunch.asm"
 72 | 		
 73 | 		compressed_data:
 74 | 		.import binary "data.bin"
 75 | 		
 76 | 
 77 | For inplace decrunching, please #define INPLACE before including the decruncher code, as explained by the following code fragment
 78 | 
 79 | 		#define INPLACE
 80 | 
 81 | 		.pc = $1000 "test"
 82 | 		//decrunches data inplace
 83 | 		:TS_DECRUNCH(compressed_data) 
 84 | 		jmp *
 85 | 
 86 | 		.align $100
 87 | 		#include "decrunch.asm"
 88 | 		
 89 | 		.pc = LoadAddress //as provided by the cruncher
 90 | 		compressed_data:
 91 | 		.import c64 "data.bin"
 92 | 
 93 | 
 94 | decrunch.asm is the recommended decruncher for the general case. Two alternative decrunchers are also supplied: a small version (decrunch_small.asm), which saves some bytes at the cost of speed, and an extreme version (decrunch_extreme.asm), which is generally marginally faster but comes with a larger footprint.
 95 | 
 96 | 
 97 | Performance
 98 | ===========
 99 | 
100 | TSCrunch is designed for ultra-fast decrunching while keeping a decent compression ratio. Being a byte-cruncher, it falls short of popular bit-crunchers, such as exomizer or B2, when comparing compression efficiency, but it is usually much faster at decoding. Furthermore, you can expect a 20% to 40% speed bump compared to popular byte-crunchers with similar compression efficiency.
101 | The following benchmark compares TSCrunch performance with those of a fast byte-cruncher, TinyCrunch, and a fast bit-cruncher, B2, on a real-case compression scenario: Chopper Command, from the same author.
102 | 
103 | 
104 | Chopper Command - Raw encoding	 - game prg
105 | 			
106 | 		Tscrunch 1.3		TinyCrunch 1.2		B2
107 | Size		46913			46913			46913
108 | Crunched size	12506			15419			11181
109 | % of original	26.66%			32.87%			23.83%
110 | Decrunch cycles	754733			1133039			1694585
111 | Cycles per byte	16.08792872		24.15191951		36.12186388
112 | 
113 | 
114 | Changelog
115 | =========
116 | 
117 | 1.3.1
118 | -TSCrunch is now available on three platforms: Windows, Linux, and macOS.
119 | -Vastly improved crunching speed, with a reduced memory footprint for both the executable and the Python version.
120 | -Slightly improved compression for SFX files.
121 | -Slightly smaller decrunchers when using the INPLACE option.
122 | 
123 | 1.3
124 | -Improved compression adding near-optimal zero-runs and refactoring literals and lz2 tokens
125 | -Improved decrunching speed 
126 | -Added extreme and small decruncher versions, for maximum speed and minimal footprint respectively
127 | 
128 | 1.2
129 | -Added long matches to improve compression with no effect on decrunching speed
130 | -Fixed bug in LZ2 search that would prevent some short matches from being identified 
131 | -Code is available also in Go to improve crunching speed and increase portability
132 | 
133 | 1.1
134 | -Added Inplace compression
135 | -Minor speed improvement 
136 | 
137 | 1.0
138 | -Initial release


--------------------------------------------------------------------------------
/tscrunch.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | TSCrunch v1.3.1 binary cruncher, by Antonio Savona
  3 | */
  4 | 
  5 | package main
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"container/heap"
 10 | 	"flag"
 11 | 	"fmt"
 12 | 	"math"
 13 | 	"os"
 14 | 	"runtime"
 15 | 	"strconv"
 16 | 	"sync"
 17 | 	"time"
 18 | )
 19 | 
 20 | // ----------------------
 21 | // Local Dijkstra Implementation
 22 | // ----------------------
 23 | 
 24 | // Arc represents an edge from one vertex to another with a weight.
 25 | type Arc struct {
 26 | 	dest   int
 27 | 	weight int64
 28 | }
 29 | 
 30 | // Graph holds an adjacency list representation.
 31 | type Graph struct {
 32 | 	arcs map[int][]Arc
 33 | 	n    int // total number of vertices
 34 | }
 35 | 
 36 | // NewGraph creates a new graph with n vertices.
 37 | func NewGraph(n int) *Graph {
 38 | 	return &Graph{
 39 | 		arcs: make(map[int][]Arc, n),
 40 | 		n:    n,
 41 | 	}
 42 | }
 43 | 
 44 | // AddVertex ensures that vertex v exists.
 45 | func (g *Graph) AddVertex(v int) {
 46 | 	if _, ok := g.arcs[v]; !ok {
 47 | 		g.arcs[v] = []Arc{}
 48 | 	}
 49 | }
 50 | 
 51 | // AddArc adds a directed edge from u to v with the given weight.
 52 | func (g *Graph) AddArc(u, v int, weight int64) {
 53 | 	g.arcs[u] = append(g.arcs[u], Arc{dest: v, weight: weight})
 54 | }
 55 | 
 56 | // Item is an element in the priority queue.
 57 | type Item struct {
 58 | 	vertex   int
 59 | 	priority int64
 60 | 	index    int // index in the heap
 61 | }
 62 | 
 63 | // PriorityQueue implements heap.Interface.
 64 | type PriorityQueue []*Item
 65 | 
 66 | func (pq PriorityQueue) Len() int { return len(pq) }
 67 | func (pq PriorityQueue) Less(i, j int) bool {
 68 | 	return pq[i].priority < pq[j].priority
 69 | }
 70 | func (pq PriorityQueue) Swap(i, j int) {
 71 | 	pq[i], pq[j] = pq[j], pq[i]
 72 | 	pq[i].index = i
 73 | 	pq[j].index = j
 74 | }
 75 | func (pq *PriorityQueue) Push(x interface{}) {
 76 | 	n := len(*pq)
 77 | 	item := x.(*Item)
 78 | 	item.index = n
 79 | 	*pq = append(*pq, item)
 80 | }
 81 | func (pq *PriorityQueue) Pop() interface{} {
 82 | 	old := *pq
 83 | 	n := len(old)
 84 | 	item := old[n-1]
 85 | 	old[n-1] = nil // avoid memory leak
 86 | 	item.index = -1
 87 | 	*pq = old[0 : n-1]
 88 | 	return item
 89 | }
 90 | 
 91 | // Shortest computes the shortest path from source to target using Dijkstra’s algorithm.
 92 | // It returns the path (as a slice of vertex indices), the total cost, and a flag indicating success.
 93 | func (g *Graph) Shortest(source, target int) (path []int, cost int64, found bool) {
 94 | 	const INF = math.MaxInt64
 95 | 	dist := make([]int64, g.n)
 96 | 	prev := make([]int, g.n)
 97 | 	for i := 0; i < g.n; i++ {
 98 | 		dist[i] = INF
 99 | 		prev[i] = -1
100 | 	}
101 | 	dist[source] = 0
102 | 
103 | 	pq := make(PriorityQueue, 0, g.n)
104 | 	heap.Init(&pq)
105 | 	heap.Push(&pq, &Item{vertex: source, priority: 0})
106 | 
107 | 	for pq.Len() > 0 {
108 | 		item := heap.Pop(&pq).(*Item)
109 | 		u := item.vertex
110 | 		if u == target {
111 | 			break
112 | 		}
113 | 		for _, arc := range g.arcs[u] {
114 | 			alt := dist[u] + arc.weight
115 | 			if alt < dist[arc.dest] {
116 | 				dist[arc.dest] = alt
117 | 				prev[arc.dest] = u
118 | 				heap.Push(&pq, &Item{vertex: arc.dest, priority: alt})
119 | 			}
120 | 		}
121 | 	}
122 | 
123 | 	if dist[target] == INF {
124 | 		return nil, 0, false
125 | 	}
126 | 
127 | 	// Reconstruct the path.
128 | 	for u := target; u != -1; u = prev[u] {
129 | 		path = append(path, u)
130 | 	}
131 | 	// Reverse the path to get source->target.
132 | 	for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 {
133 | 		path[i], path[j] = path[j], path[i]
134 | 	}
135 | 
136 | 	return path, dist[target], true
137 | }
138 | 
139 | // ----------------------
140 | // End Local Dijkstra Implementation
141 | // ----------------------
142 | 
143 | // Go TSCrunch Code
144 | 
145 | type crunchCtx struct { // options and state shared by the crunching functions
146 | 	QUIET          bool // when false, crunch prints progress messages
147 | 	STATS          bool // when set (and not QUIET), crunch also prints elapsed times
148 | 	PRG            bool // input starts with a 2-byte load address, which crunch strips
149 | 	SFX            bool // presumably selects self-extracting output (readme: -x) - TODO confirm
150 | 	SFXMODE        int  // presumably selects the SFX decruncher variant (readme: -x2) - TODO confirm
151 | 	BLANK          bool // presumably blanks the screen while decrunching (readme: -b) - TODO confirm
152 | 	INPLACE        bool // crunch sets the last source byte aside before crunching
153 | 	jmp            uint16 // presumably the post-decrunch jump address of an SFX - TODO confirm
154 | 	decrunchTo     uint16 // load address read from the 2-byte header when PRG is set
155 | 	loadTo         uint16
156 | 	addr           []byte // the 2 header bytes taken from the source when PRG is set
157 | 	optimalRun     int    // zero-run length chosen by findOptimalZeroRun
158 | 	crunchedSize   int
159 | 	sourceLen      int // length of the source as passed to crunch
160 | 	sourceAbsLen   int // sourceLen, minus the 2 header bytes when PRG is set
161 | 	decrunchEnd    uint16
162 | 	prefixArray    map[[MINLZ]byte][]int // source positions of every MINLZ-byte prefix, built by fillPrefixArray
163 | 	usePrefixArray bool                  // makes findall search prefixArray instead of scanning the data
164 | }
165 | 
166 | type edge struct { // span of source positions covered by one token
167 | 	n0 int // first position covered
168 | 	n1 int // position just past the last one covered
169 | }
170 | 
171 | type token struct { // one candidate encoding of a source span
172 | 	tokentype byte // one of the *ID constants
173 | 	size      int  // number of source bytes the token covers
174 | 	rlebyte   byte // repeated byte, for RLE tokens
175 | 	offset    int  // distance back to the match, for the LZ token kinds
176 | 	i         int  // source position of the token (-1 when built from explicit values)
177 | }
178 | 
179 | type tokenEntry struct { // a token together with the graph edge it provides
180 | 	e edge
181 | 	t token
182 | }
183 | 
184 | const LONGESTRLE = 64 // longest run emitted as a single RLE token
185 | const LONGESTLONGLZ = 64 // longest match length LZ() will extend to
186 | const LONGESTLZ = 32 // matches longer than this become LONGLZID tokens
187 | const LONGESTLITERAL = 31 // longest literal token
188 | const MINRLE = 2 // shortest run worth an RLE token
189 | const MINLZ = 3 // shortest LZ match; also the prefix length used by the prefix array
190 | const LZOFFSET = 256 // offsets at or above this become LONGLZID tokens
191 | const LONGLZOFFSET = 32767 // how far back findall looks for matches
192 | const LZ2OFFSET = 94 // how far back LZ2 looks for 2-byte matches
193 | 
194 | const RLEMASK = 0x81 // first-byte bits of RLE and zero-run tokens
195 | const LZMASK = 0x80 // first-byte bit of LZ and long LZ tokens
196 | const LITERALMASK = 0x00 // literal tokens carry only their size in the first byte
197 | const LZ2MASK = 0x00 // LZ2 tokens carry only 0x7f-offset in their single byte
198 | 
199 | const TERMINATOR = LONGESTLITERAL + 1 // 0x20, the value the decrunchers treat as end of stream
200 | 
201 | const LZ2ID = 3 // token.tokentype values
202 | const LZID = 2
203 | const RLEID = 1
204 | const LITERALID = 4
205 | const LONGLZID = 5
206 | const ZERORUNID = 6
207 | 
// min returns the smaller of x and y.
func min(x, y int) int {
	if y <= x {
		return y
	}
	return x
}
214 | 
// max returns the larger of x and y.
func max(x, y int) int {
	if y >= x {
		return y
	}
	return x
}
221 | 
222 | func load_raw(f string) []byte {
223 | 	data, err := os.ReadFile(f)
224 | 	if err == nil {
225 | 		return data
226 | 	}
227 | 	fmt.Println("can't read data")
228 | 	return nil
229 | }
230 | 
231 | func save_raw(f string, data []byte) {
232 | 	os.WriteFile(f, data, 0666)
233 | }
234 | 
235 | func fillPrefixArray(data []byte, ctx *crunchCtx) {
236 | 	ctx.prefixArray = make(map[[MINLZ]byte][]int)
237 | 	for i := 0; i < len(data)-MINLZ; i++ {
238 | 		key := *(*[MINLZ]byte)(data[i:])
239 | 		ctx.prefixArray[key] = append(ctx.prefixArray[key], i)
240 | 	}
241 | }
242 | 
243 | func findall(data []byte, prefix []byte, i int, minlz int, ctx *crunchCtx) <-chan int {
244 | 	c := make(chan int)
245 | 
246 | 	// Full guard against short prefix or bad slice
247 | 	if len(prefix) < MINLZ || len(data) == 0 || minlz < MINLZ || i >= len(data) {
248 | 		close(c)
249 | 		return c
250 | 	}
251 | 
252 | 	x0 := max(0, i-LONGLZOFFSET)
253 | 	x1 := min(i+minlz-1, len(data))
254 | 
255 | 	if ctx.usePrefixArray {
256 | 		// FULL GUARD before accessing key
257 | 		var key [MINLZ]byte
258 | 		copy(key[:], prefix) // will zero-fill if prefix is too short
259 | 		parray := ctx.prefixArray[key]
260 | 
261 | 		go func() {
262 | 			if len(parray) == 0 {
263 | 				close(c)
264 | 				return
265 | 			}
266 | 			l := 0
267 | 			h := len(parray) - 1
268 | 			var mid int
269 | 			for l < h {
270 | 				mid = (h + l) >> 1
271 | 				if parray[mid] < i {
272 | 					l = mid + 1
273 | 				} else if parray[mid] > i {
274 | 					h = mid - 1
275 | 				} else {
276 | 					h = mid
277 | 					l = mid
278 | 				}
279 | 			}
280 | 			for o := mid; o >= 0 && o < len(parray) && parray[o] > x0; o-- {
281 | 				if parray[o] < i && parray[o]+minlz <= len(data) && bytes.Equal(data[parray[o]:parray[o]+minlz], prefix) {
282 | 					c <- parray[o]
283 | 				}
284 | 			}
285 | 			close(c)
286 | 		}()
287 | 	} else {
288 | 		go func() {
289 | 			f := 1
290 | 			for f >= 0 {
291 | 				f = bytes.LastIndex(data[x0:x1], prefix)
292 | 				if f >= 0 {
293 | 					c <- f + x0
294 | 					x1 = x0 + f + minlz - 1
295 | 				}
296 | 			}
297 | 			close(c)
298 | 		}()
299 | 	}
300 | 
301 | 	return c
302 | }
303 | 
304 | func findOptimalZeroRun(src []byte) int {
305 | 	zeroruns := make(map[int]int)
306 | 	var i, j int
307 | 	for i < len(src)-1 {
308 | 		if src[i] == 0 {
309 | 			j = i + 1
310 | 			for j < len(src) && src[j] == 0 && j-i < 256 {
311 | 				j++
312 | 			}
313 | 			if j-i >= MINRLE {
314 | 				zeroruns[j-i]++
315 | 			}
316 | 			i = j
317 | 		} else {
318 | 			i++
319 | 		}
320 | 	}
321 | 	if len(zeroruns) > 0 {
322 | 		bestrun := 0
323 | 		bestvalue := 0.0
324 | 		for key, amount := range zeroruns {
325 | 			currentvalue := float64(key) * math.Pow(float64(amount), 1.1)
326 | 			if currentvalue > bestvalue {
327 | 				bestrun = key
328 | 				bestvalue = currentvalue
329 | 			}
330 | 		}
331 | 		return bestrun
332 | 	}
333 | 	return LONGESTRLE
334 | }
335 | 
336 | func tokenCost(n0, n1 int, t byte) int64 {
337 | 	size := int64(n1 - n0)
338 | 	mdiv := int64(LONGESTLITERAL * (1 << 16))
339 | 	switch t {
340 | 	case LZID:
341 | 		return mdiv*2 + 134 - size
342 | 	case LONGLZID:
343 | 		return mdiv*3 + 138 - size
344 | 	case RLEID:
345 | 		return mdiv*2 + 128 - size
346 | 	case ZERORUNID:
347 | 		return mdiv * 1
348 | 	case LZ2ID:
349 | 		return mdiv*1 + 132 - size
350 | 	case LITERALID:
351 | 		return mdiv*(size+1) + 130 - size
352 | 	default:
353 | 		os.Exit(-1)
354 | 	}
355 | 	return 0
356 | }
357 | 
358 | func tokenPayload(src []byte, t token) []byte {
359 | 	n0 := t.i
360 | 	n1 := t.i + t.size
361 | 	switch t.tokentype {
362 | 	case LZID:
363 | 		return []byte{byte(LZMASK | (((t.size - 1) << 2) & 0x7f) | 2), byte(t.offset & 0xff)}
364 | 	case LONGLZID:
365 | 		negoffset := (0 - t.offset)
366 | 		return []byte{byte(LZMASK | (((t.size-1)>>1)<<2)&0x7f), byte(negoffset & 0xff), byte(((negoffset >> 8) & 0x7f) | (((t.size - 1) & 1) << 7))}
367 | 	case RLEID:
368 | 		return []byte{RLEMASK | byte(((t.size-1)<<1)&0x7f), t.rlebyte}
369 | 	case ZERORUNID:
370 | 		return []byte{RLEMASK}
371 | 	case LZ2ID:
372 | 		return []byte{LZ2MASK | byte(0x7f-t.offset)}
373 | 	default:
374 | 		return append([]byte{byte(LITERALMASK | t.size)}, src[n0:n1]...)
375 | 	}
376 | }
377 | 
378 | func LZ(src []byte, i int, size int, offset int, minlz int, ctx *crunchCtx) token {
379 | 	var lz token
380 | 	lz.tokentype = LZID
381 | 	lz.i = i
382 | 	if i >= 0 {
383 | 		bestpos := i - 1
384 | 		bestlen := 0
385 | 		if i+minlz <= len(src) {
386 | 			prefixes := findall(src, src[i:i+minlz], i, minlz, ctx)
387 | 			for j := range prefixes {
388 | 				l := minlz
389 | 				for i+l < len(src) && l < LONGESTLONGLZ && src[j+l] == src[i+l] {
390 | 					l++
391 | 				}
392 | 				if (l > bestlen && (i-j < LZOFFSET || i-bestpos >= LZOFFSET || l > LONGESTLZ)) || (l > bestlen+1) {
393 | 					bestpos = j
394 | 					bestlen = l
395 | 				}
396 | 			}
397 | 		}
398 | 		lz.size = bestlen
399 | 		lz.offset = i - bestpos
400 | 	} else {
401 | 		lz.size = size
402 | 		lz.offset = offset
403 | 	}
404 | 	if lz.size > LONGESTLZ || lz.offset >= LZOFFSET {
405 | 		lz.tokentype = LONGLZID
406 | 	}
407 | 	return lz
408 | }
409 | 
410 | func RLE(src []byte, i int, size int, rlebyte byte) token {
411 | 	var rle token
412 | 	rle.tokentype = RLEID
413 | 	rle.i = i
414 | 	if i >= 0 {
415 | 		rle.rlebyte = src[i]
416 | 		x := 0
417 | 		for i+x < len(src) && x < LONGESTRLE+1 && src[i+x] == src[i] {
418 | 			x++
419 | 		}
420 | 		rle.size = x
421 | 	} else {
422 | 		rle.size = size
423 | 		rle.rlebyte = rlebyte
424 | 	}
425 | 	return rle
426 | }
427 | 
428 | func ZERORUN(src []byte, i int, optimalRun int) token {
429 | 	var zero token
430 | 	zero.tokentype = ZERORUNID
431 | 	zero.i = i
432 | 	zero.rlebyte = 0
433 | 	zero.size = 0
434 | 	if i >= 0 {
435 | 		var x int
436 | 		for x = 0; x < optimalRun && i+x < len(src) && src[i+x] == 0; x++ {
437 | 		}
438 | 		if x == optimalRun {
439 | 			zero.size = optimalRun
440 | 		}
441 | 	}
442 | 	return zero
443 | }
444 | 
445 | func LZ2(src []byte, i int, size int, offset int) token {
446 | 	var lz2 token
447 | 	lz2.tokentype = LZ2ID
448 | 	lz2.offset = -1
449 | 	lz2.size = -1
450 | 	lz2.i = i
451 | 	if i >= 0 {
452 | 		if i+2 < len(src) {
453 | 			leftbound := max(0, i-LZ2OFFSET)
454 | 			lpart := src[leftbound : i+1]
455 | 			o := bytes.LastIndex(lpart, src[i:i+2])
456 | 			if o >= 0 {
457 | 				lz2.offset = i - (o + leftbound)
458 | 				lz2.size = 2
459 | 			}
460 | 		}
461 | 	} else {
462 | 		lz2.size = size
463 | 		lz2.offset = offset
464 | 	}
465 | 	return lz2
466 | }
467 | 
468 | func LIT(i int, size int) token {
469 | 	var lit token
470 | 	lit.tokentype = LITERALID
471 | 	lit.size = size
472 | 	lit.i = i
473 | 	return lit
474 | }
475 | 
476 | // crunchAtByteWorker processes a single source position and returns any tokens found.
477 | func crunchAtByteWorker(src []byte, i int, ctx *crunchCtx) []tokenEntry {
478 | 	entries := []tokenEntry{}
479 | 	rle := RLE(src, i, 0, 0)
480 | 	rlesize := min(rle.size, LONGESTRLE)
481 | 	var lz, lz2 token
482 | 	if rlesize < LONGESTLONGLZ-1 {
483 | 		lz = LZ(src, i, 0, 0, max(rlesize+1, MINLZ), ctx)
484 | 	} else {
485 | 		lz = LZ(src, -1, -1, -1, -1, ctx)
486 | 	}
487 | 	if len(src)-i > 2 {
488 | 		lz2 = LZ2(src, i, 0, 0)
489 | 	}
490 | 	zero := ZERORUN(src, i, ctx.optimalRun)
491 | 	for size := lz.size; size >= MINLZ && size > rlesize; size-- {
492 | 		tokenCopy := LZ(src, -1, size, lz.offset, MINLZ, ctx)
493 | 		entries = append(entries, tokenEntry{e: edge{i, i + size}, t: tokenCopy})
494 | 	}
495 | 	if rle.size > LONGESTRLE {
496 | 		entries = append(entries, tokenEntry{e: edge{i, i + LONGESTRLE}, t: RLE(src, -1, LONGESTRLE, src[i])})
497 | 	} else {
498 | 		for size := rle.size; size >= MINRLE; size-- {
499 | 			entries = append(entries, tokenEntry{e: edge{i, i + size}, t: RLE(src, -1, size, src[i])})
500 | 		}
501 | 	}
502 | 	if lz2.size == 2 {
503 | 		entries = append(entries, tokenEntry{e: edge{i, i + 2}, t: lz2})
504 | 	}
505 | 	if zero.size != 0 {
506 | 		entries = append(entries, tokenEntry{e: edge{i, i + ctx.optimalRun}, t: zero})
507 | 	}
508 | 	return entries
509 | }
510 | 
511 | func crunch(src []byte, ctx *crunchCtx) []byte {
512 | 	// Boot blocks.
513 | 	var boot = []byte{
514 | 		0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
515 | 		0x00, 0x00, 0x78, 0xA2, 0xCC, 0xBD, 0x1A, 0x08, 0x95, 0x00, 0xCA, 0xD0,
516 | 		0xF8, 0x4C, 0x02, 0x00, 0x34, 0xBD, 0x00, 0x10, 0x9D, 0x00, 0xFF, 0xE8,
517 | 		0xD0, 0xF7, 0xC6, 0x07, 0xA9, 0x06, 0xC7, 0x04, 0x90, 0xEF, 0xA0, 0x00,
518 | 		0xB3, 0x24, 0x30, 0x29, 0xC9, 0x20, 0xB0, 0x47, 0xE6, 0x24, 0xD0, 0x02,
519 | 		0xE6, 0x25, 0xB9, 0xFF, 0xFF, 0x99, 0xFF, 0xFF, 0xC8, 0xCA, 0xD0, 0xF6,
520 | 		0x98, 0xAA, 0xA0, 0x00, 0x65, 0x27, 0x85, 0x27, 0xB0, 0x74, 0x8A, 0x65,
521 | 		0x24, 0x85, 0x24, 0x90, 0xD7, 0xE6, 0x25, 0xB0, 0xD3, 0x4B, 0x7F, 0x90,
522 | 		0x39, 0xF0, 0x68, 0xA2, 0x02, 0x85, 0x59, 0xC8, 0xB1, 0x24, 0xA4, 0x59,
523 | 		0x91, 0x27, 0x88, 0x91, 0x27, 0xD0, 0xFB, 0xA9, 0x00, 0xB0, 0xD5, 0xA9,
524 | 		0x37, 0x85, 0x01, 0x58, 0x4C, 0x61, 0x00, 0xF0, 0xF6, 0x09, 0x80, 0x65,
525 | 		0x27, 0x85, 0xA0, 0xA5, 0x28, 0xE9, 0x00, 0x85, 0xA1, 0xB1, 0xA0, 0x91,
526 | 		0x27, 0xC8, 0xB1, 0xA0, 0x91, 0x27, 0x98, 0xAA, 0xD0, 0xB0, 0x4A, 0x85,
527 | 		0xA5, 0xC8, 0xA5, 0x27, 0x90, 0x31, 0xF1, 0x24, 0x85, 0xA0, 0xA5, 0x28,
528 | 		0xE9, 0x00, 0x85, 0xA1, 0xA2, 0x02, 0xA0, 0x00, 0xB1, 0xA0, 0x91, 0x27,
529 | 		0xC8, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB9, 0xA0, 0x00, 0x91, 0x27, 0xC0,
530 | 		0x00, 0xD0, 0xF6, 0x98, 0xB0, 0x84, 0xE6, 0x28, 0x18, 0x90, 0x87, 0xA0,
531 | 		0xFF, 0x84, 0x59, 0xA2, 0x01, 0xD0, 0x99, 0x71, 0x24, 0x85, 0xA0, 0xC8,
532 | 		0xB3, 0x24, 0x09, 0x80, 0x65, 0x28, 0x85, 0xA1, 0xE0, 0x80, 0x26, 0xA5,
533 | 		0xA2, 0x03, 0xD0, 0xC6,
534 | 	}
535 | 
536 | 	var blank_boot = []byte{
537 | 		0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
538 | 		0x00, 0x00, 0x78, 0xA9, 0x0B, 0x8D, 0x11, 0xD0, 0xA2, 0xCC, 0xBD, 0x1F,
539 | 		0x08, 0x95, 0x00, 0xCA, 0xD0, 0xF8, 0x4C, 0x02, 0x00, 0x34, 0xBD, 0x00,
540 | 		0x10, 0x9D, 0x00, 0xFF, 0xE8, 0xD0, 0xF7, 0xC6, 0x07, 0xA9, 0x06, 0xC7,
541 | 		0x04, 0x90, 0xEF, 0xA0, 0x00, 0xB3, 0x24, 0x30, 0x29, 0xC9, 0x20, 0xB0,
542 | 		0x47, 0xE6, 0x24, 0xD0, 0x02, 0xE6, 0x25, 0xB9, 0xFF, 0xFF, 0x99, 0xFF,
543 | 		0xFF, 0xC8, 0xCA, 0xD0, 0xF6, 0x98, 0xAA, 0xA0, 0x00, 0x65, 0x27, 0x85,
544 | 		0x27, 0xB0, 0x74, 0x8A, 0x65, 0x24, 0x85, 0x24, 0x90, 0xD7, 0xE6, 0x25,
545 | 		0xB0, 0xD3, 0x4B, 0x7F, 0x90, 0x39, 0xF0, 0x68, 0xA2, 0x02, 0x85, 0x59,
546 | 		0xC8, 0xB1, 0x24, 0xA4, 0x59, 0x91, 0x27, 0x88, 0x91, 0x27, 0xD0, 0xFB,
547 | 		0xA9, 0x00, 0xB0, 0xD5, 0xA9, 0x37, 0x85, 0x01, 0x58, 0x4C, 0x61, 0x00,
548 | 		0xF0, 0xF6, 0x09, 0x80, 0x65, 0x27, 0x85, 0xA0, 0xA5, 0x28, 0xE9, 0x00,
549 | 		0x85, 0xA1, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB1, 0xA0, 0x91, 0x27, 0x98,
550 | 		0xAA, 0xD0, 0xB0, 0x4A, 0x85, 0xA5, 0xC8, 0xA5, 0x27, 0x90, 0x31, 0xF1,
551 | 		0x24, 0x85, 0xA0, 0xA5, 0x28, 0xE9, 0x00, 0x85, 0xA1, 0xA2, 0x02, 0xA0,
552 | 		0x00, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB9,
553 | 		0xA0, 0x00, 0x91, 0x27, 0xC0, 0x00, 0xD0, 0xF6, 0x98, 0xB0, 0x84, 0xE6,
554 | 		0x28, 0x18, 0x90, 0x87, 0xA0, 0xFF, 0x84, 0x59, 0xA2, 0x01, 0xD0, 0x99,
555 | 		0x71, 0x24, 0x85, 0xA0, 0xC8, 0xB3, 0x24, 0x09, 0x80, 0x65, 0x28, 0x85,
556 | 		0xA1, 0xE0, 0x80, 0x26, 0xA5, 0xA2, 0x03, 0xD0, 0xC6,
557 | 	}
558 | 
559 | 	var boot2 = []byte{
560 | 		0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
561 | 		0x00, 0x00, 0x78, 0xA9, 0x34, 0x85, 0x01, 0xA2, 0xD0, 0xBD, 0x1F, 0x08,
562 | 		0x9D, 0xFB, 0x00, 0xCA, 0xD0, 0xF7, 0x4C, 0x00, 0x01, 0xAA, 0xAA, 0xAA,
563 | 		0xAA, 0xBD, 0x00, 0x10, 0x9D, 0x00, 0xFF, 0xE8, 0xD0, 0xF7, 0xCE, 0x05,
564 | 		0x01, 0xA9, 0x06, 0xCF, 0x02, 0x01, 0x90, 0xED, 0xA0, 0x00, 0xB3, 0xFC,
565 | 		0x30, 0x27, 0xC9, 0x20, 0xB0, 0x45, 0xE6, 0xFC, 0xD0, 0x02, 0xE6, 0xFD,
566 | 		0xB1, 0xFC, 0x91, 0xFE, 0xC8, 0xCA, 0xD0, 0xF8, 0x98, 0xAA, 0xA0, 0x00,
567 | 		0x65, 0xFE, 0x85, 0xFE, 0xB0, 0x74, 0x8A, 0x65, 0xFC, 0x85, 0xFC, 0x90,
568 | 		0xD9, 0xE6, 0xFD, 0xB0, 0xD5, 0x4B, 0x7F, 0x90, 0x39, 0xF0, 0x68, 0xA2,
569 | 		0x02, 0x85, 0xF9, 0xC8, 0xB1, 0xFC, 0xA4, 0xF9, 0x91, 0xFE, 0x88, 0x91,
570 | 		0xFE, 0xD0, 0xFB, 0xA5, 0xF9, 0xB0, 0xD5, 0xA9, 0x37, 0x85, 0x01, 0x58,
571 | 		0x4C, 0x5F, 0x01, 0xF0, 0xF6, 0x09, 0x80, 0x65, 0xFE, 0x85, 0xFA, 0xA5,
572 | 		0xFF, 0xE9, 0x00, 0x85, 0xFB, 0xB1, 0xFA, 0x91, 0xFE, 0xC8, 0xB1, 0xFA,
573 | 		0x91, 0xFE, 0x98, 0xAA, 0xD0, 0xB0, 0x4A, 0x8D, 0xA3, 0x01, 0xC8, 0xA5,
574 | 		0xFE, 0x90, 0x30, 0xF1, 0xFC, 0x85, 0xFA, 0xA5, 0xFF, 0xE9, 0x00, 0x85,
575 | 		0xFB, 0xA2, 0x02, 0xA0, 0x00, 0xB1, 0xFA, 0x91, 0xFE, 0xC8, 0xB1, 0xFA,
576 | 		0x91, 0xFE, 0xC8, 0xB1, 0xFA, 0x91, 0xFE, 0xC0, 0x00, 0xD0, 0xF7, 0x98,
577 | 		0xB0, 0x84, 0xE6, 0xFF, 0x18, 0x90, 0x87, 0xA0, 0xAA, 0x84, 0xF9, 0xA2,
578 | 		0x01, 0xD0, 0x99, 0x71, 0xFC, 0x85, 0xFA, 0xC8, 0xB3, 0xFC, 0x09, 0x80,
579 | 		0x65, 0xFF, 0x85, 0xFB, 0xE0, 0x80, 0x2E, 0xA3, 0x01, 0xA2, 0x03, 0xD0,
580 | 		0xC6,
581 | 	}
582 | 
583 | 	// Create a graph with len(src)+1 vertices.
584 | 	g := NewGraph(len(src) + 1)
585 | 	for i := 0; i < len(src)+1; i++ {
586 | 		g.AddVertex(i)
587 | 	}
588 | 
589 | 	ctx.sourceLen = len(src)
590 | 	ctx.sourceAbsLen = ctx.sourceLen
591 | 
592 | 	remainder := []byte{}
593 | 	if ctx.PRG {
594 | 		ctx.addr = src[:2]
595 | 		src = src[2:]
596 | 		ctx.decrunchTo = uint16(ctx.addr[0]) + 256*uint16(ctx.addr[1])
597 | 		ctx.sourceAbsLen -= 2
598 | 	}
599 | 
600 | 	if ctx.INPLACE {
601 | 		remainder = src[len(src)-1:]
602 | 		src = src[:len(src)-1]
603 | 	}
604 | 
605 | 	ctx.optimalRun = findOptimalZeroRun(src)
606 | 	if ctx.usePrefixArray {
607 | 		fillPrefixArray(src, ctx)
608 | 	}
609 | 
610 | 	if !ctx.QUIET {
611 | 		fmt.Print("Populating LZ layer")
612 | 	}
613 | 	tm := time.Now()
614 | 
615 | 	// --- Worker pool with collector goroutine ---
616 | 	numWorkers := runtime.GOMAXPROCS(0)
617 | 	jobs := make(chan int, numWorkers*2)
618 | 	results := make(chan tokenEntry, numWorkers*4)
619 | 
620 | 	// Collector: merge results concurrently into tokenMap.
621 | 	tokenMap := make(map[edge]token)
622 | 	var collectorWg sync.WaitGroup
623 | 	collectorWg.Add(1)
624 | 	go func() {
625 | 		defer collectorWg.Done()
626 | 		for entry := range results {
627 | 			tokenMap[entry.e] = entry.t
628 | 		}
629 | 	}()
630 | 
631 | 	// Launch workers.
632 | 	var wg sync.WaitGroup
633 | 	for w := 0; w < numWorkers; w++ {
634 | 		wg.Add(1)
635 | 		go func() {
636 | 			defer wg.Done()
637 | 			for i := range jobs {
638 | 				entries := crunchAtByteWorker(src, i, ctx)
639 | 				for _, entry := range entries {
640 | 					results <- entry
641 | 				}
642 | 			}
643 | 		}()
644 | 	}
645 | 
646 | 	// Send jobs.
647 | 	for i := 0; i < len(src); i++ {
648 | 		jobs <- i
649 | 	}
650 | 	close(jobs)
651 | 	wg.Wait()
652 | 	close(results)
653 | 	collectorWg.Wait()
654 | 	// --- End worker pool ---
655 | 
656 | 	if !ctx.QUIET {
657 | 		if ctx.STATS {
658 | 			fmt.Println(" ...", time.Since(tm))
659 | 		} else {
660 | 			fmt.Println()
661 | 		}
662 | 		fmt.Print("Closing Gaps")
663 | 	}
664 | 	// Fill gaps with literal tokens.
665 | 	for i := 0; i < len(src); i++ {
666 | 		for j := 1; j < min(LONGESTLITERAL+1, len(src)+1-i); j++ {
667 | 			key := edge{i, i + j}
668 | 			if _, exists := tokenMap[key]; !exists {
669 | 				tokenMap[key] = LIT(i, j)
670 | 			}
671 | 		}
672 | 	}
673 | 
674 | 	if !ctx.QUIET {
675 | 		if ctx.STATS {
676 | 			fmt.Println(" ...", time.Since(tm))
677 | 		} else {
678 | 			fmt.Println()
679 | 		}
680 | 		fmt.Print("Populating Graph")
681 | 	}
682 | 	tm = time.Now()
683 | 	for k, t := range tokenMap {
684 | 		g.AddArc(k.n0, k.n1, tokenCost(k.n0, k.n1, t.tokentype))
685 | 	}
686 | 
687 | 	if !ctx.QUIET {
688 | 		if ctx.STATS {
689 | 			fmt.Println(" ...", time.Since(tm))
690 | 		} else {
691 | 			fmt.Println()
692 | 		}
693 | 		fmt.Print("Computing shortest path")
694 | 	}
695 | 	tm = time.Now()
696 | 	bestPath, _, found := g.Shortest(0, len(src))
697 | 	if !found {
698 | 		fmt.Println("No valid path found")
699 | 		os.Exit(1)
700 | 	}
701 | 	if !ctx.QUIET {
702 | 		if ctx.STATS {
703 | 			fmt.Println(" ...", time.Since(tm))
704 | 		} else {
705 | 			fmt.Println()
706 | 		}
707 | 	}
708 | 
709 | 	crunched := make([]byte, 0)
710 | 	token_list := make([]token, 0)
711 | 	for i := 0; i < len(bestPath)-1; i++ {
712 | 		e := edge{bestPath[i], bestPath[i+1]}
713 | 		token_list = append(token_list, tokenMap[e])
714 | 	}
715 | 
716 | 	if ctx.INPLACE {
717 | 		safety := len(token_list)
718 | 		segmentUncrunchedSize := 0
719 | 		segmentCrunchedSize := 0
720 | 		totalUncrunchedSize := 0
721 | 		for i := len(token_list) - 1; i >= 0; i-- {
722 | 			segmentCrunchedSize += len(tokenPayload(src, token_list[i]))
723 | 			segmentUncrunchedSize += token_list[i].size
724 | 			if segmentUncrunchedSize <= segmentCrunchedSize {
725 | 				safety = i
726 | 				totalUncrunchedSize += segmentUncrunchedSize
727 | 				segmentUncrunchedSize = 0
728 | 				segmentCrunchedSize = 0
729 | 			}
730 | 		}
731 | 		for _, t := range token_list[:safety] {
732 | 			crunched = append(crunched, tokenPayload(src, t)...)
733 | 		}
734 | 		if totalUncrunchedSize > 0 {
735 | 			remainder = append(src[len(src)-totalUncrunchedSize:], remainder...)
736 | 		}
737 | 		crunched = append(crunched, TERMINATOR)
738 | 		crunched = append(crunched, remainder[1:]...)
739 | 		crunched = append(remainder[:1], crunched...)
740 | 		crunched = append([]byte{byte(ctx.optimalRun - 1)}, crunched...)
741 | 		crunched = append(ctx.addr, crunched...)
742 | 	} else {
743 | 		for _, t := range token_list {
744 | 			crunched = append(crunched, tokenPayload(src, t)...)
745 | 		}
746 | 		crunched = append(crunched, TERMINATOR)
747 | 		if !ctx.SFX {
748 | 			crunched = append([]byte{byte(ctx.optimalRun - 1)}, crunched...)
749 | 		}
750 | 	}
751 | 
752 | 	ctx.crunchedSize = len(crunched)
753 | 	if ctx.SFX {
754 | 		if ctx.SFXMODE == 0 {
755 | 			gap := 0
756 | 			if ctx.BLANK {
757 | 				gap = 5
758 | 				boot = blank_boot
759 | 			}
760 | 			fileLen := len(boot) + len(crunched)
761 | 			startAddress := 0x10000 - len(crunched)
762 | 			transfAddress := fileLen + 0x6ff
763 | 
764 | 			boot[0x1e+gap] = byte(transfAddress & 0xff)
765 | 			boot[0x1f+gap] = byte(transfAddress >> 8)
766 | 			boot[0x3f+gap] = byte(startAddress & 0xff)
767 | 			boot[0x40+gap] = byte(startAddress >> 8)
768 | 			boot[0x42+gap] = byte(ctx.decrunchTo & 0xff)
769 | 			boot[0x43+gap] = byte(ctx.decrunchTo >> 8)
770 | 			boot[0x7d+gap] = byte(ctx.jmp & 0xff)
771 | 			boot[0x7e+gap] = byte(ctx.jmp >> 8)
772 | 			boot[0xcc+gap] = byte(ctx.optimalRun - 1)
773 | 		} else {
774 | 			boot = boot2
775 | 			fileLen := len(boot) + len(crunched)
776 | 			startAddress := 0x10000 - len(crunched)
777 | 			transfAddress := fileLen + 0x6ff
778 | 
779 | 			boot[0x26] = byte(transfAddress & 0xff)
780 | 			boot[0x27] = byte(transfAddress >> 8)
781 | 			boot[0x21] = byte(startAddress & 0xff)
782 | 			boot[0x22] = byte(startAddress >> 8)
783 | 			boot[0x23] = byte(ctx.decrunchTo & 0xff)
784 | 			boot[0x24] = byte(ctx.decrunchTo >> 8)
785 | 			boot[0x85] = byte(ctx.jmp & 0xff)
786 | 			boot[0x86] = byte(ctx.jmp >> 8)
787 | 			boot[0xd4] = byte(ctx.optimalRun - 1)
788 | 		}
789 | 		crunched = append(boot, crunched...)
790 | 		ctx.crunchedSize += len(boot)
791 | 		ctx.loadTo = 0x0801
792 | 	}
793 | 
794 | 	ctx.decrunchEnd = uint16(int(ctx.decrunchTo) + ctx.sourceAbsLen - 1)
795 | 	if ctx.INPLACE {
796 | 		ctx.loadTo = ctx.decrunchEnd - uint16(len(crunched)) + 1
797 | 		crunched = append([]byte{byte(ctx.loadTo & 255), byte(ctx.loadTo >> 8)}, crunched...)
798 | 	}
799 | 	return crunched
800 | }
801 | 
// usage prints the command-line help text, one line per option, to stdout.
func usage() {
	helpLines := []string{
		"TSCrunch 1.3.1 - binary cruncher, by Antonio Savona",
		"Usage: tscrunch [-p] [-i] [-q] [-x[2] $addr] infile outfile",
		" -p  : input file is a prg, first 2 bytes are discarded.",
		" -x  $addr: creates a self extracting file (forces -p)",
		" -x2 $addr: creates a self extracting file with sfx code in stack (forces -p)",
		" -b  : blanks screen during decrunching (only with -x)",
		" -i  : inplace crunching (forces -p)",
		" -q  : quiet mode",
	}
	for _, line := range helpLines {
		fmt.Println(line)
	}
}
812 | 
813 | func main() {
814 | 	ctx := crunchCtx{
815 | 		usePrefixArray: true,
816 | 		STATS:          true,
817 | 	}
818 | 	var jmp_str string
819 | 	var jmp_str2 string
820 | 	flag.BoolVar(&ctx.PRG, "p", false, "")
821 | 	flag.BoolVar(&ctx.QUIET, "q", false, "")
822 | 	flag.BoolVar(&ctx.INPLACE, "i", false, "")
823 | 	flag.StringVar(&jmp_str, "x", "", "")
824 | 	flag.BoolVar(&ctx.BLANK, "b", false, "")
825 | 	flag.StringVar(&jmp_str2, "x2", "", "")
826 | 	flag.Usage = usage
827 | 	flag.Parse()
828 | 
829 | 	if jmp_str != "" {
830 | 		ctx.SFX = true
831 | 		ctx.PRG = true
832 | 		ctx.SFXMODE = 0
833 | 	}
834 | 	if jmp_str2 != "" {
835 | 		ctx.SFX = true
836 | 		ctx.PRG = true
837 | 		ctx.SFXMODE = 1
838 | 		jmp_str = jmp_str2
839 | 	}
840 | 	if ctx.INPLACE {
841 | 		ctx.PRG = true
842 | 	}
843 | 	if flag.NArg() != 2 {
844 | 		usage()
845 | 		os.Exit(2)
846 | 	}
847 | 	if ctx.SFX {
848 | 		if len(jmp_str) == 0 {
849 | 			usage()
850 | 			os.Exit(2)
851 | 		}
852 | 		var jmp uint64
853 | 		var err error
854 | 		// Check if the argument starts with '$'
855 | 		if jmp_str[0] == '$' {
856 | 			jmp, err = strconv.ParseUint(jmp_str[1:], 16, 16)
857 | 		} else if len(jmp_str) > 1 && (jmp_str[:2] == "0x" || jmp_str[:2] == "0X") {
858 | 			// Check for the 0x or 0X prefix
859 | 			jmp, err = strconv.ParseUint(jmp_str[2:], 16, 16)
860 | 		} else {
861 | 			// Otherwise, assume it's a decimal value.
862 | 			jmp, err = strconv.ParseUint(jmp_str, 10, 16)
863 | 		}
864 | 		if err != nil {
865 | 			fmt.Printf("Invalid jump address: %v\n", err)
866 | 			usage()
867 | 			os.Exit(2)
868 | 		}
869 | 		ctx.jmp = uint16(jmp)
870 | 		if ctx.jmp == 0 {
871 | 			usage()
872 | 			os.Exit(2)
873 | 		}
874 | 	}
875 | 
876 | 	ifidx := flag.NArg() - 2
877 | 	ofidx := flag.NArg() - 1
878 | 
879 | 	src := load_raw(flag.Args()[ifidx])
880 | 	crunched := crunch(src, &ctx)
881 | 	save_raw(flag.Args()[ofidx], crunched)
882 | 
883 | 	if !ctx.QUIET {
884 | 		ratio := (float32(ctx.crunchedSize) * 100.0 / float32(ctx.sourceLen))
885 | 		prg := "RAW"
886 | 		dest_prg := "RAW"
887 | 		if ctx.PRG {
888 | 			prg = "PRG"
889 | 		}
890 | 		if ctx.SFX || ctx.INPLACE {
891 | 			dest_prg = "prg"
892 | 		}
893 | 		fmt.Printf("Input file  %s: %s, $%04x - $%04x : %d bytes\n",
894 | 			prg, flag.Args()[ifidx], ctx.decrunchTo, ctx.decrunchEnd, ctx.sourceLen)
895 | 		fmt.Printf("Output file %s: %s, $%04x - $%04x : %d bytes\n",
896 | 			dest_prg, flag.Args()[ofidx], ctx.loadTo, ctx.crunchedSize+int(ctx.loadTo)-1, ctx.crunchedSize)
897 | 		fmt.Printf("Crunched to %.2f%% of original size\n", ratio)
898 | 	}
899 | }
900 | 


--------------------------------------------------------------------------------
/tscrunch.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | TSCrunch 1.3.1 - binary cruncher, by Antonio Savona
  5 | """
  6 | 
  7 | import sys
  8 | 
# --- run-time options (module globals; the command-line entry point rebinds some of them) ---
REVERSELITERAL	=	False	# when true the Decruncher reverses every literal chunk it reads
VERBOSE			=	True	# progress output while crunching; cleared by -q
PRG				=	False	# set together with SFX by -x / -x2
SFX 			=	False	# set by -x / -x2; when set, ocrunch() omits the leading optimal-run byte
SFXMODE 		= 	0		# 0 for -x, 1 for -x2
INPLACE			=	False	# in-place crunching: ocrunch() keeps the tail of the data uncrunched
BLANK 			=	False	# set by -b

DEBUG 			= 	False	# when true ocrunch() prints token statistics (needs the `rich` package)

# --- token size and offset limits ---
LONGESTRLE		=	64		# longest run a single RLE token is given
LONGESTLONGLZ	=	64 		# cap on the match length searched by LZ
LONGESTLZ 		=	32		# longest match that still fits the 2-byte LZ form
LONGESTLITERAL	=	31		# longest literal run in one LIT token
MINRLE			=	2		# shortest run stored as RLE
MINLZ			=	3		# shortest match stored as LZ
LZOFFSET 		=	256		# offsets below this (with size <= LONGESTLZ) use the 2-byte LZ form
LONGLZOFFSET	=	32767	# how far back findall() searches for matches
LZ2OFFSET 		=	94		# how far back LZ2 searches; 127 - 94 = 33 keeps its byte above TERMINATOR
LZ2SIZE 		=	2		# an LZ2 token always copies two bytes

# --- bit patterns or-ed into the first byte of each token ---
RLEMASK 		= 	0x81
LZMASK			= 	0x80
LITERALMASK 	= 	0x00
LZ2MASK 		=	0x00

# byte value that ends the token stream; one more than the largest literal length
TERMINATOR 		=	LONGESTLITERAL + 1 

# --- values stored in Token.type ---
ZERORUNID		=	4
LZ2ID 			=	3
LZID 			= 	2
RLEID 			= 	1
LITERALID 		= 	0
 42 | 
 43 | 
 44 | from scipy.sparse.csgraph import dijkstra
 45 | from scipy.sparse import csr_matrix
 46 | 
# Raw byte table, not interpreted anywhere in the visible part of this file.
# NOTE(review): presumably the self-extractor stub used for -x without -b (the
# Go port in this repository patches a table of the same name at fixed offsets
# and prepends it to the crunched data) - confirm against the entry point.
boot = [

	0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
	0x00, 0x00, 0x78, 0xA2, 0xCC, 0xBD, 0x1A, 0x08, 0x95, 0x00, 0xCA, 0xD0,
	0xF8, 0x4C, 0x02, 0x00, 0x34, 0xBD, 0x00, 0x10, 0x9D, 0x00, 0xFF, 0xE8,
	0xD0, 0xF7, 0xC6, 0x07, 0xA9, 0x06, 0xC7, 0x04, 0x90, 0xEF, 0xA0, 0x00,
	0xB3, 0x24, 0x30, 0x29, 0xC9, 0x20, 0xB0, 0x47, 0xE6, 0x24, 0xD0, 0x02,
	0xE6, 0x25, 0xB9, 0xFF, 0xFF, 0x99, 0xFF, 0xFF, 0xC8, 0xCA, 0xD0, 0xF6,
	0x98, 0xAA, 0xA0, 0x00, 0x65, 0x27, 0x85, 0x27, 0xB0, 0x74, 0x8A, 0x65,
	0x24, 0x85, 0x24, 0x90, 0xD7, 0xE6, 0x25, 0xB0, 0xD3, 0x4B, 0x7F, 0x90,
	0x39, 0xF0, 0x68, 0xA2, 0x02, 0x85, 0x59, 0xC8, 0xB1, 0x24, 0xA4, 0x59,
	0x91, 0x27, 0x88, 0x91, 0x27, 0xD0, 0xFB, 0xA9, 0x00, 0xB0, 0xD5, 0xA9,
	0x37, 0x85, 0x01, 0x58, 0x4C, 0x61, 0x00, 0xF0, 0xF6, 0x09, 0x80, 0x65,
	0x27, 0x85, 0xA0, 0xA5, 0x28, 0xE9, 0x00, 0x85, 0xA1, 0xB1, 0xA0, 0x91,
	0x27, 0xC8, 0xB1, 0xA0, 0x91, 0x27, 0x98, 0xAA, 0xD0, 0xB0, 0x4A, 0x85,
	0xA5, 0xC8, 0xA5, 0x27, 0x90, 0x31, 0xF1, 0x24, 0x85, 0xA0, 0xA5, 0x28,
	0xE9, 0x00, 0x85, 0xA1, 0xA2, 0x02, 0xA0, 0x00, 0xB1, 0xA0, 0x91, 0x27,
	0xC8, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB9, 0xA0, 0x00, 0x91, 0x27, 0xC0,
	0x00, 0xD0, 0xF6, 0x98, 0xB0, 0x84, 0xE6, 0x28, 0x18, 0x90, 0x87, 0xA0,
	0xFF, 0x84, 0x59, 0xA2, 0x01, 0xD0, 0x99, 0x71, 0x24, 0x85, 0xA0, 0xC8,
	0xB3, 0x24, 0x09, 0x80, 0x65, 0x28, 0x85, 0xA1, 0xE0, 0x80, 0x26, 0xA5,
	0xA2, 0x03, 0xD0, 0xC6
	
 	]
 71 | 
# Raw byte table, five bytes longer than a plain stub would need for the same
# trailing byte sequence; not interpreted in the visible part of this file.
# NOTE(review): presumably the self-extractor stub used for -x together with -b
# (screen blanked while decrunching) - confirm against the entry point.
blank_boot = [

	0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
	0x00, 0x00, 0x78, 0xA9, 0x0B, 0x8D, 0x11, 0xD0, 0xA2, 0xCC, 0xBD, 0x1F,
	0x08, 0x95, 0x00, 0xCA, 0xD0, 0xF8, 0x4C, 0x02, 0x00, 0x34, 0xBD, 0x00,
	0x10, 0x9D, 0x00, 0xFF, 0xE8, 0xD0, 0xF7, 0xC6, 0x07, 0xA9, 0x06, 0xC7,
	0x04, 0x90, 0xEF, 0xA0, 0x00, 0xB3, 0x24, 0x30, 0x29, 0xC9, 0x20, 0xB0,
	0x47, 0xE6, 0x24, 0xD0, 0x02, 0xE6, 0x25, 0xB9, 0xFF, 0xFF, 0x99, 0xFF,
	0xFF, 0xC8, 0xCA, 0xD0, 0xF6, 0x98, 0xAA, 0xA0, 0x00, 0x65, 0x27, 0x85,
	0x27, 0xB0, 0x74, 0x8A, 0x65, 0x24, 0x85, 0x24, 0x90, 0xD7, 0xE6, 0x25,
	0xB0, 0xD3, 0x4B, 0x7F, 0x90, 0x39, 0xF0, 0x68, 0xA2, 0x02, 0x85, 0x59,
	0xC8, 0xB1, 0x24, 0xA4, 0x59, 0x91, 0x27, 0x88, 0x91, 0x27, 0xD0, 0xFB,
	0xA9, 0x00, 0xB0, 0xD5, 0xA9, 0x37, 0x85, 0x01, 0x58, 0x4C, 0x61, 0x00,
	0xF0, 0xF6, 0x09, 0x80, 0x65, 0x27, 0x85, 0xA0, 0xA5, 0x28, 0xE9, 0x00,
	0x85, 0xA1, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB1, 0xA0, 0x91, 0x27, 0x98,
	0xAA, 0xD0, 0xB0, 0x4A, 0x85, 0xA5, 0xC8, 0xA5, 0x27, 0x90, 0x31, 0xF1,
	0x24, 0x85, 0xA0, 0xA5, 0x28, 0xE9, 0x00, 0x85, 0xA1, 0xA2, 0x02, 0xA0,
	0x00, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB1, 0xA0, 0x91, 0x27, 0xC8, 0xB9,
	0xA0, 0x00, 0x91, 0x27, 0xC0, 0x00, 0xD0, 0xF6, 0x98, 0xB0, 0x84, 0xE6,
	0x28, 0x18, 0x90, 0x87, 0xA0, 0xFF, 0x84, 0x59, 0xA2, 0x01, 0xD0, 0x99,
	0x71, 0x24, 0x85, 0xA0, 0xC8, 0xB3, 0x24, 0x09, 0x80, 0x65, 0x28, 0x85,
	0xA1, 0xE0, 0x80, 0x26, 0xA5, 0xA2, 0x03, 0xD0, 0xC6

	]
 96 | 
# Raw byte table, not interpreted anywhere in the visible part of this file.
# NOTE(review): presumably the self-extractor stub used for -x2 (sfx code in
# stack, per the usage text) - confirm against the entry point that patches it.
boot2 = [

	0x01, 0x08, 0x0B, 0x08, 0x0A, 0x00, 0x9E, 0x32, 0x30, 0x36, 0x31, 0x00,
	0x00, 0x00, 0x78, 0xA9, 0x34, 0x85, 0x01, 0xA2, 0xD0, 0xBD, 0x1F, 0x08,
	0x9D, 0xFB, 0x00, 0xCA, 0xD0, 0xF7, 0x4C, 0x00, 0x01, 0xAA, 0xAA, 0xAA,
	0xAA, 0xBD, 0x00, 0x10, 0x9D, 0x00, 0xFF, 0xE8, 0xD0, 0xF7, 0xCE, 0x05,
	0x01, 0xA9, 0x06, 0xCF, 0x02, 0x01, 0x90, 0xED, 0xA0, 0x00, 0xB3, 0xFC,
	0x30, 0x27, 0xC9, 0x20, 0xB0, 0x45, 0xE6, 0xFC, 0xD0, 0x02, 0xE6, 0xFD,
	0xB1, 0xFC, 0x91, 0xFE, 0xC8, 0xCA, 0xD0, 0xF8, 0x98, 0xAA, 0xA0, 0x00,
	0x65, 0xFE, 0x85, 0xFE, 0xB0, 0x74, 0x8A, 0x65, 0xFC, 0x85, 0xFC, 0x90,
	0xD9, 0xE6, 0xFD, 0xB0, 0xD5, 0x4B, 0x7F, 0x90, 0x39, 0xF0, 0x68, 0xA2,
	0x02, 0x85, 0xF9, 0xC8, 0xB1, 0xFC, 0xA4, 0xF9, 0x91, 0xFE, 0x88, 0x91,
	0xFE, 0xD0, 0xFB, 0xA5, 0xF9, 0xB0, 0xD5, 0xA9, 0x37, 0x85, 0x01, 0x58,
	0x4C, 0x5F, 0x01, 0xF0, 0xF6, 0x09, 0x80, 0x65, 0xFE, 0x85, 0xFA, 0xA5,
	0xFF, 0xE9, 0x00, 0x85, 0xFB, 0xB1, 0xFA, 0x91, 0xFE, 0xC8, 0xB1, 0xFA,
	0x91, 0xFE, 0x98, 0xAA, 0xD0, 0xB0, 0x4A, 0x8D, 0xA3, 0x01, 0xC8, 0xA5,
	0xFE, 0x90, 0x30, 0xF1, 0xFC, 0x85, 0xFA, 0xA5, 0xFF, 0xE9, 0x00, 0x85,
	0xFB, 0xA2, 0x02, 0xA0, 0x00, 0xB1, 0xFA, 0x91, 0xFE, 0xC8, 0xB1, 0xFA,
	0x91, 0xFE, 0xC8, 0xB1, 0xFA, 0x91, 0xFE, 0xC0, 0x00, 0xD0, 0xF7, 0x98,
	0xB0, 0x84, 0xE6, 0xFF, 0x18, 0x90, 0x87, 0xA0, 0xAA, 0x84, 0xF9, 0xA2,
	0x01, 0xD0, 0x99, 0x71, 0xFC, 0x85, 0xFA, 0xC8, 0xB3, 0xFC, 0x09, 0x80,
	0x65, 0xFF, 0x85, 0xFB, 0xE0, 0x80, 0x2E, 0xA3, 0x01, 0xA2, 0x03, 0xD0,
	0xC6

	]
122 | 
def load_raw(fi):
	"""Read everything left in the binary file object *fi* and return it as bytes."""
	return bytes(fi.read())
126 | 
def save_raw(fo, data):
	"""Write *data* (bytes or any iterable of byte values) to the binary file object *fo*."""
	payload = bytes(data)
	fo.write(payload)
129 | 
def findall(data, prefix, i, minlz = MINLZ):
	"""Yield the start index of each earlier occurrence of *prefix*, nearest first.

	The search window starts at max(0, i - LONGLZOFFSET) and ends so that a
	match of *minlz* bytes must begin before *i*. After every hit the window
	end moves to just before the end of that hit, so the next one starts
	strictly earlier. The window is small, which is why a plain rfind() scan
	is used instead of a suffix structure.
	"""
	window_start = max(0, i - LONGLZOFFSET)
	window_end = min(i + minlz - 1, len(data))
	while True:
		hit = data.rfind(prefix, window_start, window_end)
		if hit < 0:
			return
		yield hit
		window_end = hit + minlz - 1
141 | 	
def progress(description, current, total):
	"""Redraw a 16-character progress bar on the current stdout line.

	Writes a carriage return, *description*, the bar and the integer
	percentage of *current* out of *total*; no newline is emitted.
	"""
	bar_width = 16
	filled = bar_width * current // total
	percent = 100 * current // total
	stars = '*' * filled
	blanks = ' ' * (bar_width - filled)
	sys.stdout.write("\r%s [%s%s]%02d%%" %(description, stars, blanks, percent))
147 | 	
148 | 	
def findOptimalZero(src):
	"""Pick the zero-run length that the one-byte zero-run token should stand for.

	Scans *src* for runs of zero bytes (each run is cut at 256 bytes), counts
	how often every run length of at least MINRLE occurs and returns the
	length with the highest score length * count ** 1.1. Returns LONGESTRLE
	when no such run exists.
	"""
	run_counts = dict()
	total = len(src)
	pos = 0
	while pos < total - 1:
		if src[pos] != 0:
			pos += 1
			continue

		end = pos + 1
		while end < total and src[end] == 0 and end - pos < 256:
			end += 1
		length = end - pos
		if length >= MINRLE:
			run_counts[length] = run_counts.get(length, 0) + 1
		pos = end

	if not run_counts:
		return LONGESTRLE
	# the negated score turns "highest score" into min(); ties keep the first length seen
	best = min(run_counts.items(), key = lambda item: -item[0] * (item[1] ** 1.1))
	return best[0]
168 | 	
169 | 	
class Token:
	"""Base class of the token kinds; a bare Token has no type."""

	def __init__(self, src = None):
		# src is accepted but not stored
		self.type = None
173 | 
174 | 
class ZERORUN(Token):
	"""One-byte token standing for a run of *size* zero bytes.

	After construction `size` is 0 when the run does not apply at position i:
	the *size* bytes from i must all be zero and must end strictly before the
	end of *src*.
	"""

	def __init__(self, src, i, size = LONGESTRLE, token = None):
		self.type = ZERORUNID
		self.size = size
		if token is not None:
			# NOTE(review): fromToken() is not defined in the visible part of this file
			self.fromToken(token)
			return

		ends_inside = i + size < len(src)
		if not (ends_inside and src[i:i+size] == bytes([0] * size)):
			self.size = 0

	def getCost(self):
		"""Edge weight of this token: always 1."""
		return 1

	def getPayload(self):
		"""Encoded form: the bare RLE mask byte (an RLE code with empty length bits)."""
		return [RLEMASK]
190 | 	
class RLE(Token):
	"""Two-byte token: repeat the byte found at src[i] *size* times.

	Without an explicit *size* the run starting at i is measured; counting
	stops at LONGESTRLE + 1 so the caller can tell "longer than the maximum"
	from "exactly the maximum".
	"""

	def __init__(self, src, i, size = None, token = None):
		self.type = RLEID
		self.rleByte = src[i]

		if token is not None:
			# NOTE(review): fromToken() is not defined in the visible part of this file
			self.fromToken(token)
		elif size is not None:
			self.size = size
		else:
			limit = min(len(src) - i, LONGESTRLE + 1)
			run = 0
			while run < limit and src[i + run] == self.rleByte:
				run += 1
			self.size = run

	def getCost(self):
		"""Edge weight: two bytes plus a small term that favours longer runs."""
		return 2 + 0.00128 - 0.00001 * self.size

	def getPayload(self):
		"""Encoded form: mask byte carrying (size - 1) in bits 1-6, then the repeated byte."""
		length_bits = ((self.size - 1) << 1) & 0x7f
		return [RLEMASK | length_bits, self.rleByte]
212 | 	
213 | 	
class LZ(Token):
	"""Back-reference token: copy *size* bytes from *offset* bytes behind position i.

	Encoded in two bytes when offset < LZOFFSET and size <= LONGESTLZ, in
	three bytes otherwise. Without an explicit *size* the constructor searches
	*src* for the best earlier match of at least *minlz* bytes; when none is
	found size is 0 and offset is 1.
	"""

	def __init__(self, src, i, size = None, offset = None, minlz = MINLZ, token = None):
		self.type = LZID

		if token is not None:
			# NOTE(review): fromToken() is not defined in the visible part of this file
			self.fromToken(token)
		elif size is not None:
			# size dictated by the caller; offset is only set if one is passed too
			self.size = size
		else:
			match_pos, match_len = self._search(src, i, minlz)
			self.size = match_len
			self.offset = i - match_pos

		# an explicit offset always wins, whichever branch ran above
		if offset is not None:
			self.offset = offset

	@staticmethod
	def _search(src, i, minlz):
		"""Return (position, length) of the preferred earlier match for src[i:]."""
		match_pos, match_len = i - 1, 0
		total = len(src)
		if total - i < minlz:
			return match_pos, match_len

		for cand in findall(src, src[i:i+minlz], i, minlz):
			run = minlz
			while i + run < total and run < LONGESTLONGLZ and src[cand + run] == src[i + run]:
				run += 1
			# A match two or more bytes longer always wins. A match only one byte
			# longer wins if it is near (short offset), if the current best is far
			# anyway, or if its length forces the three-byte form regardless.
			clearly_longer = run > match_len + 1
			no_worse_form = i - cand < LZOFFSET or i - match_pos >= LZOFFSET or run > LONGESTLZ
			if clearly_longer or (run > match_len and no_worse_form):
				match_pos, match_len = cand, run

		return match_pos, match_len

	def _needs_long_form(self):
		"""True when the token does not fit the two-byte encoding."""
		return self.offset >= LZOFFSET or self.size > LONGESTLZ

	def getCost(self):
		"""Edge weight: encoded length plus a small term that favours longer matches."""
		if self._needs_long_form():
			return 3 + 0.00138 - 0.00001 * self.size
		return 2 + 0.00134 - 0.00001 * self.size

	def getPayload(self):
		"""Encoded form of the token (two or three byte values)."""
		length = self.size - 1
		if self._needs_long_form():
			# offset is stored negated in 15 bits; the lowest length bit rides in bit 7 of the last byte
			neg = 0 - self.offset
			head = LZMASK | (((length >> 1) << 2) & 0x7f)
			low = neg & 0xff
			high = ((neg >> 8) & 0x7f) | ((length & 1) << 7)
			return [head, low, high]
		# bit 1 set marks the two-byte form
		return [LZMASK | ((length << 2) & 0x7f) | 2, self.offset & 0xff]
254 | 
255 | 
class LZ2(Token):
	"""One-byte back-reference that copies two bytes from at most LZ2OFFSET bytes back.

	Without an explicit *offset* the nearest earlier occurrence of
	src[i:i+LZ2SIZE] is looked up; offset is -1 when there is none or when
	fewer than three bytes remain from position i.
	"""

	def __init__(self, src, i, offset = None, token = None):
		self.type = LZ2ID
		self.size = 2

		if token is not None:
			# NOTE(review): fromToken() is not defined in the visible part of this file
			self.fromToken(token)
		elif offset is not None:
			self.offset = offset
		elif i + 2 < len(src):
			# end bound i + 1 forces the match to start before i
			hit = src.rfind(src[i:i+LZ2SIZE], max(0, i - LZ2OFFSET), i + 1)
			self.offset = i - hit if hit >= 0 else -1
		else:
			self.offset = -1

	def getCost(self):
		"""Edge weight: one byte plus a small size-dependent term."""
		return 1 + 0.00132 - 0.00001 * self.size

	def getPayload(self):
		"""Encoded form: a single byte holding 127 - offset."""
		distance_code = 127 - self.offset
		return [LZ2MASK | distance_code]
284 | 	
285 | 	
class LIT(Token):
	"""Literal token: *size* bytes copied verbatim from src[start:start + size].

	The constructor creates a one-byte literal; callers enlarge it by
	assigning `size` afterwards.
	"""

	def __init__(self, src, i, token = None):
		self.type = LITERALID
		self.size = 1
		self.start = i
		# Keep a reference to the buffer the token was built from, so that
		# getPayload() reads from it rather than from a module-level `src`.
		self.src = src

		if token is not None:
			# NOTE(review): fromToken() is not defined in the visible part of this file
			self.fromToken(token)

	def getCost(self):
		"""Edge weight: header byte plus the literal bytes, with a small size-dependent term."""
		return self.size + 1 + 0.00130 - 0.00001 * self.size

	def getPayload(self):
		"""Encoded form: one length byte followed by the literal bytes."""
		literal = self.src[self.start : self.start + self.size]
		return bytes([LITERALMASK | (self.size)]) + bytes(literal)
300 | 	
301 | 	
class Cruncher:
	"""Compress a byte buffer into a token stream by shortest-path parsing.

	Every byte position is a graph vertex and every possible token an edge
	from its start to its end position, weighted with the token's getCost().
	The cheapest path from the first to the last vertex selects the tokens.

	After ocrunch() returns, `crunched` holds the output bytes, `crunchedSize`
	their length and `token_list` the chosen tokens in order.
	"""

	def __init__(self, src = None):
		self.crunched = []		# output being assembled; becomes bytes at the end of ocrunch()
		self.token_list = []	# tokens along the cheapest path
		self.src = src
		self.graph = dict()		# (start, end) -> token
		self.crunchedSize = 0

	def get_path(self, p):	
		"""Turn a predecessor array into the list of (start, end) edges ending at the last vertex.

		Walks back from the last index of *p* until a negative predecessor is
		met. If the last vertex has no predecessor the result is empty.
		"""
		i = len(p) - 1
		path = [i]
		while p[i] >= 0:
			path.append(p[i])
			i = p[i]
		path.reverse()
		return list(zip(path[::], path[1::]))

	def prepend(self, data):
		"""Put *data* in front of the crunched output (which becomes bytes)."""
		self.crunched = bytes(data) + bytes(self.crunched)

	def ocrunch(self):
		"""Crunch self.src and store the result in self.crunched.

		Steps: collect LZ / RLE / LZ2 / zero-run candidates for every
		position, add literal edges wherever no other token covers a
		(start, end) pair, find the cheapest path with Dijkstra, then emit the
		payloads of the chosen tokens followed by TERMINATOR.

		Reads the module globals INPLACE, SFX, VERBOSE and DEBUG. In INPLACE
		mode it also reads a module-level name `addr`, which is not defined in
		this class (presumably the two-byte load address of the input -
		confirm against the entry point).
		"""
		from concurrent.futures import ThreadPoolExecutor
		from multiprocessing import cpu_count
		import itertools
		from scipy.sparse import csr_matrix
		from scipy.sparse.csgraph import dijkstra

		if INPLACE:	
			# the very last input byte is never crunched in in-place mode
			remainder = self.src[-1:]
			src = bytes(self.src[:-1])
		else:
			src = bytes(self.src)

		self.optimalRun = findOptimalZero(src)

		progress_string = "Populating LZ layer\t"
		if VERBOSE:
			progress(progress_string, 0, 1)

		def process_token_candidates(i):
			"""Return {(start, end): token} for all non-literal tokens starting at i."""
			tmp_graph = {}
			rle = RLE(src, i)
			rlesize = min(rle.size, LONGESTRLE)
			if rlesize < LONGESTLONGLZ - 1:
				# only matches longer than the RLE run are worth searching for
				lz = LZ(src, i, minlz = max(rlesize + 1, MINLZ))
			else:
				# size 1 is below MINLZ, so the loop below adds no LZ edge
				lz = LZ(src, i, size = 1)

			# add the match and every shorter version of it down to the RLE length
			while lz.size >= MINLZ and lz.size > rlesize:
				tmp_graph[(i, i+lz.size)] = lz
				lz = LZ(src, i, size = lz.size - 1, offset = lz.offset)

			if rle.size > LONGESTRLE:
				rle = RLE(src, i, LONGESTRLE)
				tmp_graph[(i, i+LONGESTRLE)] = rle
			else:
				while rle.size >= MINRLE:
					tmp_graph[(i, i+rle.size)] = rle
					rle = RLE(src, i, rle.size - 1)

			lz2 = LZ2(src, i)
			if lz2.offset > 0:
				tmp_graph[(i, i+LZ2SIZE)] = lz2

			zero = ZERORUN(src, i, self.optimalRun)
			if zero.size > 0:
				tmp_graph[(i, i+self.optimalRun)] = zero

			return tmp_graph

		with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
			results = executor.map(process_token_candidates, range(len(src)))
			for partial_graph in results:
				self.graph.update(partial_graph)

		if VERBOSE:
			progress(progress_string, 1, 1)
			sys.stdout.write('\n')

		progress_string = "Closing gaps\t\t"
		if VERBOSE:
			progress(progress_string, 0, 1)

		def fill_literals_chunk(start, end):
			"""Return literal edges for positions start..end-1 that no other token covers."""
			tmp_graph = {}
			for i in range(start, end):
				for j in range(1, min(LONGESTLITERAL + 1, len(src) + 1 - i)):
					if (i, i + j) not in self.graph:
						lit = LIT(src, i)
						lit.size = j
						tmp_graph[(i, i + j)] = lit
			return tmp_graph

		chunksize = 512
		# The chunks must cover every position up to and including the last
		# byte: the one-byte literal edge (len(src) - 1, len(src)) is what
		# guarantees the final vertex is reachable from any parse.
		indices = [(i, min(i + chunksize, len(src))) for i in range(0, len(src), chunksize)]
		with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
			results = executor.map(lambda args: fill_literals_chunk(*args), indices)
			for partial_graph in results:
				self.graph.update(partial_graph)

		if VERBOSE:
			progress(progress_string, 1, 1)
			sys.stdout.write('\n')

		progress_string = "Populating graph\t"
		if VERBOSE:
			progress(progress_string, 0, 3)

		graph_items = list(self.graph.items())

		def compute_weights_for_chunk(chunk):
			return [v.getCost() for _, v in chunk]

		chunk_size = max(1, len(graph_items) // cpu_count())
		chunks = [graph_items[i:i + chunk_size] for i in range(0, len(graph_items), chunk_size)]

		with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
			weights_chunks = list(executor.map(compute_weights_for_chunk, chunks))

		# weights, sources and targets all follow the insertion order of self.graph
		weights = list(itertools.chain.from_iterable(weights_chunks))
		sources = tuple(s for s, _ in self.graph.keys())
		targets = tuple(t for _, t in self.graph.keys())

		if VERBOSE:
			progress(progress_string, 1, 3)

		n = len(src) + 1
		dgraph = csr_matrix((weights, (sources, targets)), shape=(n, n))

		if VERBOSE:
			progress(progress_string, 2, 3)
			progress(progress_string, 3, 3)
			sys.stdout.write('\ncomputing shortest path\n')

		d, p = dijkstra(dgraph, indices=0, return_predecessors=True)
		for key in self.get_path(p):
			self.token_list.append(self.graph[key])


		if INPLACE:
			# Walk the tokens backwards and find the earliest token from which the
			# tail is stored raw instead of crunched: a trailing segment is kept
			# raw whenever its raw size does not exceed its crunched size.
			safety = len(self.token_list)
			segment_uncrunched_size = 0
			segment_crunched_size = 0
			total_uncrunched_size = 0
			for i in range(len(self.token_list) - 1, -1, -1):
				segment_crunched_size+=len(self.token_list[i].getPayload()) #token size
				segment_uncrunched_size+=self.token_list[i].size #decrunched token raw size
				if segment_uncrunched_size <= segment_crunched_size + 0:
					safety = i
					total_uncrunched_size+=segment_uncrunched_size
					segment_uncrunched_size = 0
					segment_crunched_size = 0

			for token in (self.token_list[:safety]):
				self.crunched.extend(token.getPayload())
			if total_uncrunched_size > 0:
				remainder = src[-total_uncrunched_size:] + remainder
			self.crunched.extend(bytes([TERMINATOR]) + remainder[1:])
			# layout: addr, optimal run - 1, first raw tail byte, tokens, TERMINATOR, rest of raw tail
			self.crunched = addr + bytes([self.optimalRun - 1]) + remainder[:1] + bytes(self.crunched)
			
		else:
			if not SFX:
				# plain output starts with the zero-run length minus one
				self.crunched.extend([self.optimalRun - 1])
			for token in (self.token_list):
				self.crunched.extend(token.getPayload())	
			self.crunched = bytes(self.crunched + [TERMINATOR])
		self.crunchedSize = len(self.crunched)	

		if DEBUG:
			nlz2 = 0; nlzl = 0; nlz = 0; nrle = 0; nlit = 0; nz = 0; nlit1 = 0
			lzstat = [0] * (LONGESTLONGLZ + 1)

			for token in self.token_list:
				if token.type == LITERALID:
					nlit+=1
					if token.size == 1:
						nlit1+=1
				elif token.type == LZ2ID:
					nlz2+=1
				elif token.type == RLEID:
					nrle +=1
				elif token.type == ZERORUNID:
					nz +=1
				else:
					payload = token.getPayload()
					stat = (payload[0] & 0x7f) >> 2
					if len(payload) == 3:
						nlzl+=1
						# the lowest bit of (size - 1) is stored in bit 7 of the third byte
						stat = (stat << 1) | (1 if payload[2] >= 128 else 0)
					else:
						nlz+=1
					lzstat[stat+1] += 1
			
			tot = sum((nlz, nlzl, nlz2, nrle, nz, nlit))
			sys.stdout.write ("lz: %d, lzl: %d, lz2: %d, rle: %d, zero: %d, lit: %d (1 = %d) tot: %d\n" % (nlz,nlzl,nlz2,nrle,nz,nlit,nlit1,tot))
			from rich.console import Console
			from rich.style import Style

			console = Console()
			data = lzstat
			max_val = max(data)
			bar_width = 80

			for i, val in enumerate(data):
				# max_val is 0 when no LZ token was chosen; draw empty bars then
				filled = int(val / max_val * bar_width) if max_val else 0
				empty = bar_width - filled
				bar = f"[bold green]{'█' * filled}[/][dim]{'░' * empty}[/]"
				console.print(f"{i:02d}: {bar} ({val})")
513 | 
514 | 
515 | class Decruncher:
516 | 	def __init__(self, src = None):
517 | 
518 | 		self.src = src
519 | 		self.decrunch()
520 | 				
521 | 	def decrunch(self, src = None):
522 | 		
523 | 		if src != None:
524 | 			self.src = src
525 | 		if self.src == None:
526 | 			self.decrunched = None
527 | 		else:
528 | 			
529 | 			nlz2 = 0; nlz = 0; nrle = 0; nz = 0; nlit = 0; 
530 | 			
531 | 			self.decrunched = bytearray([])
532 | 			self.optimalRun = self.src[0] + 1
533 | 			i=1
534 | 			while self.src[i] != TERMINATOR:
535 | 				
536 | 				code = self.src[i]
537 | 				if ((code & 0x80 == LITERALMASK) and code & 0x7f < 32) :
538 | 										
539 | 					run = (code & 0x1f)
540 | 					chunk = self.src[i + 1 : i + run + 1]
541 | 					if REVERSELITERAL:
542 | 						chunk.reverse()
543 | 					self.decrunched.extend(chunk)
544 | 					i+=run + 1
545 | 					nlit+=1
546 | 							
547 | 				elif (code & 0x80 == LZ2MASK):
548 | 					
549 | 					run = LZ2SIZE
550 | 					offset =  127 - (code & 0x7f) 
551 | 					p = len(self.decrunched)
552 | 					for l in range(run):
553 | 						self.decrunched.append(self.decrunched[p - offset + l])
554 | 					i+=1
555 | 					nlz2+=1	
556 | 					
557 | 				elif (code & 0x81) == RLEMASK and (code & 0x7e) != 0:
558 | 					run = ((code & 0x7f) >> 1) + 1
559 | 					self.decrunched.extend([self.src[i+1]] * run)
560 | 					i+=2
561 | 					nrle+=1
562 | 					
563 | 				elif (code & 0x81) == RLEMASK and (code & 0x7e)	== 0:
564 | 					run = self.optimalRun
565 | 					self.decrunched.extend(bytes([0] * run))
566 | 					i+=1
567 | 					nz+=1
568 | 					
569 | 				else:
570 | 					if (code & 2) == 2:
571 | 						run = ((code & 0x7f) >> 2) + 1
572 | 						offset = self.src[i+1]
573 | 						i+=2
574 | 					else:
575 | 						lookahead = self.src[i+2]
576 | 						run = 1 + (((code & 0x7f) >> 2) << 1) + (1 if (lookahead & 128 == 128) else 0)
577 | 						offset =  32768 - (self.src[i+1]  + 256 * (lookahead & 0x7f))
578 | 						i+=3
579 | 					p = len(self.decrunched)
580 | 					for l in range(run):
581 | 						self.decrunched.append(self.decrunched[p - offset + l])			
582 | 					nlz+=1
583 | 					
584 | 			tot = sum((nlz, nlz2, nrle, nz, nlit))
585 | 			sys.stdout.write ("lz: %d, lz2: %d, rle: %d, nz: %d,  lit: %d tot: %d\n" % (nlz, nlz2, nrle, nz, nlit, tot))
586 | 
587 | 			# Token transition analysis
588 | 			from collections import defaultdict, Counter
589 | 			token_ids = []
590 | 			i = 1
591 | 			while self.src[i] != TERMINATOR:
592 | 				code = self.src[i]
593 | 				if ((code & 0x80 == LITERALMASK) and code & 0x7f < 32):
594 | 					token_ids.append(LITERALID)
595 | 					run = (code & 0x1f)
596 | 					i += run + 1
597 | 				elif (code & 0x80 == LZ2MASK):
598 | 					token_ids.append(LZ2ID)
599 | 					i += 1
600 | 				elif (code & 0x81) == RLEMASK and (code & 0x7e) != 0:
601 | 					token_ids.append(RLEID)
602 | 					i += 2
603 | 				elif (code & 0x81) == RLEMASK and (code & 0x7e) == 0:
604 | 					token_ids.append(ZERORUNID)
605 | 					i += 1
606 | 				else:
607 | 					token_ids.append(LZID)
608 | 					if (code & 2) == 2:
609 | 						i += 2
610 | 					else:
611 | 						i += 3
612 | 
613 | 			# Build token bigram frequencies
614 | 			transitions = defaultdict(Counter)
615 | 			for prev, curr in zip(token_ids, token_ids[1:]):
616 | 				transitions[prev][curr] += 1
617 | 
			# Print the transition frequencies (every follower seen at least once, most frequent first)
619 | 			print("\nToken transitions (most common followers):\n")
620 | 			token_names = {LITERALID: "LIT", RLEID: "RLE", LZID: "LZ", LZ2ID: "LZ2", ZERORUNID: "ZERO"}
621 | 			for t, counter in transitions.items():
622 | 				sorted_followers = [(token_names.get(k, k), v) for k, v in counter.items() if v >= 1]
623 | 				sorted_followers.sort(key=lambda x: -x[1])
624 | 				if sorted_followers:
625 | 					print(f"{token_names.get(t, t)}: ", end='')
626 | 					print(", ".join(f"{k}({v})" for k, v in sorted_followers))
627 | 	
def usage():
	"""Print the tscrunch command-line help text to stdout."""
	help_lines = (
		"TSCrunch 1.3.1 - binary cruncher, by Antonio Savona",
		"Usage: tscrunch [-p] [-i] [-r] [-q] [-x[2] $addr] infile outfile",
		" -p  : input file is a prg, first 2 bytes are discarded",
		" -x  $addr: creates a self extracting file (forces -p)",
		" -x2 $addr: creates a self extracting file with sfx code in stack (forces -p)",
		" -b  : blanks screen during decrunching (only with -x)",
		" -i  : inplace crunching (forces -p)",
		" -q  : quiet mode",
	)
	# One print per entry, so each line ends with a newline.
	for text in help_lines:
		print(text)
637 | 	
638 | 
if __name__ == "__main__":
	# Command-line entry point. Parses the option flags, crunches the input
	# file, optionally wraps the result in a self-extracting boot stub
	# (-x / -x2) or prepends an in-place load address (-i), writes the output
	# file and, unless -q was given, prints a size report.
	# The last two arguments are always taken as infile and outfile.

	if "-h" in sys.argv or len(sys.argv) < 3:
		usage()
	else:

		if "-q" in sys.argv:
			VERBOSE = False

		if "-x" in sys.argv:
			SFX = True
			SFXMODE = 0
			PRG = True
			# The jump address is the argument right after the flag, in hex;
			# any "$" characters at either end are stripped before parsing.
			jmp_str = sys.argv[sys.argv.index("-x") + 1].strip("$")
			jmp = int(jmp_str, base = 16)

		if "-x2" in sys.argv:
			SFX = True
			SFXMODE = 1
			PRG = True
			jmp_str = sys.argv[sys.argv.index("-x2") + 1].strip("$")
			jmp = int(jmp_str, base = 16)

		if "-b" in sys.argv:
			BLANK = True

		if "-i" in sys.argv:
			INPLACE = True
			PRG = True

		if "-p" in sys.argv:
			PRG = True

		if SFX and INPLACE:
			sys.stderr.write ("Can't create an sfx prg with inplace crunching\n")
			# sys.exit rather than the site-provided exit(), which is meant
			# for the interactive shell and may be absent (e.g. python -S).
			sys.exit(-1)

		# Context manager so the input handle is always released.
		with open(sys.argv[-2], "rb") as fr:
			src = load_raw(fr)

		# Size of the input file as read, including the 2 address bytes of a prg.
		sourceLen = len(src)

		decrunchTo = 0
		loadTo = 0

		if PRG:
			# First two bytes are a little-endian 16 bit address; they are
			# dropped from the payload and used as the decrunch destination.
			addr = src[:2]
			src = src[2:]
			decrunchTo = addr[0] + 256 * addr[1]

		cruncher = Cruncher(src)
		cruncher.ocrunch()

		if SFX:
			if SFXMODE == 0:
				# gap shifts every patch offset when the blanking boot stub is used.
				gap = 0
				if BLANK:
					boot = blank_boot
					gap = 5

				fileLen = len(boot) + len(cruncher.crunched)
				startAddress = 0x10000 - len(cruncher.crunched)
				transfAddress =  fileLen + 0x6ff

				boot[0x1e + gap] = transfAddress & 0xff #transfer from
				boot[0x1f + gap] = transfAddress >> 8

				boot[0x3f + gap] = startAddress & 0xff # Depack from..
				boot[0x40 + gap] = startAddress >> 8

				boot[0x42 + gap] = decrunchTo & 0xff # decrunch to..
				boot[0x43 + gap] = decrunchTo >> 8

				boot[0x7d + gap] = jmp & 0xff # Jump to..
				boot[0x7e + gap] = jmp >> 8

				boot[0xcc + gap] = cruncher.optimalRun - 1

			else:
				boot = boot2
				fileLen = len(boot) + len(cruncher.crunched)
				startAddress = 0x10000 - len(cruncher.crunched)
				transfAddress =  fileLen + 0x6ff

				boot[0x26] = transfAddress & 0xff #transfer from
				boot[0x27] = transfAddress >> 8

				boot[0x21] = startAddress & 0xff # Depack from..
				boot[0x22] = startAddress >> 8

				boot[0x23] = decrunchTo & 0xff # decrunch to..
				boot[0x24] = decrunchTo >> 8

				boot[0x85] = jmp & 0xff # Jump to..
				boot[0x86] = jmp >> 8

				boot[0xd4] = cruncher.optimalRun - 1

			cruncher.prepend(boot)

			cruncher.crunchedSize+=len(boot)
			loadTo = 0x0801


		decrunchEnd = decrunchTo + len(src) - 1

		if INPLACE:
			# Place the crunched data so that it ends exactly at decrunchEnd,
			# and prepend that load address (low byte first) to the output.
			loadTo = decrunchEnd - len(cruncher.crunched) + 1
			cruncher.prepend([loadTo & 255, loadTo >> 8])

		# Context manager so the output is flushed and closed even on error.
		with open(sys.argv[-1], "wb") as fo:
			save_raw(fo, cruncher.crunched)

		if VERBOSE:
			ratio = (float(cruncher.crunchedSize) * 100.0 / sourceLen)
			print ("input file  %s: %s, $%04x - $%04x : %d bytes"
			  %("PRG" if PRG else "RAW", sys.argv[-2], decrunchTo, decrunchEnd, sourceLen))
			print ("output file %s: %s, $%04x - $%04x : %d bytes"
			  %("PRG" if SFX or INPLACE else "RAW", sys.argv[-1],  loadTo, cruncher.crunchedSize + loadTo - 1, cruncher.crunchedSize))
			print ("crunched to %.2f%% of original size" %ratio)

		if DEBUG and not (SFX or INPLACE):
			# Round-trip check: decrunch the raw output, dump it to test.raw
			# and verify it matches the crunched payload's source bytes.
			decruncher = Decruncher(cruncher.crunched)

			with open("test.raw", "wb") as fo:
				save_raw(fo, decruncher.decrunched)

			assert(decruncher.decrunched == src)
771 | 


--------------------------------------------------------------------------------