├── installers ├── .gitkeep ├── genozip-osx-arm.tar ├── genozip-osx-x86.tar ├── genozip-installer.exe └── genozip-linux-x86_64.tar ├── src ├── mac │ ├── Resources │ │ └── .gitkeep │ ├── darwinpkg │ │ ├── .gitkeep │ │ └── Library │ │ │ ├── .gitkeep │ │ │ └── genozip │ │ │ └── .gitkeep │ ├── scripts │ │ └── .gitkeep │ ├── welcome.template.html │ ├── postinstall.template.sh │ ├── Distribution │ └── uninstall.template.sh ├── objdir.linux │ ├── .gitkeep │ ├── bsc │ │ └── .gitkeep │ ├── bzlib │ │ └── .gitkeep │ ├── igzip │ │ ├── .gitkeep │ │ ├── noarch │ │ │ └── .gitkeep │ │ ├── x86_64 │ │ │ └── .gitkeep │ │ └── aarch64 │ │ │ └── .gitkeep │ ├── lzma │ │ └── .gitkeep │ ├── secure │ │ ├── .gitkeep │ │ └── license.o │ ├── zlib-ng │ │ └── .gitkeep │ ├── zlib │ │ └── .gitkeep │ ├── htscodecs │ │ └── .gitkeep │ ├── zlib-intel │ │ └── .gitkeep │ ├── libdeflate_1.19 │ │ ├── .gitkeep │ │ ├── arm │ │ │ └── .gitkeep │ │ └── x86 │ │ │ └── .gitkeep │ └── libdeflate_1.7 │ │ └── .gitkeep ├── objdir.osx-arm │ ├── .gitkeep │ └── secure │ │ ├── .gitkeep │ │ └── license.o ├── objdir.osx-x86 │ ├── .gitkeep │ ├── bsc │ │ └── .gitkeep │ ├── bzlib │ │ └── .gitkeep │ ├── igzip │ │ ├── .gitkeep │ │ ├── aarch64 │ │ │ └── .gitkeep │ │ ├── noarch │ │ │ └── .gitkeep │ │ └── x86_64 │ │ │ └── .gitkeep │ ├── lzma │ │ └── .gitkeep │ ├── zlib │ │ └── .gitkeep │ ├── htscodecs │ │ └── .gitkeep │ ├── libdeflate │ │ └── .gitkeep │ ├── secure │ │ ├── .gitkeep │ │ └── license.o │ ├── zlib-intel │ │ └── .gitkeep │ ├── zlib-ng │ │ └── .gitkeep │ ├── libdeflate_1.19 │ │ ├── .gitkeep │ │ ├── arm │ │ │ └── .gitkeep │ │ └── x86 │ │ │ └── .gitkeep │ └── libdeflate_1.7 │ │ └── .gitkeep ├── objdir.windows │ ├── .gitkeep │ ├── bsc │ │ └── .gitkeep │ ├── bzlib │ │ └── .gitkeep │ ├── igzip │ │ ├── .gitkeep │ │ ├── aarch64 │ │ │ └── .gitkeep │ │ ├── noarch │ │ │ └── .gitkeep │ │ └── x86_64 │ │ │ └── .gitkeep │ ├── lzma │ │ └── .gitkeep │ ├── zlib │ │ └── .gitkeep │ ├── htscodecs │ │ └── .gitkeep │ ├── libdeflate │ │ └── 
.gitkeep │ ├── secure │ │ ├── .gitkeep │ │ └── license.o │ ├── zlib-intel │ │ └── .gitkeep │ ├── zlib-ng │ │ └── .gitkeep │ ├── libdeflate_1.19 │ │ ├── .gitkeep │ │ ├── arm │ │ │ └── .gitkeep │ │ └── x86 │ │ │ └── .gitkeep │ └── libdeflate_1.7 │ │ └── .gitkeep ├── conda │ ├── build.sh │ ├── bld.bat │ ├── generate_README.sh │ └── generate_meta.sh ├── igzip │ ├── flatten_ll.h │ ├── x86_64 │ │ ├── igzip_decode_block_stateless_01.asm │ │ ├── igzip_decode_block_stateless_04.asm │ │ ├── igzip_update_histogram_01.asm │ │ ├── igzip_update_histogram_04.asm │ │ ├── igzip_inflate_multibinary.asm │ │ ├── crc_multibinary.asm │ │ └── bitbuf2.asm │ ├── igzip_checksums.h │ ├── aarch64 │ │ ├── aarch64_label.h │ │ ├── igzip_inflate_multibinary_arm64.S │ │ ├── crc32_gzip_refl_pmull.S │ │ ├── crc_multibinary_arm.S │ │ ├── stdmac_aarch64.h │ │ ├── igzip_multibinary_arm64.S │ │ ├── bitbuf2_aarch64.h │ │ ├── options_aarch64.h │ │ ├── crc_aarch64_dispatcher.c │ │ └── crc32_gzip_refl_crc_ext.S │ ├── flatten_ll.c │ ├── encode_df.c │ ├── encode_df.h │ ├── igzip_level_buf_structs.h │ ├── LICENSE │ ├── noarch │ │ └── crc_base_aliases.c │ ├── igzip_wrapper.h │ └── adler32_base.c ├── windows │ ├── genozip.ico │ ├── genozip-installer.jpg │ └── generate-ifp.sh ├── objdir.mac │ └── secure │ │ └── license.o ├── wsl-make.sh ├── docker │ └── Dockerfile ├── version.h ├── libdeflate_1.7 │ ├── deflate_compress.h │ ├── COPYING │ └── deflate_constants.h ├── libdeflate_1.19 │ ├── deflate_compress.h │ ├── .cirrus.yml │ ├── COPYING │ ├── gzip_constants.h │ ├── deflate_constants.h │ ├── x86 │ │ └── decompress_impl.h │ └── arm │ │ └── matchfinder_impl.h ├── bzlib │ ├── README.bzlib │ └── LICENSE.bzlib ├── genols.h ├── bsc │ ├── AUTHORS.libbsc │ └── adler32.h ├── html-to-md.sed ├── bases_filter.h ├── user_message.h ├── zriter.h ├── biopsy.h ├── mac_compat.h ├── tokenizer.h ├── qname_filter.h ├── aliases.h ├── base64.h ├── zip.h ├── zlib │ └── README.zlib ├── md5.h ├── aes.h ├── coverage.h ├── distribution.c ├── 
dict_io.h ├── singularity │ └── Singularity ├── tar.h ├── tip.h ├── zip_dyn_int.h ├── b250.h ├── txtheader.h ├── filename.h ├── crypt.h ├── progress.h ├── sam_blasr.c ├── compressor.h ├── sam_cpu.c ├── generic.h ├── license.h ├── fasta_private.h ├── user_message.c ├── stats.h ├── vcf_cosmic.c ├── regions.h ├── sam_bowtie2.c ├── locs.h ├── vcf_1000G.c ├── writer.h ├── vcf_gwas.c ├── url.h ├── codec_none.c ├── lookback.h ├── refhash.h ├── txtfile.h ├── sam_dragen.c ├── vcf_vagrent.c ├── me23.h ├── mac_compat.c ├── vcf_vblock.c ├── aligner.h ├── arch.h ├── chrom.h ├── stream.h ├── bed.h ├── sam_hisat2.c ├── threads.h ├── ref_iupacs.h ├── lzma │ ├── LzHash.h │ ├── LzmaEnc.h │ └── LzFind.h ├── sam_tmap.c ├── hash.h ├── piz.h ├── text_help.h ├── multiplexer.h ├── vcf_melt.c ├── random_access.h ├── htscodecs │ ├── LICENSE.htscodecs.md │ └── rANS_static4x16.h ├── buf_list.h ├── vcf_local_alleles.c ├── sam_gem3.c ├── huffman.h └── mac-pkg-build.sh ├── .vscode ├── .gdbinit ├── c_cpp_properties.json ├── settings.json └── launch.json ├── AUTHORS ├── easybuild ├── build-eb-genozip-latest.sh └── genozip.eb ├── Makefile ├── .gitattributes ├── genozip.code-workspace └── .gitignore /installers/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mac/Resources/.gitkeep: -------------------------------------------------------------------------------- 1 | hi -------------------------------------------------------------------------------- /src/mac/darwinpkg/.gitkeep: -------------------------------------------------------------------------------- 1 | hi -------------------------------------------------------------------------------- /src/mac/scripts/.gitkeep: -------------------------------------------------------------------------------- 1 | hi -------------------------------------------------------------------------------- /src/objdir.linux/.gitkeep: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-arm/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/bsc/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/bzlib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/igzip/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/lzma/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/secure/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/zlib-ng/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/zlib/.gitkeep: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/bsc/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/bzlib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/igzip/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/lzma/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/zlib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/bsc/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/bzlib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/igzip/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/lzma/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/src/objdir.windows/zlib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mac/darwinpkg/Library/.gitkeep: -------------------------------------------------------------------------------- 1 | hi -------------------------------------------------------------------------------- /src/objdir.linux/htscodecs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/igzip/noarch/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/igzip/x86_64/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/zlib-intel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-arm/secure/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/htscodecs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/libdeflate/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/secure/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/objdir.osx-x86/zlib-intel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/zlib-ng/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/htscodecs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/libdeflate/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/secure/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/zlib-intel/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/zlib-ng/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/igzip/aarch64/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/libdeflate_1.19/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/libdeflate_1.7/.gitkeep: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/igzip/aarch64/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/igzip/noarch/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/igzip/x86_64/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/libdeflate_1.19/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/libdeflate_1.7/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/igzip/aarch64/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/igzip/noarch/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/igzip/x86_64/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/libdeflate_1.19/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/objdir.windows/libdeflate_1.7/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mac/darwinpkg/Library/genozip/.gitkeep: -------------------------------------------------------------------------------- 1 | hi -------------------------------------------------------------------------------- /src/objdir.linux/libdeflate_1.19/arm/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.linux/libdeflate_1.19/x86/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/libdeflate_1.19/arm/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.osx-x86/libdeflate_1.19/x86/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/libdeflate_1.19/arm/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/objdir.windows/libdeflate_1.19/x86/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/conda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | make -C src install 
-------------------------------------------------------------------------------- /src/igzip/flatten_ll.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void flatten_ll(uint32_t *ll_hist); 4 | -------------------------------------------------------------------------------- /src/windows/genozip.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/windows/genozip.ico -------------------------------------------------------------------------------- /.vscode/.gdbinit: -------------------------------------------------------------------------------- 1 | # see internal-docs/SIGTRAP in memory functions - Windows gdb.txt 2 | handle SIGTRAP nostop 3 | -------------------------------------------------------------------------------- /installers/genozip-osx-arm.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/installers/genozip-osx-arm.tar -------------------------------------------------------------------------------- /installers/genozip-osx-x86.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/installers/genozip-osx-x86.tar -------------------------------------------------------------------------------- /installers/genozip-installer.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/installers/genozip-installer.exe -------------------------------------------------------------------------------- /src/objdir.linux/secure/license.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/objdir.linux/secure/license.o 
-------------------------------------------------------------------------------- /src/objdir.mac/secure/license.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/objdir.mac/secure/license.o -------------------------------------------------------------------------------- /src/windows/genozip-installer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/windows/genozip-installer.jpg -------------------------------------------------------------------------------- /installers/genozip-linux-x86_64.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/installers/genozip-linux-x86_64.tar -------------------------------------------------------------------------------- /src/objdir.osx-arm/secure/license.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/objdir.osx-arm/secure/license.o -------------------------------------------------------------------------------- /src/objdir.osx-x86/secure/license.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/objdir.osx-x86/secure/license.o -------------------------------------------------------------------------------- /src/objdir.windows/secure/license.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/divonlan/genozip/HEAD/src/objdir.windows/secure/license.o -------------------------------------------------------------------------------- /src/igzip/x86_64/igzip_decode_block_stateless_01.asm: -------------------------------------------------------------------------------- 1 | %define ARCH 01 2 | 3 | %include 
"igzip_decode_block_stateless.asm" 4 | -------------------------------------------------------------------------------- /src/igzip/x86_64/igzip_decode_block_stateless_04.asm: -------------------------------------------------------------------------------- 1 | %define ARCH 04 2 | %define USE_HSWNI 3 | 4 | %include "igzip_decode_block_stateless.asm" 5 | -------------------------------------------------------------------------------- /src/wsl-make.sh: -------------------------------------------------------------------------------- 1 | # this script is run as the "Build Task" menu in Visual Studio Code, for building Linux, as defined in .vscode/tasks.json 2 | make -j -C ~/genozip/src $1 3 | -------------------------------------------------------------------------------- /src/igzip/x86_64/igzip_update_histogram_01.asm: -------------------------------------------------------------------------------- 1 | %define ARCH 01 2 | 3 | %ifndef COMPARE_TYPE 4 | %define COMPARE_TYPE 2 5 | %endif 6 | 7 | %include "igzip_update_histogram.asm" 8 | -------------------------------------------------------------------------------- /src/conda/bld.bat: -------------------------------------------------------------------------------- 1 | make all 2 | 3 | copy src/genozip.exe %LIBRARY_BIN% 4 | copy src/genounzip.exe %LIBRARY_BIN% 5 | copy src/genols.exe %LIBRARY_BIN% 6 | copy src/genocat.exe %LIBRARY_BIN% 7 | -------------------------------------------------------------------------------- /src/igzip/x86_64/igzip_update_histogram_04.asm: -------------------------------------------------------------------------------- 1 | %define ARCH 04 2 | %define USE_HSWNI 3 | 4 | %ifndef COMPARE_TYPE 5 | %define COMPARE_TYPE 3 6 | %endif 7 | 8 | %include "igzip_update_histogram.asm" 9 | -------------------------------------------------------------------------------- /src/conda/generate_README.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash --norc 2 | 3 | # 
This script is invoked by Makefile 4 | 5 | ( sed /__README_MD__/Q conda/README.template.md ; \ 6 | ./html-to-md.sed ../README.md | grep -v " 5 | 6 | #define MAX_ADLER_BUF (1 << 28) 7 | #define ADLER_MOD 65521 8 | 9 | uint32_t isal_adler32(uint32_t init_crc, const unsigned char *buf, uint64_t len); 10 | uint32_t isal_adler32_bam1(uint32_t init_crc, const unsigned char *buf, uint64_t len); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/version.h: -------------------------------------------------------------------------------- 1 | #define GENOZIP_CODE_VERSION "15.0.75" 2 | 3 | extern int code_version_major (void); 4 | extern int code_version_minor (void); 5 | extern bool version_is_devel (void); 6 | extern StrText code_version (void); 7 | extern StrText file_version (void); 8 | extern StrText version_str (void); 9 | extern void version_background_test_for_newer (void); 10 | extern void version_print_notice_if_has_newer (void); 11 | extern const char *genozip_update_msg (void); 12 | -------------------------------------------------------------------------------- /src/igzip/aarch64/aarch64_label.h: -------------------------------------------------------------------------------- 1 | #ifndef __AARCH64_LABEL_H__ 2 | #define __AARCH64_LABEL_H__ 3 | 4 | #ifdef __USER_LABEL_PREFIX__ 5 | #define CONCAT1(a, b) CONCAT2(a, b) 6 | #define CONCAT2(a, b) a ## b 7 | #define cdecl(x) CONCAT1 (__USER_LABEL_PREFIX__, x) 8 | #else 9 | #define cdecl(x) x 10 | #endif 11 | 12 | #ifdef __APPLE__ 13 | #define ASM_DEF_RODATA .section __TEXT,__const 14 | #else 15 | #define ASM_DEF_RODATA .section .rodata 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/libdeflate_1.7/deflate_compress.h: -------------------------------------------------------------------------------- 1 | #ifndef LIB_DEFLATE_COMPRESS_H 2 | #define LIB_DEFLATE_COMPRESS_H 3 | 4 | #include 
"lib_common.h" 5 | 6 | /* DEFLATE compression is private to deflate_compress.c, but we do need to be 7 | * able to query the compression level for zlib and gzip header generation. */ 8 | 9 | struct libdeflate_compressor; 10 | 11 | unsigned int deflate_get_compression_level(struct libdeflate_compressor *c); 12 | 13 | #endif /* LIB_DEFLATE_COMPRESS_H */ 14 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/deflate_compress.h: -------------------------------------------------------------------------------- 1 | #ifndef LIB_DEFLATE_COMPRESS_H 2 | #define LIB_DEFLATE_COMPRESS_H 3 | 4 | #include "lib_common.h" 5 | 6 | /* 7 | * DEFLATE compression is private to deflate_compress.c, but we do need to be 8 | * able to query the compression level for zlib and gzip header generation. 9 | */ 10 | 11 | struct libdeflate_compressor; 12 | 13 | unsigned int libdeflate_get_compression_level(struct libdeflate_compressor *c); 14 | 15 | #endif /* LIB_DEFLATE_COMPRESS_H */ 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(OS),Windows_NT) 2 | EXE = .exe 3 | endif 4 | 5 | all debug opt clean clean-debug clean-optimized git-pull genozip$(EXE) genounzip$(EXE) genocat$(EXE) genols$(EXE) genozip-prod genozip-prod.exe install distribution-maintenance distribution: 6 | @(cd src ; $(MAKE) $(MAKECMDGOALS)) # note: cd because make -C doesn't work well on Mac 7 | 8 | .PHONY: all debug opt clean clean-debug clean-optimized git-pull genozip$(EXE) genounzip$(EXE) genocat$(EXE) genols$(EXE) genozip-prod genozip-prod.exe install 9 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/.cirrus.yml: -------------------------------------------------------------------------------- 1 | task: 2 | freebsd_instance: 3 | matrix: 4 | - image_family: freebsd-13-2 5 | - 
image_family: freebsd-14-0 6 | install_script: pkg install -y cmake 7 | script: 8 | - cmake -B build -DLIBDEFLATE_BUILD_TESTS=1 9 | - cmake --build build 10 | - ctest --test-dir build 11 | # Direct compilation without official build system 12 | - cc -O2 -Wall -Werror lib/*.c lib/*/*.c programs/gzip.c programs/prog_util.c programs/tgetopt.c -o libdeflate-gzip 13 | -------------------------------------------------------------------------------- /src/bzlib/README.bzlib: -------------------------------------------------------------------------------- 1 | Note from the author of Genozip regarding the bzlib directory: 2 | 3 | The files contained in this directory are modified files which are part of bzip2/libbzip2 version 1.0.8. Use of these 4 | files is governed by the unmodified license of bzip2/libbzip2 included in this directory. 5 | 6 | The modifications are as follows: 7 | 1) removal of files that are not needed by Genozip 8 | 2) optimization of the code - 64 bit integers, increased block sizes etc 9 | 3) additional APIs needed by Genozip 10 | -------------------------------------------------------------------------------- /src/genols.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // genols.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void genols (rom z_filename, bool finalize, rom subdir, bool recursive); 14 | -------------------------------------------------------------------------------- /src/bsc/AUTHORS.libbsc: -------------------------------------------------------------------------------- 1 | -- Authors of bsc and libbsc 2 | 3 | Ilya Grebnov 4 | 5 | -- This program is based on (at least) the work of 6 | 7 | Yuta Mori, Charles Bloom, Julian Seward, Mike Burrows, Matt Mahoney, 8 | David Wheeler, Sebastian Deorowicz, Florin Ghido, Peter Fenwick, 9 | Michael Schindler, Bulat Ziganshin, Eugene Shelwien, Yann Collet, 10 | Dmitry Shkarin, Mark Adler, Przemyslaw Skibinski, Duane Merrill, 11 | Michael Maniscalco, Jarek Duda, Fabian Giesen, Pascal Massimino, 12 | James K. Bonfield, Nania Francesco. 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/mac/welcome.template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 |
9 |

This will install genozip __VERSION__ on your computer

10 |
11 |

Click “Continue” to continue the setup

12 |
13 | 14 | 15 | -------------------------------------------------------------------------------- /src/html-to-md.sed: -------------------------------------------------------------------------------- 1 | #!/bin/sed -Ef 2 | 3 | # ------------------------------------------------------------------ 4 | # html-to-md.sed 5 | # Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 6 | # Please see terms and conditions in the file LICENSE.txt 7 | # 8 | # WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 9 | # and subject to penalties specified in the license. 10 | 11 | s/
$/ /g 12 | s//\*\*/g 13 | s/<\/b>/\*\*/g 14 | s//\*/g 15 | s/<\/i>/\*/g 16 | s/

//g 17 | s/<\/h1>/\n=======/g 18 | s/ / /g 19 | -------------------------------------------------------------------------------- /src/bases_filter.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // bases_filter.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | extern void iupac_set (rom optarg); 12 | extern void iupac_show (void); 13 | extern bool iupac_is_included_ascii (STRp(seq)); 14 | extern bool iupac_is_included_bam (STRp(seq)); 15 | -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Win32", 5 | "includePath": [ 6 | "C:\\Users\\divon\\projects\\genozip\\src\\" 7 | ], 8 | "defines": [ 9 | "_DEBUG", 10 | "UNICODE", 11 | "_UNICODE" 12 | ], 13 | "compilerPath": "C:/msys64/mingw64/bin/gcc.exe", 14 | "cStandard": "gnu11", 15 | "intelliSenseMode": "windows-gcc-x64", 16 | "configurationProvider": "ms-vscode.makefile-tools" 17 | } 18 | ], 19 | "version": 4 20 | } -------------------------------------------------------------------------------- /src/user_message.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // user_message.h 3 | // Copyright (C) 2023-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. 
Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void user_message_init (rom filename); 14 | extern void user_message_compress (void); 15 | extern void user_message_display (void); 16 | extern bool has_user_msg (void); 17 | -------------------------------------------------------------------------------- /src/zriter.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // zriter.h 3 | // Copyright (C) 2023-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void zriter_write (BufferP buf, BufferP section_list, int64_t offset_in_z_file, bool background); 14 | extern void zriter_wait_for_bg_writing (void); 15 | extern void zriter_flush (void); 16 | -------------------------------------------------------------------------------- /src/biopsy.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // biopsy.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void biopsy_init (rom optarg); 14 | extern void biopsy_take (VBlockP vb); 15 | extern bool biopsy_is_done (void); 16 | extern void biopsy_data_is_exhausted (void); 17 | extern void biopsy_finalize (void); 18 | -------------------------------------------------------------------------------- /src/mac_compat.h: -------------------------------------------------------------------------------- 1 | #if defined __APPLE__ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifndef CLOCK_REALTIME 10 | 11 | #ifndef mach_time_h 12 | #define mach_time_h 13 | 14 | // only supports a single timer 15 | #define TIMER_ABSTIME -1 16 | #define CLOCK_REALTIME CALENDAR_CLOCK 17 | #define CLOCK_MONOTONIC SYSTEM_CLOCK 18 | 19 | typedef int clockid_t; 20 | 21 | extern int clock_gettime(clockid_t clk_id, struct timespec *tp); 22 | #endif 23 | 24 | #endif 25 | 26 | #endif 27 | 28 | /* Copyright (c) 2015-2018 Alf Watt - Open Source - https://opensource.org/licenses/MIT */ 29 | 30 | -------------------------------------------------------------------------------- /src/tokenizer.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // tokenizer.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern const char sep_with_space[], sep_without_space[]; 14 | 15 | extern void tokenizer_zip_initialize (void); 16 | extern void tokenizer_seg (VBlockP vb, ContextP field_ctx, STRp(field), rom is_sep, unsigned add_additional_bytes); 17 | -------------------------------------------------------------------------------- /src/qname_filter.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // qname_filter.h 3 | // Copyright (C) 2024-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void qname_filter_initialize_from_file (rom filename, CompIType comp_i); 14 | extern void qname_filter_initialize_from_opt (rom opt, CompIType comp_i); 15 | extern bool qname_filter_does_line_survive (VBlockP vb, STRp(qname)); 16 | -------------------------------------------------------------------------------- /src/mac/postinstall.template.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash --norc 2 | 3 | # ------------------------------------------------------------------ 4 | # postinstall 5 | # Copyright (C) 2020-2025 Genozip Limited 6 | # Please see terms and conditions in the file LICENSE.txt 7 | # 8 | # WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 9 | # and subject to penalties specified in the license. 
10 | 11 | FILES=(__FILES__) 12 | 13 | [ -d /usr/local/bin ] || mkdir /usr/local/bin 14 | 15 | chmod -R 755 /Library/genozip 16 | 17 | for file in "${FILES[@]}" 18 | do 19 | rm -f /usr/local/bin/${file} 20 | ln -s /Library/genozip/${file} /usr/local/bin/${file} 21 | done 22 | 23 | -------------------------------------------------------------------------------- /src/aliases.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // aliases.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "sections.h" 12 | 13 | typedef struct __attribute__ ((packed)) { 14 | AliasType alias_type; // added v15 - up to v14 DictIdAlias contained only alias, dst 15 | DictId alias, dst; 16 | } DictIdAlias; 17 | 18 | extern void aliases_compress (void); 19 | extern ConstBufferP aliases_get (void); 20 | extern void show_aliases (void); 21 | -------------------------------------------------------------------------------- /src/base64.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // base64.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern unsigned base64_encode (bytes in, unsigned in_len, char *b64_str); 14 | extern uint32_t base64_decode (rom b64_str, unsigned *b64_str_len /* in / out */, uint8_t *data, uint32_t data_len); 15 | 16 | #define base64_size(plain_size) ((unsigned)(((plain_size) + 2) / 3) * 4) 17 | #define base64_sizeof(type_or_variable) base64_size(sizeof(type_or_variable)) 18 | -------------------------------------------------------------------------------- /src/zip.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // zip.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void zip_one_file (rom vcf_basename, bool is_last_user_txt_file); 14 | extern void zip_init_vb (VBlockP vb); 15 | extern bool zip_is_input_exhausted (void); 16 | extern uint64_t zip_get_target_progress (void); 17 | 18 | extern LocalGetLineCB *zip_get_local_data_callback (DataType dt, ContextP ctx); 19 | extern void zip_set_no_stons_if_callback (VBlockP vb); 20 | -------------------------------------------------------------------------------- /src/zlib/README.zlib: -------------------------------------------------------------------------------- 1 | Note from the author of genozip regarding the zlib directory: 2 | 3 | The files contained in this directory are modified files which are part of the zlib project that can be found here: https://github.com/madler/zlib. 4 | Use of these files is governed by the unmodified zlib license, which can be found in the file zlib.h. 
5 | 6 | The modifications are as follows: 7 | 1) removed files that are not needed by this project 8 | 2) changed z_stream.total_in from uLong to z_off64_t 9 | 3) added a counter z_stream.total_ever_in - counting the total number of bytes in, even across inflateReset 10 | that occurs when gzip blocks are deflated separately and concatenated, as in the BGZF format 11 | 4) added the functions gzconsumed64 and gzinject (canceled in 15.0.43) 12 | 5) replaced adler32 and crc32 with calls to libdeflate 13 | -------------------------------------------------------------------------------- /src/md5.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // md5.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "digest.h" 13 | 14 | extern void md5_initialize (Md5StateP ctx); 15 | extern Digest md5_finalize (Md5StateP ctx); 16 | extern Digest md5_do (const void *data, uint32_t len); 17 | extern void md5_update (Md5StateP ctx, const void *data, uint32_t len); 18 | extern void md5_display_state (const Md5State *ctx); // for debugging 19 | extern Digest md5_read (const char str[32]); -------------------------------------------------------------------------------- /src/aes.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // aes.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. 
Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | #define AES_BLOCKLEN 16 // aes encrypts blocks of 128 bits 14 | #define AES_KEYLEN 32 // 256 bit 15 | 16 | extern void aes_initialize (VBlockP vb, bytes key); 17 | extern void aes_xcrypt_buffer (VBlockP vb, uint8_t *data, uint32_t length); 18 | extern StrText aes_display_key (bytes key); 19 | extern rom aes_display_data (bytes data, unsigned data_len); 20 | 21 | -------------------------------------------------------------------------------- /src/coverage.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // coverage.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #include "genozip.h" 10 | 11 | // if updating, update cvr_names in coverage.c too 12 | typedef enum { CVR_OTHER_CONTIGS, CVR_ALL_CONTIGS, CVR_SOFT_CLIP, CVR_UNMAPPED, CVR_SECONDARY, CVR_SUPPLEMENTARY, CVR_FAILED, CVR_DUPLICATE, CVR_TOTAL, NUM_COVER_TYPES } CoverTypes; 13 | 14 | extern void coverage_initialize (VBlockP vb); 15 | extern void coverage_add_one_vb (VBlockP vb); 16 | extern void coverage_show_coverage (void); 17 | extern void coverage_show_idxstats (void); 18 | -------------------------------------------------------------------------------- /src/distribution.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // distribution.c 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 
4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #include "genozip.h" 10 | #include "version.h" 11 | 12 | rom get_distribution (void) 13 | { 14 | return version_is_devel() ? "devel" : DISTRIBUTION; 15 | } 16 | 17 | bool dist_is_conda (void) { return !strcmp (get_distribution(), "conda" ); } 18 | bool dist_is_github (void) { return !strcmp (get_distribution(), "github" ); } 19 | bool dist_is_installforge (void) { return !strcmp (get_distribution(), "InstallForge" ); } 20 | -------------------------------------------------------------------------------- /src/dict_io.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // dict_io.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | extern void dict_io_read_all_dictionaries (void); 12 | extern void dict_io_compress_dictionaries (void); 13 | 14 | extern StrTextMegaLong str_snip_ex (STRp(snip), bool add_quote); 15 | #define str_snip str_snip_ex (snip, snip_##len, true).s 16 | extern void dict_io_print (FILE *fp, STRp(data), bool with_word_index, bool add_quotation_marks, bool add_newline, bool remove_non_contigs); 17 | extern void dict_io_show_singletons (VBlockP vb, ContextP ctx); 18 | -------------------------------------------------------------------------------- /src/singularity/Singularity: -------------------------------------------------------------------------------- 1 | Bootstrap:docker 2 | From:ubuntu 3 | 4 | %labels 5 | Maintainer divonlan 6 | 7 | %help 8 | Help me. I'm in the container. 9 | 10 | %environment 11 | PATH=/code:${PATH} 12 | export PATH 13 | 14 | %post 15 | #echo "Installing genozip into container..." 16 | 17 | %runscript 18 | exec /code/genozip "$@" 19 | 20 | # apps 21 | 22 | %apprun genozip 23 | exec /code/genozip "$@" 24 | 25 | %apprun genounzip 26 | exec /code/genounzip "$@" 27 | 28 | %apprun genocat 29 | exec /code/genocat "$@" 30 | 31 | %apprun genols 32 | exec /code/genols "$@" 33 | 34 | %apphelp genozip 35 | This is the help for genozip app. 36 | 37 | %apphelp genounzip 38 | This is the help for genounzip app. 39 | 40 | %apphelp genocat 41 | This is the help for genocat app. 42 | 43 | %apphelp genols 44 | This is the help for genols app. 45 | 46 | -------------------------------------------------------------------------------- /src/tar.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // tar.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. 
Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void tar_set_tar_name (rom tar_filename); 14 | extern rom tar_get_tar_name (void); 15 | extern void tar_initialize (BufferP input_files_buf); 16 | extern FILE *tar_open_file (rom fn_on_disk, rom fn_in_tar); 17 | extern void tar_copy_file (rom fn_on_disk, rom fn_in_tar); 18 | extern bool tar_zip_is_tar (void); 19 | extern int64_t tar_file_offset (void); 20 | extern void tar_close_file (void **file); 21 | extern void tar_finalize (void); 22 | -------------------------------------------------------------------------------- /src/tip.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // tip.h 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void tip_dt_encountered (DataType dt); 14 | extern void tip_print (void); 15 | 16 | // protect from spamming the user with more than one tip 17 | // Note: use this for learning-curve tips, not for failure cases which should always display 18 | #define TIP(format, ...) 
( { if (!flag.no_tip) iprintf ("\n\nTIP: "format "\n\n", __VA_ARGS__); flag.no_tip = true; } ) 19 | #define TIP0(str) ( { if (!flag.no_tip) iprint0 ("\n\nTIP: " str "\n\n"); flag.no_tip = true; } ) 20 | -------------------------------------------------------------------------------- /src/zip_dyn_int.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // dyn_int.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "local_type.h" 13 | 14 | extern void dyn_int_init_ctx (VBlockP vb, ContextP ctx, int64_t value); 15 | extern void dyn_int_append (VBlockP vb, ContextP ctx, int64_t value, unsigned add_bytes); 16 | extern void dyn_int_append_nothing_char (VBlockP vb, ContextP ctx, unsigned add_bytes); 17 | extern LocalType dyn_int_get_ltype (ContextP ctx); 18 | extern rom dyn_int_lt_order_name (uint8_t dyn_lt_order); 19 | extern void dyn_int_transpose (VBlockP vb, ContextP ctx); 20 | -------------------------------------------------------------------------------- /src/conda/generate_meta.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash --norc 2 | 3 | # ------------------------------------------------------------------ 4 | # generate-meta.sh 5 | # Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 6 | # Please see terms and conditions in the file LICENSE.txt 7 | # 8 | # WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 9 | # and subject to penalties specified in the license. 
10 | 11 | # This script is invoked by Makefile 12 | 13 | version=$(head -n1 version.h |cut -d\" -f2) 14 | 15 | ( sed /__README_MD__/Q conda/meta.template.yaml ; \ 16 | cat ../README.md | sed "s/<.\{1,3\}>//g"| sed "s/\ / /g" | grep -v "b250)) 17 | 18 | extern void b250_seg_append (VBlockP vb, ContextP vctx, WordIndex node_index); 19 | extern void b250_seg_remove_last (VBlockP vb, ContextP ctx, WordIndex node_index); 20 | extern bool b250_zip_generate (VBlockP vb, ContextP ctx); 21 | 22 | 23 | // PIZ 24 | extern WordIndex b250_piz_decode (bytes *b, bool advance, B250Size b250_size, rom ctx_name); 25 | -------------------------------------------------------------------------------- /src/txtheader.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // txtheader.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #include "genozip.h" 10 | 11 | //---------- 12 | // ZIP stuff 13 | //---------- 14 | 15 | extern void txtheader_compress (BufferP vcf_header_text, uint64_t unmodified_txt_header_len, Digest header_digest, bool is_first_vcf, CompIType comp_i); 16 | extern int64_t txtheader_zip_read_and_compress (int64_t *txt_header_offset, CompIType comp_i); 17 | 18 | //---------- 19 | // PIZ stuff 20 | //---------- 21 | 22 | extern void txtheader_piz_read_and_reconstruct (Section txt_header_sec); 23 | extern StrTextLong txtheader_get_txt_filename_from_section (CompIType comp_i); 24 | 25 | -------------------------------------------------------------------------------- /src/igzip/flatten_ll.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "flatten_ll.h" 6 | 7 | void flatten_ll(uint32_t * ll_hist) 8 | { 9 | uint32_t i, j; 10 | uint32_t *s = ll_hist, x, *p; 11 | 12 | s[265] += s[266]; 13 | s[266] = s[267] + s[268]; 14 | s[267] = s[269] + s[270]; 15 | s[268] = s[271] + s[272]; 16 | s[269] = s[273] + s[274] + s[275] + s[276]; 17 | s[270] = s[277] + s[278] + s[279] + s[280]; 18 | s[271] = s[281] + s[282] + s[283] + s[284]; 19 | s[272] = s[285] + s[286] + s[287] + s[288]; 20 | p = s + 289; 21 | for (i = 273; i < 277; i++) { 22 | x = *(p++); 23 | for (j = 1; j < 8; j++) 24 | x += *(p++); 25 | s[i] = x; 26 | } 27 | for (; i < 281; i++) { 28 | x = *(p++); 29 | for (j = 1; j < 16; j++) 30 | x += *(p++); 31 | s[i] = x; 32 | } 33 | for (; i < 285; i++) { 34 | x = *(p++); 35 | for (j = 1; j < 32; j++) 36 | x += *(p++); 37 | s[i] = x; 38 | } 39 | s[284] -= s[512]; 40 | s[285] = s[512]; 41 | } 42 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 
2 | * text=lf 3 | 4 | *.sh eol=lf 5 | *.txt eol=lf 6 | *.md eol=lf 7 | *.eb eol=lf 8 | *.json eol=lf 9 | *.sed eol=lf 10 | *.c eol=lf 11 | *.cpp eol=lf 12 | *.h eol=lf 13 | *.asm eol=lf 14 | *.S eol=lf 15 | *.yaml eol=lf 16 | Makefile eol=lf 17 | 18 | # Declare our textual formats as binary so we can check in test files with Unix or Windows style end-of-lines 19 | *.vcf binary 20 | *.bcf binary 21 | *.sam binary 22 | *.bam binary 23 | *.cram binary 24 | *.gff3 binary 25 | *.gff binary 26 | *.gvf binary 27 | *.gtf binary 28 | genome_*.txt binary 29 | *.fa binary 30 | *.fasta binary 31 | *.fa binary 32 | *.faa binary 33 | *.ffn binary 34 | *.fnn binary 35 | *.fna binary 36 | *.frn binary 37 | *.fas binary 38 | *.fq binary 39 | *.fastq binary 40 | *.phy binary 41 | *.bed binary 42 | *.kraken binary 43 | *.chain binary 44 | *.locs binary 45 | *.genozip binary 46 | 47 | # installer files 48 | *.tar binary 49 | *.exe binary 50 | *.o binary 51 | -------------------------------------------------------------------------------- /genozip.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 
5 | }, 6 | { 7 | "path": "src/secure" 8 | }, 9 | { 10 | "path": "../genozip/private" 11 | }, 12 | { 13 | "path": "../genozip-feedstock" 14 | } 15 | ], 16 | "settings": { 17 | "terminal.integrated.profiles.windows": { 18 | "msys64": { 19 | "path": "C:\\msys64\\usr\\bin\\bash.exe", 20 | "env": { 21 | "CHERE_INVOKING": "1", 22 | "MSYSTEM": "MINGW64" 23 | } 24 | } 25 | }, 26 | "files.associations": { 27 | "*.vcf": "plaintext", 28 | "*.sam": "plaintext", 29 | "*.fq": "plaintext", 30 | "*.fastq": "plaintext", 31 | "*.fa": "plaintext", 32 | "*.fna": "plaintext", 33 | "*.fasta": "plaintext", 34 | "*.eb": "python", 35 | "*.c": "c", 36 | "*.h": "c", 37 | "*.cpp": "cpp", 38 | "*.bat": "bat", 39 | "*.yaml": "yaml", 40 | "*.sh": "shellscript", 41 | "typeinfo": "c", 42 | "compare": "c" 43 | }, 44 | "makefile.configureOnOpen": false 45 | } 46 | } -------------------------------------------------------------------------------- /easybuild/genozip.eb: -------------------------------------------------------------------------------- 1 | easyblock = 'ConfigureMake' 2 | 3 | name = 'genozip' 4 | 5 | # Set version number here, or use build-eb-genozip-latest.sh to build the latest version 6 | version = '0.0.0' 7 | 8 | # Set your institution name here, for example: 'UniversityOfAdelaide' 9 | local_institution = 'MyInstitutionName' 10 | 11 | skipsteps = ['configure'] 12 | homepage = 'https://genozip.com' 13 | description = """Genozip is a compressor for BAM, FASTQ, VCF and other genomic files. Documentation: genozip.com. 
Support: support@genozip.com""" 14 | 15 | toolchain = SYSTEM 16 | 17 | sources = [{ 18 | 'filename': 'genozip_%(version)s.tar.gz', 19 | 'git_config': { 20 | 'url': 'https://github.com/divonlan/', 21 | 'repo_name': 'genozip', 22 | 'tag': 'genozip-%(version)s', 23 | }, 24 | }] 25 | 26 | install_cmd = "PREFIX=%(installdir)s DISTRIBUTION=" + local_institution + " make install" 27 | 28 | sanity_check_paths = { 29 | 'files': ['bin/genocat', 'bin/genozip', 'bin/genounzip', 'bin/genols'], 30 | 'dirs': ['bin'], 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/filename.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // filename.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "file_types.h" 12 | #include "flags.h" 13 | 14 | extern rom filename_z_by_flag (void); 15 | extern rom filename_z_normal (rom txt_filename, DataType dt, FileType txt_ft); 16 | extern char *filename_z_pair (rom fn1, rom fn2, bool test_only); 17 | extern char *filename_z_deep (rom sam_name); 18 | 19 | extern rom filename_guess_original (ConstFileP file); 20 | 21 | extern bool filename_has_ext (rom filename, rom extension); 22 | extern void filename_remove_codec_ext (char *filename, FileType ft); 23 | extern rom filename_base (rom filename, bool remove_exe, rom default_basename, char *basename, unsigned basename_size); 24 | 25 | extern char *filename_make_unix (char *filename); 26 | -------------------------------------------------------------------------------- /src/mac/Distribution: -------------------------------------------------------------------------------- 1 | 2 | 3 | genozip 4 | 5 | 6 | 7 | 8 | 9 | 10 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | genozip.pkg 28 | 29 | -------------------------------------------------------------------------------- /src/igzip/encode_df.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #if __x86_64__ || __i386__ || _M_X64 || _M_IX86 8 | #ifdef _MSC_VER 9 | # include 10 | #else 11 | # include 12 | #endif 13 | #endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 14 | 15 | #include "encode_df.h" 16 | #include "bitbuf2.h" 17 | 18 | struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in, 19 | struct deflate_icf *end_in, struct BitBuf2 *bb, 20 | struct hufftables_icf *hufftables) 21 | { 22 | struct huff_code lsym, dsym; 23 | 24 | while (next_in < end_in && !is_full(bb)) { 25 | lsym = hufftables->lit_len_table[next_in->lit_len]; 26 | dsym = hufftables->dist_lit_table[next_in->lit_dist]; 27 | 28 | // insert ll code, dist_code, and extra_bits 29 | write_bits_unsafe(bb, 
lsym.code_and_extra, lsym.length); 30 | write_bits_unsafe(bb, dsym.code, dsym.length); 31 | write_bits_unsafe(bb, next_in->dist_extra, dsym.extra_bit_count); 32 | flush_bits(bb); 33 | 34 | next_in++; 35 | } 36 | 37 | return next_in; 38 | } 39 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/COPYING: -------------------------------------------------------------------------------- 1 | Copyright 2016 Eric Biggers 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation files 5 | (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, 7 | publish, distribute, sublicense, and/or sell copies of the Software, 8 | and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/libdeflate_1.7/COPYING: -------------------------------------------------------------------------------- 1 | Copyright 2016 Eric Biggers 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation files 5 | (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, 7 | publish, distribute, sublicense, and/or sell copies of the Software, 8 | and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/mac/uninstall.template.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash --norc 2 | 3 | # ------------------------------------------------------------------ 4 | # uninstall.sh 5 | # Copyright (C) 2020-2025 Genozip Limited where applies 6 | # Please see terms and conditions in the file LICENSE.txt 7 | # 8 | # WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 9 | # and subject to penalties specified in the license. 
10 | 11 | # loosely based on https://github.com/KosalaHerath/macos-installer-builder which is licensed under Apache 2.0 license 12 | # removal touches /usr/local/bin and /Library, so refuse to run without root - and exit non-zero so that callers can detect the refusal 13 | if (( $EUID != 0 )); then 14 | echo "Please run as root." 15 | exit 1 16 | fi 17 | # ask for confirmation; an empty answer counts as yes 18 | while true; do 19 | read -p "Do you wish to uninstall genozip [Y/n]?" answer 20 | [[ $answer == "y" || $answer == "Y" || $answer == "" ]] && break 21 | [[ $answer == "n" || $answer == "N" ]] && exit 0 22 | echo "Please answer with 'y' or 'n'" 23 | done 24 | 25 | # delete the installed executables; && guarantees that rm never runs in the caller's current directory if cd fails 26 | (cd /usr/local/bin && rm -f __FILES__) 27 | # drop the package receipt; output and errors are discarded 28 | pkgutil --forget "org.genozip.__VERSION__" > /dev/null 2>&1 29 | # remove the installation directory, if present 30 | [ -e "/Library/genozip" ] && rm -rf "/Library/genozip" 31 | 32 | echo Done 33 | exit 0 34 | -------------------------------------------------------------------------------- /src/crypt.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // crypt.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "sections.h" 12 | 13 | extern void crypt_set_password (rom new_password); 14 | extern rom crypt_get_password (void); 15 | extern bool has_password (void); 16 | extern bool crypt_prompt_for_password(void); 17 | extern uint32_t crypt_padded_len (uint32_t len); 18 | extern bool crypt_get_encrypted_len (uint32_t *data_encrypted_len /* in/out */, uint32_t *padding_len /* out */); 19 | extern void crypt_do (VBlockP vb, uint8_t *data, uint32_t data_len, VBIType vb_i, SectionType sec_type, bool is_header); 20 | extern void crypt_continue (VBlockP vb, uint8_t *data, uint32_t data_len); 21 | extern void crypt_pad (uint8_t *data, uint32_t data_len, uint32_t padding_len); 22 | extern uint32_t crypt_max_padding_len(void); 23 | 24 | extern rom encryption_name (EncryptionType encryption_type); 25 | -------------------------------------------------------------------------------- /src/progress.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // progress.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "digest.h" 13 | 14 | extern void progress_new_component (rom component_name, rom message, bool test_mode, TimeSpecType *start_time); 15 | extern void progress_update (rom task, uint64_t sofar, uint64_t total, bool done); 16 | extern void progress_finalize_component (rom status); 17 | extern void progress_finalize_component_time (rom status, Digest md5); 18 | extern void progress_finalize_component_time_ratio (rom me, float ratio, Digest md5); 19 | extern void progress_finalize_component_time_ratio_better (rom me, float ratio, rom better_than, float ratio_than, Digest md5); 20 | extern bool progress_has_component (void); 21 | extern void progress_concatenated_md5 (rom me, Digest md5); 22 | extern void progress_erase (void); 23 | extern void progress_newline(void); 24 | -------------------------------------------------------------------------------- /src/sam_blasr.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // sam_blasr.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license.
8 | 9 | // Compresses auxiliary fields generated by the BLASR aligner 10 | 11 | #include "sam_private.h" 12 | 13 | // FI:i - read alignment start position (1 based). Segs a SAM_SPECIAL_FI snip when fi equals the prediction (1 + the soft_clip entry selected by the line's rev_comp flag), otherwise segs the integer itself 14 | void sam_seg_blasr_FI_i (VBlockSAMP vb, ZipDataLineSAMP dl, int64_t fi, unsigned add_bytes) 15 | { 16 | decl_ctx (OPTION_FI_i); 17 | 18 | // check if prediction is correct (it is often correct, but not always) 19 | if (fi == 1 + vb->soft_clip[dl->FLAG.rev_comp]) 20 | seg_special0 (VB, SAM_SPECIAL_FI, ctx, add_bytes); 21 | // prediction failed - store the value explicitly 22 | else 23 | seg_integer (VB, ctx, fi, true, add_bytes); 24 | } 25 | // PIZ counterpart of sam_seg_blasr_FI_i: recomputes the predicted value, reconstructs it if requested, and always returns it as the new value 26 | SPECIAL_RECONSTRUCTOR (sam_piz_special_FI) 27 | { 28 | new_value->i = 1 + VB_SAM->soft_clip[last_flags.rev_comp]; 29 | 30 | if (reconstruct) 31 | RECONSTRUCT_INT (new_value->i); 32 | 33 | return HAS_NEW_VALUE; 34 | } 35 | -------------------------------------------------------------------------------- /src/compressor.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // compressor.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "codec.h" 12 | 13 | extern uint32_t comp_compress (VBlockP vb, ContextP ctx, BufferP z_data, SectionHeaderUnionP header, 14 | rom uncompressed_data, // option 1 - compress contiguous data 15 | LocalGetLineCB callback, rom name); // option 2 - compress data one line at a time 16 | 17 | extern bool comp_compress_complex_codec (VBlockP vb, ContextP ctx, SectionHeaderP header, bool is_2nd_try, 18 | uint32_t *uncompressed_len, qSTRp (compressed), rom name); 19 | 20 | extern void comp_uncompress (VBlockP vb, ContextP ctx, Codec codec, Codec sub_codec, uint8_t param, 21 | STRp(compressed_data), BufferP uncompressed_data, uint64_t uncompressed_len, rom name); 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/gzip_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * gzip_constants.h - constants for the gzip wrapper format 3 | */ 4 | 5 | #ifndef LIB_GZIP_CONSTANTS_H 6 | #define LIB_GZIP_CONSTANTS_H 7 | 8 | #define GZIP_MIN_HEADER_SIZE 10 9 | #define GZIP_FOOTER_SIZE 8 10 | #define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE) 11 | 12 | #define GZIP_ID1 0x1F 13 | #define GZIP_ID2 0x8B 14 | 15 | #define GZIP_CM_DEFLATE 8 16 | 17 | #define GZIP_FTEXT 0x01 18 | #define GZIP_FHCRC 0x02 19 | #define GZIP_FEXTRA 0x04 20 | #define GZIP_FNAME 0x08 21 | #define GZIP_FCOMMENT 0x10 22 | #define GZIP_FRESERVED 0xE0 23 | 24 | #define GZIP_MTIME_UNAVAILABLE 0 25 | 26 | #define GZIP_XFL_SLOWEST_COMPRESSION 0x02 27 | #define GZIP_XFL_FASTEST_COMPRESSION 0x04 28 | 29 | #define GZIP_OS_FAT 0 30 | #define GZIP_OS_AMIGA 1 31 | #define GZIP_OS_VMS 2 32 | #define GZIP_OS_UNIX 3 33 | #define GZIP_OS_VM_CMS 4 34 | #define GZIP_OS_ATARI_TOS 5 35 | #define GZIP_OS_HPFS 6 36 | #define GZIP_OS_MACINTOSH 7 37 | #define GZIP_OS_Z_SYSTEM 8 38 | #define GZIP_OS_CP_M 9 39 | #define GZIP_OS_TOPS_20 10 40 | #define GZIP_OS_NTFS 11 41 | 
#define GZIP_OS_QDOS 12 42 | #define GZIP_OS_RISCOS 13 43 | #define GZIP_OS_UNKNOWN 255 44 | 45 | #endif /* LIB_GZIP_CONSTANTS_H */ 46 | -------------------------------------------------------------------------------- /src/sam_cpu.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // sam_cpu.c 3 | // Copyright (C) 2024-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | // Compresses auxiliary fields generated by tmap (IonXpress) mapper 10 | 11 | #include "sam_private.h" 12 | // predicted XL:Z value: "aln" if the line has an XM:i field, "mem" otherwise (both are 3 characters long) 13 | static rom xl_prediction (bool line_has_XM) 14 | { 15 | return line_has_XM ? "aln" : "mem"; 16 | } 17 | // ZIP: segs a special snip if XL equals the prediction, otherwise segs XL as is 18 | void sam_seg_CPU_XL_Z (VBlockSAMP vb, STRp(xl), unsigned add_bytes) 19 | { 20 | decl_ctx (OPTION_XL_Z); 21 | 22 | if (str_issame_(STRa(xl), xl_prediction (has(XM_i)), 3)) 23 | seg_special0 (VB, SAM_SPECIAL_CPU_XL, ctx, add_bytes); 24 | 25 | else 26 | seg_by_ctx (VB, STRa(xl), ctx, add_bytes); 27 | } 28 | // PIZ: reconstructs the 3-character predicted XL value, if reconstruction is requested 29 | // since 15.0.65 30 | SPECIAL_RECONSTRUCTOR (sam_piz_special_CPU_XL) 31 | { 32 | if (reconstruct) { 33 | rom prediction = xl_prediction (curr_container_has (vb, _OPTION_XM_i)); 34 | RECONSTRUCT (prediction, 3); 35 | } 36 | 37 | return NO_NEW_VALUE; 38 | } 39 | -------------------------------------------------------------------------------- /src/generic.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // generic.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software.
Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | #pragma GENDICT_PREFIX GNRIC 14 | #pragma GENDICT GNRIC_DATA=DTYPE_FIELD=DATA 15 | #pragma GENDICT GNRIC_TOPLEVEL=DTYPE_FIELD=TOPLEVEL 16 | 17 | extern int32_t generic_unconsumed (VBlockP vb, uint32_t first_i); 18 | extern int32_t generic_is_header_done (bool is_eof); 19 | extern void generic_seg_initialize (VBlockP vb); 20 | extern rom generic_seg_txt_line (VBlockP vb, rom next_line, uint32_t remaining_txt_len, bool *has_13); 21 | extern rom generic_assseg_line (VBlockP vb); 22 | extern void generic_seg_finalize (VBlockP vb); 23 | extern bool generic_seg_is_small (ConstVBlockP vb, DictId dict_id); 24 | extern StrTextLong generic_get_magic (void); 25 | extern rom generic_get_ext (void); 26 | extern rom fallback_to_generic (VBlockP vb); 27 | 28 | // SPECIALS 29 | SPECIAL (GNRIC, 0, TOPLEVEL, generic_piz_TOPLEVEL); 30 | -------------------------------------------------------------------------------- /src/igzip/encode_df.h: -------------------------------------------------------------------------------- 1 | #ifndef ENCODE_DF_H 2 | #define ENCODE_DF_H 3 | 4 | #include <stdint.h> 5 | #include "igzip_lib.h" 6 | #include "huff_codes.h" 7 | 8 | /* Deflate Intermediate Compression Format */ 9 | #define LIT_LEN_BIT_COUNT 10 10 | #define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1) 11 | #define DIST_LIT_BIT_COUNT 9 12 | #define DIST_LIT_MASK ((1 << DIST_LIT_BIT_COUNT) - 1) 13 | #define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT 14 | #define NULL_DIST_SYM 30 15 | 16 | #define LEN_START ISAL_DEF_LIT_SYMBOLS 17 | #define LEN_OFFSET (LEN_START - ISAL_DEF_MIN_MATCH) 18 | #define LEN_MAX (LEN_OFFSET + ISAL_DEF_MAX_MATCH) 19 | #define LIT_START (NULL_DIST_SYM + 1) 20 | #define ICF_CODE_LEN 32 21 | /* one 32-bit ICF token: lit_len (10 bits), lit_dist (9 bits), dist_extra (the remaining 13 bits); the fields are declared in reverse order when the target is not little-endian */ 22 | struct deflate_icf { 23 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 24 | uint32_t lit_len:LIT_LEN_BIT_COUNT; 25 | uint32_t
lit_dist:DIST_LIT_BIT_COUNT; 26 | uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET; 27 | #else 28 | uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET; 29 | uint32_t lit_dist:DIST_LIT_BIT_COUNT; 30 | uint32_t lit_len:LIT_LEN_BIT_COUNT; 31 | #endif 32 | }; 33 | 34 | struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in, 35 | struct BitBuf2 *bb, struct hufftables_icf *hufftables); 36 | #endif 37 | -------------------------------------------------------------------------------- /src/igzip/igzip_level_buf_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef IGZIP_LEVEL_BUF_STRUCTS_H 2 | #define IGZIP_LEVEL_BUF_STRUCTS_H 3 | 4 | #include "igzip_lib.h" 5 | #include "huff_codes.h" 6 | #include "encode_df.h" 7 | 8 | #define MATCH_BUF_SIZE (4 * 1024) 9 | 10 | struct hash8k_buf { 11 | uint16_t hash_table[IGZIP_HASH8K_HASH_SIZE]; 12 | }; 13 | 14 | struct hash_hist_buf { 15 | uint16_t hash_table[IGZIP_HASH_HIST_SIZE]; 16 | }; 17 | 18 | struct hash_map_buf { 19 | uint16_t hash_table[IGZIP_HASH_MAP_HASH_SIZE]; 20 | struct deflate_icf *matches_next; 21 | struct deflate_icf *matches_end; 22 | struct deflate_icf matches[MATCH_BUF_SIZE]; 23 | struct deflate_icf overflow[ISAL_LOOK_AHEAD]; 24 | }; 25 | 26 | #define MAX_LVL_BUF_SIZE sizeof(struct hash_map_buf) 27 | 28 | struct level_buf { 29 | struct hufftables_icf encode_tables; 30 | struct isal_mod_hist hist; 31 | uint32_t deflate_hdr_count; 32 | uint32_t deflate_hdr_extra_bits; 33 | uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; 34 | struct deflate_icf *icf_buf_next; 35 | uint64_t icf_buf_avail_out; 36 | struct deflate_icf *icf_buf_start; 37 | union { 38 | struct hash8k_buf hash8k; 39 | struct hash_hist_buf hash_hist; 40 | struct hash_map_buf hash_map; 41 | /* lvl1 / lvl2 / lvl3 have the same types as the three members above and share the union's storage with them */ 42 | struct hash8k_buf lvl1; 43 | struct hash_hist_buf lvl2; 44 | struct hash_map_buf lvl3; 45 | }; 46 | }; 47 | 48 | #endif 49 |
-------------------------------------------------------------------------------- /src/license.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // license.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "sections.h" 12 | 13 | extern void license_activate (void); 14 | extern bool license_is_activated (void); 15 | extern void license_set_filename (rom filename); 16 | extern void license_load (void); 17 | extern StrText license_get_number (void); 18 | extern rom lic_type_name (int8_t lic_type); 19 | extern rom license_get_one_line (void); 20 | extern void license_display (bool show_latest); 21 | extern bool license_allow_tip (void); 22 | extern bool license_allow_distribution (void); 23 | extern void license_eval_notice (void); 24 | extern bool license_is_eval (void); 25 | extern bool license_is_standard (void); 26 | extern bool license_is_enterprise (void); 27 | extern void license_prepare (rom arg); 28 | extern bool license_piz_prepare_genozip_header (SectionHeaderGenozipHeaderP header, FailType fail_type); 29 | extern StrTextLong license_academic_tip (void); 30 | extern bool am_i_submit (void); -------------------------------------------------------------------------------- /src/fasta_private.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // fasta_private.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 
4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "vblock.h" 12 | #include "contigs.h" 13 | // FASTA-specific VBlock: the common VBlock fields (VBLOCK_COMMON_FIELDS) followed by FASTA-only state 14 | typedef struct VBlockFASTA { 15 | VBLOCK_COMMON_FIELDS 16 | 17 | bool contig_grepped_out; 18 | // note: last_line is initialized to FASTA_LINE_SEQ (=0) so that a ; line as the first line of the VB is interpreted as a description, not a comment 19 | enum { FASTA_LINE_SEQ, FASTA_LINE_DESC, FASTA_LINE_COMMENT } last_line; // ZIP & PIZ (FASTA only, not REF) 20 | 21 | bool vb_has_no_newline; // ZIP: the end of the VB contains part of a sequence, not ending in a newline, which is continued in the next VB 22 | bool ra_initialized; // ZIP: RA was initialized for this VB 23 | uint32_t lines_this_contig; // ZIP 24 | 25 | // --make-reference 26 | bool has_contig_metadata; // used by make-reference 27 | ContigMetadata contig_metadata; 28 | } VBlockFASTA, *VBlockFASTAP; 29 | // casts the vb variable in scope to a VBlockFASTAP 30 | #define VB_FASTA ((VBlockFASTAP)vb) 31 | -------------------------------------------------------------------------------- /src/user_message.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // user_message.c 3 | // Copyright (C) 2023-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #include "file.h" 10 | #include "user_message.h" 11 | #include "zfile.h" 12 | #include "license.h" 13 | // the user message: filled by user_message_init from a file, or by user_message_display from the SEC_USER_MESSAGE section 14 | static Buffer user_message = {}; 15 | // reads the file filename into the user_message buffer (VERIFY_UTF8 is passed to file_get_file) 16 | void user_message_init (rom filename) 17 | { 18 | file_get_file (evb, filename, &user_message, "user_message", 0, VERIFY_UTF8, false); 19 | } 20 | // true if the user_message buffer is not empty 21 | bool has_user_msg (void) 22 | { 23 | return user_message.len > 0; 24 | } 25 | 26 | // ZIP main thread: write to global section (nothing is written if the message is empty) 27 | void user_message_compress (void) 28 | { 29 | if (user_message.len) 30 | zfile_compress_section_data (evb, SEC_USER_MESSAGE, &user_message); 31 | } 32 | 33 | // PIZ main thread: prints the user message stored in the file, unless flag.quiet or flag.test is set 34 | void user_message_display (void) 35 | { 36 | if (flag.quiet || flag.test) return; 37 | 38 | Section sec = sections_first_sec (SEC_USER_MESSAGE, SOFT_FAIL); 39 | if (!sec) return; // no user message in this file 40 | 41 | zfile_get_global_section (SectionHeader, sec, &user_message, "user_message"); 42 | 43 | iprintf ("%.*s", STRfb(user_message)); 44 | } 45 | -------------------------------------------------------------------------------- /src/stats.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // stats.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "buffer.h" 13 | // one entry of the statistics table - NOTE(review): presumably one entry per context (see my_did_i); confirm against stats.c 14 | typedef struct { 15 | Did my_did_i, st_did_i; 16 | bool is_dict_alias; 17 | int64_t txt_len, z_size; 18 | char name[100]; 19 | rom type; 20 | StrText did_i, words, dict_words, local_words, failed_ston_words, hash, uncomp_dict, comp_dict, comp_b250, uncomp_local, comp_local; 21 | float pc_of_txt, pc_of_z, pc_hash_occupancy, pc_ston_hash_occup; 22 | rom bcodec, lcodec, dcodec; 23 | } StatsByLine; 24 | 25 | extern void stats_generate (void); 26 | extern void stats_read_and_display (void); 27 | extern void stats_add_txt_name (rom fn); 28 | extern void stats_finalize (void); 29 | 30 | extern void stats_add_one_program (STRp(prog_name)); 31 | extern void stats_remove_data_after_program_name (rom program); 32 | extern rom stats_find_in_programs (rom signature); 33 | static inline bool stats_is_in_programs (rom signature) { return (bool)stats_find_in_programs (signature); } // the result of stats_find_in_programs converted to bool 34 | 35 | extern Buffer stats_programs; 36 | -------------------------------------------------------------------------------- /src/vcf_cosmic.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // vcf_cosmic.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #include "vcf_private.h" 10 | // no COSMIC-specific per-VB initialization is currently needed 11 | void vcf_cosmic_seg_initialize (VBlockVCFP vb) 12 | { 13 | } 14 | 15 | // eg: "COSN8882887", "COSN30315184" 16 | void vcf_seg_INFO_LEGACY_ID (VBlockVCFP vb, ContextP ctx, STRp(lid)) 17 | { 18 | seg_id_field (VB, ctx, STRa(lid), false, lid_len); 19 | } 20 | // SO_TERM: if the term is the one implied by the REF/ALT lengths (1/1: SNV, 1/>1: insertion, >1/1: deletion) seg a special snip, otherwise seg the term itself 21 | void vcf_seg_INFO_SO_TERM (VBlockVCFP vb, ContextP ctx, STRp(st)) 22 | { 23 | if ((vb->REF_len == 1 && vb->ALT_len == 1 && str_issame_(st, st_len, "SNV" , 3)) || 24 | (vb->REF_len == 1 && vb->ALT_len > 1 && str_issame_(st, st_len, "insertion", 9)) || 25 | (vb->REF_len > 1 && vb->ALT_len == 1 && str_issame_(st, st_len, "deletion" , 8))) 26 | seg_special0 (VB, VCF_SPECIAL_SO_TERM, ctx, st_len); 27 | 28 | else 29 | seg_by_ctx (VB, STRa(st), ctx, st_len); 30 | } 31 | // PIZ: reconstructs the SO_TERM implied by the variant type 32 | SPECIAL_RECONSTRUCTOR (vcf_piz_special_SO_TERM) 33 | { 34 | // emit text only when asked to - consistent with the other SPECIAL_RECONSTRUCTORs, which guard their RECONSTRUCT calls with this flag 35 | if (reconstruct) { 36 | if (VT0(SNP)) RECONSTRUCT ("SNV" , 3); 37 | else if (VT0(INS)) RECONSTRUCT ("insertion", 9); 38 | else RECONSTRUCT ("deletion" , 8); 39 | } 40 | 41 | return NO_NEW_VALUE; 42 | } 43 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "C_Cpp.dimInactiveRegions": false, 3 | "restructuredtext.confPath": "${workspaceFolder}\\docs", 4 | 5 | "terminal.integrated.profiles.windows": { 6 | "msys64": { 7 | "path": "C:\\msys64\\usr\\bin\\bash.exe", 8 | "env": { 9 | "CHERE_INVOKING" : "1" , 10 | "MSYSTEM": "MINGW64", 11 | }, 12 | } 13 | }, 14 | "restructuredtext.languageServer.disabled": true, 15 | 16 | "files.eol": "\n", 17 | 18 | "files.associations": { 19 | "*.vcf": "plaintext", 20 | "*.eb": "python", 21 | "genozip.h": "c", 22 | "strings.h": "c", 23 | "type_traits": "c", 24 | "typeinfo": "c", 25 | "piz.h": "c", 26 | "sam_private.h": "c", 27 | "errno.h": "c", 28 | "*.data": "c", 29 | "buffer.h": "c", 30 | "profiler.h": "c", 31 | "stats.h": "c", 32 | "file.h": "c", 33 | "*.tcc": "c", 34 | "optional": "c", 35 |
"system_error": "c", 36 | "array": "c", 37 | "functional": "c", 38 | "tuple": "c", 39 | "utility": "c", 40 | "endianness.h": "c", 41 | "version.h": "c", 42 | "libgen.h": "c", 43 | "compare": "c", 44 | "progress.h": "c", 45 | "pthread.h": "c", 46 | "numbers": "c", 47 | "license.h": "c" 48 | }, 49 | "cmake.sourceDirectory": "C:/Users/divon/genozip/src/onion", 50 | "cmake.configureOnOpen": false 51 | } 52 | -------------------------------------------------------------------------------- /src/regions.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // regions.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void regions_add (rom reg_str); 14 | extern void regions_add_by_file (rom regions_filename); 15 | extern void regions_make_chregs (ContextP chrom_ctx); 16 | extern void regions_transform_negative_to_positive_complement(void); 17 | extern bool regions_get_ra_intersection (WordIndex chrom_node_index, PosType64 min_pos, PosType64 max_pos); 18 | 19 | extern unsigned regions_get_num_range_intersections (WordIndex chrom_word_index); 20 | extern bool regions_get_range_intersection (WordIndex chrom_word_index, PosType64 min_pos, PosType64 max_pos, unsigned intersect_i, PosType64 *intersect_min_pos, PosType64 *intersect_max_pos, bool *revcomp); 21 | 22 | extern unsigned regions_max_num_chregs(void); 23 | extern void regions_display(rom title); 24 | extern bool regions_is_site_included (VBlockP vb); 25 | extern bool regions_is_range_included (WordIndex chrom, PosType64 start_pos, PosType64 end_pos, bool completely_included); 26 | #define 
regions_is_ra_included(ra) regions_is_range_included(ra->chrom_index, ra->min_pos, ra->max_pos, false) 27 | 28 | extern void regions_destroy (void); -------------------------------------------------------------------------------- /src/sam_bowtie2.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // sam_bowtie2.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #include "sam_private.h" 10 | #include "chrom.h" 11 | 12 | // ---------------------------------------------------------------------------------------------- 13 | // YS:i mate alignment score (bowtie2 and hisat2) 14 | // ---------------------------------------------------------------------------------------------- 15 | // segs YS into a channel context selected by sam_has_mate: a copy-mate-AS snip if the mate's AS equals YS, otherwise the integer itself 16 | void sam_seg_bowtie2_YS_i (VBlockSAMP vb, ZipDataLineSAMP dl, ValueType YS, unsigned add_bytes) 17 | { 18 | ASSERT (YS.i >= MIN_AS_i && YS.i <= MAX_AS_i, "%s: YS=%"PRId64" is ∉ [%d,%d]", LN_NAME, YS.i, MIN_AS_i, MAX_AS_i); 19 | // record YS as the context's last value and in the line's data 20 | ctx_set_last_value (VB, CTX (OPTION_YS_i), YS); 21 | dl->YS = YS.i; 22 | 23 | ZipDataLineSAMP mate_dl = DATA_LINE (vb->mate_line_i); // an invalid pointer if mate_line_i is -1 24 | 25 | ContextP channel_ctx = seg_mux_get_channel_ctx (VB, OPTION_YS_i, (MultiplexerP)&vb->mux_YS, sam_has_mate); 26 | // mate_dl is dereferenced only when sam_has_mate is true 27 | if (sam_has_mate && mate_dl->AS == YS.i) 28 | seg_by_ctx (VB, STRa(copy_mate_AS_snip), channel_ctx, add_bytes); 29 | 30 | else 31 | seg_integer_as_snip_do (VB, channel_ctx, YS.i, add_bytes); 32 | 33 | seg_by_did (VB, STRa(vb->mux_YS.snip), OPTION_YS_i, 0); // de-multiplexer 34 | } 35 | -------------------------------------------------------------------------------- /src/locs.h:
-------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // locs.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | #pragma GENDICT_PREFIX LOCS 14 | #pragma GENDICT LOCS_X=DTYPE_FIELD=X 15 | #pragma GENDICT LOCS_Y=DTYPE_FIELD=Y 16 | #pragma GENDICT LOCS_TOPLEVEL=DTYPE_FIELD=TOPLEVEL 17 | #pragma GENDICT LOCS_DEBUG_LINES=DTYPE_FIELD=DBGLINES // used by --debug-lines 18 | 19 | extern int32_t locs_is_header_done (bool is_eof); 20 | extern int32_t locs_unconsumed (VBlockP vb, uint32_t first_i); 21 | extern bool locs_seg_is_small (ConstVBlockP vb, DictId dict_id); 22 | extern void locs_seg_initialize (VBlockP vb); 23 | extern void locs_seg_finalize (VBlockP vb); 24 | extern rom locs_seg_txt_line (VBlockP vb, rom field_start_line, uint32_t remaining_txt_len, bool *has_special_eol); 25 | 26 | // SPECIALs 27 | SPECIAL (LOCS, 0, DELTA_FLOAT, locs_piz_special_DELTA_FLOAT); 28 | 29 | // Important: Numbers (and order) of translators cannot be changed, as they are part of the file format 30 | // (they are included in the TOPLEVEL container) 31 | // translator numbers must start from 1 - 0 is reserved for "none" 32 | TRANSLATOR (LOCS, LOCS, 1, LTEN_F32, container_translate_LTEN_F32) 33 | #define NUM_LOCS_TRANS 2 // including "none" 34 | #define LOCS_TRANSLATORS { NULL /* none */, container_translate_LTEN_F32 } 35 | -------------------------------------------------------------------------------- /src/vcf_1000G.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // 
vcf_1000G.c : 1000 Genomes Project fields 3 | // Copyright (C) 2024-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #include "vcf_private.h" 10 | // snip prepared once in vcf_1000G_zip_initialize with VCF_SPECIAL_DIVIDE_BY, INFO/AN and the divisor 2 11 | sSTRl(half_AN_snip, 32); 12 | 13 | void vcf_1000G_zip_initialize (void) 14 | { 15 | DO_ONCE { 16 | seg_prepare_snip_special_other_int (VCF_SPECIAL_DIVIDE_BY, _INFO_AN, half_AN_snip, 2); 17 | } 18 | } 19 | 20 | void vcf_1000G_seg_initialize (VBlockVCFP vb) 21 | { 22 | ctx_set_same_line (VB, INFO_MAF, INFO_NS, DID_EOL); 23 | } 24 | // MAF: if identical to the AF value of this line seg a copy-AF snip, otherwise seg the value itself 25 | void vcf_seg_INFO_MAF (VBlockVCFP vb, ContextP ctx, STRp(maf)) 26 | { 27 | if (has(AF) && str_issame_(STRa(maf), STRa(BII(AF)->value))) 28 | seg_by_ctx (VB, STRa(copy_INFO_AF_snip), ctx, maf_len); 29 | 30 | else 31 | seg_by_ctx (VB, STRa(maf), ctx, maf_len); 32 | } 33 | // NS: if both AN and NS parse as integers and NS == AN / 2, seg the half-AN snip; otherwise seg NS itself 34 | void vcf_seg_INFO_NS (VBlockVCFP vb, ContextP ctx, STRp(ns_str)) 35 | { 36 | // TO DO: a better method would be to postpone NS to after samples and actually count samples with data 37 | int64_t an, ns; 38 | 39 | if (has(AN) && str_get_int (STRa(BII(AN)->value), &an) && 40 | str_get_int (STRa(ns_str), &ns) && ns == an / 2) 41 | // NOTE(review): an / 2 truncates for an odd AN - presumably VCF_SPECIAL_DIVIDE_BY truncates the same way when reconstructing; confirm 42 | seg_by_ctx (VB, STRa(half_AN_snip), ctx, ns_str_len); 43 | 44 | else 45 | seg_integer_or_not (VB, ctx, STRa(ns_str), ns_str_len); 46 | } 47 | -------------------------------------------------------------------------------- /src/writer.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // writer.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software.
Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern void writer_z_initialize (void); 14 | extern void writer_create_plan (CompIType comp_i); 15 | 16 | // Writer thread 17 | extern bool writer_handover_data (VBlockP *vb_p); 18 | extern bool writer_handover_txtheader (VBlockP *txt_header_vb_p); 19 | extern void writer_finish_writing (bool is_last_txt_file); 20 | 21 | // BGZF threads 22 | extern uint32_t writer_get_max_bgzf_threads (void); 23 | 24 | // VBlock and Txt Header properties 25 | extern bool writer_am_i_pair_2 (VBIType vb_i, VBIType *pair_1_vb_i); 26 | extern bool writer_does_txtheader_need_write (CompIType comp_i); 27 | extern bool writer_does_txtheader_need_recon (CompIType comp_i); 28 | extern bool writer_does_vb_need_recon (VBIType vb_i); 29 | extern bool writer_does_vb_need_write (VBIType vb_i); 30 | extern BitsP writer_get_is_dropped (VBIType vb_i); 31 | extern bool writer_get_fasta_contig_grepped_out (VBIType vb_i); 32 | extern void writer_set_fasta_contig_grepped_out (VBIType vb_i); 33 | extern int64_t writer_get_txt_line_i (VBlockP vb, LineIType line_in_vb); 34 | extern void writer_set_num_txtheader_lines (CompIType comp_i, uint32_t num_txtheader_lines); 35 | 36 | extern VBlockP wvb; 37 | extern rom recon_plan_flavors[8]; 38 | -------------------------------------------------------------------------------- /src/vcf_gwas.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // vcf_gwas.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #include "vcf_private.h" 10 | 11 | // --------------- 12 | // ZIP 13 | // --------------- 14 | // nothing to initialize at the moment (empty function) 15 | void vcf_gwas_zip_initialize (void) 16 | { 17 | } 18 | 19 | void vcf_gwas_seg_initialize (VBlockVCFP vb) 20 | { 21 | // even though sparse, ES, SE, LP and AF are rarely unique - we're slightly better off without singletons 22 | ctx_set_no_stons (VB, INFO_AF, FORMAT_ES, FORMAT_SE, FORMAT_LP, DID_EOL); 23 | } 24 | 25 | //-------------------------------------------------------------------------------------------------------------- 26 | // VCF-GWAS: FORMAT/ID - segged as copy_VCF_ID_snip if VCF_ID was encountered in this line, has a valid last_txt and is identical to id. Otherwise segged with seg_id_field. 27 | // example: rs1885866 28 | //-------------------------------------------------------------------------------------------------------------- 29 | void vcf_gwas_seg_FORMAT_ID (VBlockVCFP vb, ContextP ctx, STRp(id)) 30 | { 31 | if (ctx_encountered_in_line (VB, VCF_ID) && 32 | CTX(VCF_ID)->last_txt.index != INVALID_LAST_TXT_INDEX && 33 | str_issame_(STRa(id), STRtxt(CTX(VCF_ID)->last_txt))) 34 | 35 | seg_by_ctx (VB, STRa(copy_VCF_ID_snip), ctx, id_len); 36 | 37 | else 38 | seg_id_field (VB, ctx, STRa(id), false, id_len); 39 | } 40 | 41 | // --------------- 42 | // PIZ 43 | // --------------- 44 | -------------------------------------------------------------------------------- /src/url.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // url.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include 12 | #include "genozip.h" 13 | #include "stream.h" 14 | #include "buffer.h" 15 | 16 | // remote file stream 17 | extern FILE *url_open_remote_file (StreamP parent_stream, rom url); 18 | extern void url_reset_if_remote_file_stream (StreamP maybe_remote_file_stream); 19 | extern void url_close_remote_file_stream (FILE **copy_of_input_pipe); 20 | extern void url_disconnect_from_remote_file_stream (FILE **copy_of_input_pipe); 21 | 22 | // access URLs 23 | extern rom url_get_status (rom url, thool *is_file_exists, int64_t *file_size); 24 | extern int32_t url_read_string (rom url, STRc(STRc), bool blocking, bool follow_redirects, rom show_errors); 25 | extern bool url_get_redirect (rom url, STRc(redirect_url), StreamP *redirect_stream); 26 | 27 | // string operations 28 | extern bool url_is_url (rom filename); 29 | 30 | extern char *url_esc_non_valid_chars_(rom in, char *out, bool esc_all_or_none); 31 | static inline char *url_esc_non_valid_chars (rom in) { return url_esc_non_valid_chars_ (in, NULL, false); } // on heap 32 | static inline char *url_esc_all_or_none (rom in) { return url_esc_non_valid_chars_ (in, NULL, true ); } // on heap 33 | 34 | extern StrTextLong url_esc_non_valid_charsS (rom in); // for short strings - on stack 35 | -------------------------------------------------------------------------------- /src/codec_none.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // codec_none.c 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #include "compressor.h" 10 | #include "vblock.h" 11 | #include "buffer.h" 12 | // "none" codec, ZIP side: copies the data as-is and sets *compressed_len = *uncompressed_len. On entry, *compressed_len must be at least *uncompressed_len: if not, returns false when soft_fail, otherwise fails the ASSERT. 13 | COMPRESS (codec_none_compress) 14 | { 15 | if (*compressed_len < *uncompressed_len && soft_fail) 16 | return false; 17 | // NOTE(review): the message below ends with a dangling "for " - looks like a trailing argument was dropped; confirm the intended text 18 | ASSERT (*compressed_len >= *uncompressed_len, "\"%s\": expecting compressed_len=%u >= uncompressed_len=%u for ", name, *compressed_len, *uncompressed_len); 19 | // the data comes either from the per-line callback (each line's data is appended at next), or from the uncompressed buffer in a single memcpy 20 | if (get_line_cb) { 21 | char *next = compressed; 22 | for (uint32_t line_i=0; line_i < vb->lines.len32; line_i++) { 23 | char *start1=0; 24 | uint32_t len1=0; 25 | // the length argument is what remains of *uncompressed_len after the data copied so far - presumably the maximum the callback may return (TODO confirm) 26 | get_line_cb (vb, ctx, line_i, &start1, &len1, *uncompressed_len - (next - compressed), NULL); 27 | 28 | if (start1 && len1) { memcpy (next, start1, len1); next += len1; } 29 | } 30 | } 31 | else 32 | memcpy (compressed, uncompressed, *uncompressed_len); 33 | 34 | *compressed_len = *uncompressed_len; 35 | 36 | return true; 37 | } 38 | // "none" codec, PIZ side: copies compressed_len bytes as-is into uncompressed_buf->data 39 | UNCOMPRESS (codec_none_uncompress) 40 | { 41 | memcpy (uncompressed_buf->data, compressed, compressed_len); 42 | } 43 | // the estimated size is the uncompressed length itself, cast to 32 bits 44 | uint32_t codec_none_est_size (Codec codec, uint64_t uncompressed_len) 45 | { 46 | return (uint32_t)uncompressed_len; 47 | } 48 | -------------------------------------------------------------------------------- /src/lookback.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // lookback.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "sections.h" 12 | 13 | #define STORE_LAST_TXT STORE_NONE 14 | extern void lookback_init (VBlockP vb, ContextP lb_ctx, ContextP ctx, StoreType store_type); 15 | 16 | extern void lookback_insert (VBlockP vb, Did lb_did_i, Did did_i, bool copy_last_value, ValueType value); 17 | extern void lookback_insert_container (VBlockP vb, ConstContainerP con, unsigned num_items, ContextP *item_ctxs); 18 | 19 | extern const void *lookback_get_do (VBlockP vb, ContextP lb_ctx, ContextP ctx, uint32_t lookback); 20 | #define lookback_get_index(vb, lb_ctx, ctx, lookback) (*(WordIndex *)lookback_get_do ((VBlockP)vb, (lb_ctx), (ctx), lookback)) 21 | #define lookback_get_value(vb, lb_ctx, ctx, lookback) (*(ValueType *)lookback_get_do ((VBlockP)vb, (lb_ctx), (ctx), lookback)) 22 | 23 | extern bool lookback_is_same_txt (VBlockP vb, Did lb_did_i, ContextP ctx, uint32_t lookback, STRp(str)); 24 | 25 | extern uint32_t lookback_get_next (VBlockP vb, ContextP lb_ctx, ContextP ctx, WordIndex search_for, int64_t *iterator); 26 | 27 | extern void lookback_flush (VBlockP vb, ConstMediumContainerP con); 28 | 29 | extern uint8_t lookback_size_to_local_param (uint32_t size); 30 | 31 | extern void lookback_shift_txt_index (VBlockP vb, ContextP lb_ctx, ContextP ctx, STRp (insert)); 32 | -------------------------------------------------------------------------------- /src/refhash.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // refhash.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "reference.h" 13 | 14 | #define HOOK 'G' 15 | #define HOOK_REV 'C' // complement of HOOK 16 | // GPOS sentinels: NO_GPOS is the 32-bit all-ones value, MAX_ALIGNER_GPOS is one less than it, NO_GPOS64 is -1 17 | #define MAX_ALIGNER_GPOS ((PosType64)0xfffffffe) 18 | #define NO_GPOS ((PosType64)0xffffffff) 19 | #define NO_GPOS64 ((PosType64)-1) 20 | 21 | extern unsigned num_layers; 22 | extern bool bits_per_hash_is_odd; // true if bits_per_hash is odd 23 | extern uint32_t nukes_per_hash; // = layer_bits[0] / 2 24 | extern uint32_t layer_bitmask[64]; // 1s in the layer_bits[] LSbs 25 | extern uint32_t **refhashs; 26 | extern Buffer refhash_buf; 27 | 28 | // make-reference stuff 29 | extern void refhash_initialize_for_make (void); 30 | extern void refhash_compress_refhash (void); 31 | extern void refhash_calc_one_range (VBlockP vb, ConstRangeP r, ConstRangeP next_r); 32 | extern Digest refhash_get_digest (void); 33 | 34 | // stuff for loading and using refhash when ZIPping a fastq or sam/bam file 35 | extern uint64_t refhash_get_refhash_size (void); 36 | extern void refhash_load (void); 37 | extern void refhash_load_standalone (void); 38 | extern void refhash_destroy (void); 39 | extern void refhash_set_digest (Digest digest); 40 | extern void refhash_update_layers (int64_t delta_bytes); 41 | extern ConstBitsP refhash_get_emoneg (void); 42 | -------------------------------------------------------------------------------- /src/txtfile.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // txtfile.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "digest.h" 13 | 14 | #define TXTFILE_READ_VB_PADDING 16 // we need this quantity of unused bytes at the end of vb.txt_data 15 | 16 | extern uint32_t txtfile_fread (FileP file, FILE *fp, void *addr, int32_t size, int64_t *disk_so_far); 17 | extern void txtfile_fwrite (const void *data, uint32_t size); 18 | extern void txtfile_query_first_bytes_in_file (rom filename, uint32_t len); 19 | extern void txtfile_initialize_igzip (FileP file); 20 | extern StrText txtfile_dump_vb (VBlockP vb, rom base_name, BufferP txt_data); 21 | extern StrTextLong txtfile_codec_name (FileP z_file, CompIType comp_i, bool obscure_fname); 22 | extern void txtfile_zip_finalize_codecs (void); 23 | extern void txtfile_read_header (bool is_first_txt); 24 | extern uint32_t txt_data_alloc_size (uint32_t vb_size) ; 25 | extern void txtfile_read_vblock (VBlockP vb); 26 | extern bool txtfile_is_gzip (FileP file); 27 | extern void txtfile_discover_specific_gz (FileP file); 28 | extern rom isal_error (int ret); 29 | 30 | // callbacks 31 | extern int32_t def_unconsumed (VBlockP vb, uint32_t first_i); 32 | extern int32_t def_is_header_done (bool is_eof); 33 | 34 | extern DataType txtfile_zip_get_file_dt (rom filename); 35 | 36 | // misc 37 | extern StrTextLong display_gz_header (STR8p(h), bool obscure_fname); -------------------------------------------------------------------------------- /src/sam_dragen.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // sam_dragen.c 3 | // Copyright (C) 2023-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 
8 | 9 | #include "sam_private.h" 10 | // ZIP: initializes the dragen_sd multiplexer for the sd:f context, with SAM_SPECIAL_sd as its special 11 | void sam_dragen_seg_initialize (VBlockSAMP vb) 12 | { 13 | seg_mux_init (vb, OPTION_sd_f, SAM_SPECIAL_sd, false, dragen_sd); 14 | } 15 | // ZIP and PIZ: channel 1 if AS equals the sequence length, channel 0 otherwise 16 | static int sd_channel_i (int seq_len, int as) 17 | { 18 | return (seq_len == as); 19 | } 20 | // ZIP: if dl->AS is non-zero, sd:f is segged into the channel selected by sd_channel_i and the multiplexer snip is segged into sd:f itself. Otherwise sd:f is segged directly into its own context. 21 | void sam_dragen_seg_sd_f (VBlockSAMP vb, ZipDataLineSAMP dl, STRp(sd), ValueType numeric, unsigned add_bytes) 22 | { 23 | if (dl->AS) { 24 | int channel_i = sd_channel_i (dl->SEQ.len, dl->AS); 25 | ContextP channel_ctx = seg_mux_get_channel_ctx (VB, OPTION_sd_f, (MultiplexerP)&vb->mux_dragen_sd, channel_i); 26 | 27 | sam_seg_float_as_snip (vb, channel_ctx, STRa(sd), numeric, add_bytes); 28 | 29 | seg_by_did (VB, STRa(vb->mux_dragen_sd.snip), OPTION_sd_f, 0); // de-multiplexer 30 | } 31 | 32 | // no AS in line (not expected in Dragen) or AS=0 (not expected either) 33 | else 34 | sam_seg_float_as_snip (vb, CTX(OPTION_sd_f), STRa(sd), numeric, add_bytes); 35 | } 36 | // PIZ: demultiplexes sd:f - the channel is recalculated with sd_channel_i from vb->seq_len and the AS:i value of this line (asserted to have a value) 37 | SPECIAL_RECONSTRUCTOR (sam_piz_special_sd) 38 | { 39 | ContextP as_ctx; 40 | ASSPIZ0 (ctx_has_value_in_line (vb, _OPTION_AS_i, &as_ctx), "AS:i was not reconstructed for this line"); 41 | 42 | int channel_i = sd_channel_i (vb->seq_len, as_ctx->last_value.i); 43 | 44 | return reconstruct_demultiplex (vb, ctx, STRa(snip), channel_i, new_value, reconstruct); 45 | } 46 | -------------------------------------------------------------------------------- /src/vcf_vagrent.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // vcf_vagrent.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #include "vcf_private.h" 10 | 11 | sSTRl(copy_VD_snip, 30); 12 | // prepares (once) copy_VD_snip: a SNIP_COPY snip that refers to INFO/VD 13 | void vcf_vagrent_zip_initialize (void) 14 | { 15 | DO_ONCE { 16 | seg_prepare_snip_other (SNIP_COPY, _INFO_VD, false, 0, copy_VD_snip); 17 | } 18 | } 19 | // common segging of a VD or VW value: as a '|'-separated array 20 | static void vcf_seg_VD_VW (VBlockVCFP vb, ContextP ctx, STRp(value)) 21 | { 22 | // examples of values: 23 | // ENSG00000197049|ENST00000358533|downstream 24 | // SAMD11|CCDS2.2|intronic 25 | // ISG15|CCDS6.1|r.44_45insa|c.?|p.? 26 | // CDK11B|CCDS44042.1|r.452_453insaaagaa|c.372_373insAAAGAA|p.E126_R127insKE 27 | 28 | // TO DO: improve this 29 | seg_array (VB, ctx, ctx->did_i, STRa(value), '|', 0, false, STORE_NONE, _INFO_VDVW_ARR, value_len); 30 | } 31 | 32 | // ##INFO= 33 | void vcf_seg_INFO_VD (VBlockVCFP vb, ContextP ctx, STRp(vd)) 34 | { 35 | vcf_seg_VD_VW (vb, ctx, STRa(vd)); 36 | // record VD's text, so that vcf_seg_INFO_VW can compare VW against it 37 | set_last_txt (INFO_VD, vd); 38 | } 39 | 40 | // ##INFO= 41 | void vcf_seg_INFO_VW (VBlockVCFP vb, ContextP ctx, STRp(vw)) 42 | { 43 | if (ctx_encountered_in_line (VB, INFO_VD) && str_issame_(STRa(vw), STRlst(INFO_VD))) 44 | seg_by_ctx (VB, STRa(copy_VD_snip), ctx, vw_len); 45 | // VD was not encountered in this line or its value differs - VW is segged as an array, just like VD 46 | else 47 | vcf_seg_VD_VW (vb, ctx, STRa(vw)); 48 | } 49 | -------------------------------------------------------------------------------- /src/me23.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // me23.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | 10 | #pragma once 11 | 12 | #include "genozip.h" 13 | 14 | #pragma GENDICT_PREFIX ME23 15 | #pragma GENDICT ME23_CHROM=DTYPE_FIELD=CHROM 16 | #pragma GENDICT ME23_POS=DTYPE_FIELD=POS 17 | #pragma GENDICT ME23_ID=DTYPE_FIELD=ID 18 | #pragma GENDICT ME23_GENOTYPE=DTYPE_FIELD=GENOTYPE 19 | #pragma GENDICT ME23_EOL=DTYPE_FIELD=EOL 20 | #pragma GENDICT ME23_TOPLEVEL=DTYPE_FIELD=TOPLEVEL 21 | #pragma GENDICT ME23_TOP2VCF=DTYPE_FIELD=TOP2VCF 22 | #pragma GENDICT ME23_DEBUG_LINES=DTYPE_FIELD=DBGLINES // used by --debug-lines 23 | 24 | extern bool is_me23 (STRp(header), bool *need_more); 25 | extern rom me23_seg_txt_line (VBlockP vb_, rom field_start_line, uint32_t remaining_txt_len, bool *has_special_eol); 26 | extern void me23_seg_initialize (VBlockP vb); 27 | extern void me23_seg_finalize (VBlockP vb); 28 | extern bool me23_seg_is_small (ConstVBlockP vb, DictId dict_id); 29 | extern bool me23_header_inspect (VBlockP txt_header_vb, BufferP txt_header, struct FlagsTxtHeader txt_header_flags); 30 | 31 | // translator numbers must start from 1 - 0 is reserved for "none" 32 | TRANSLATOR (ME23, VCF, 1, GENOTYPE, sam_piz_m232vcf_GENOTYPE) // reconstruct VCF GENOTYPE field as VCF - REF,ALT,QUAL,FILTER,INFO,FORMAT,Sample 33 | #define NUM_ME23_TRANS 2 // including "none" 34 | #define ME23_TRANSLATORS { NULL /* none */, sam_piz_m232vcf_GENOTYPE } 35 | 36 | TXTHEADER_TRANSLATOR (txtheader_me232vcf); 37 | -------------------------------------------------------------------------------- /src/mac_compat.c: -------------------------------------------------------------------------------- 1 | #if defined __APPLE__ 2 | 3 | #include "mac_compat.h" 4 | #include 5 | 6 | #ifdef mach_time_h 7 | 8 | #define MT_NANO (+1.0E-9) 9 | #define MT_GIGA UINT64_C(1000000000) 10 | 11 | // TODO create a list of timers, 12 | static double mt_timebase = 0.0; 13 | static uint64_t mt_timestart = 0; 14 | 15 | // the clock_gettime function: 16 | // Source: 
https://gist.github.com/alfwatt/3588c5aa1f7a1ef7a3bb 17 | // Copyright (c) 2015-2018 Alf Watt - Open Source - https://opensource.org/licenses/MIT 18 | int clock_gettime(clockid_t clk_id, struct timespec *tp) 19 | { 20 | kern_return_t retval = KERN_SUCCESS; 21 | if( clk_id == TIMER_ABSTIME) { 22 | if (!mt_timestart) { // only one timer, initilized on the first call to the TIMER 23 | mach_timebase_info_data_t tb = { 0 }; 24 | mach_timebase_info(&tb); 25 | mt_timebase = tb.numer; 26 | mt_timebase /= tb.denom; 27 | mt_timestart = mach_absolute_time(); 28 | } 29 | 30 | double diff = (mach_absolute_time() - mt_timestart) * mt_timebase; 31 | tp->tv_sec = diff * MT_NANO; 32 | tp->tv_nsec = diff - (tp->tv_sec * MT_GIGA); 33 | } 34 | else { // other clk_ids are mapped to the coresponding mach clock_service 35 | clock_serv_t cclock; 36 | mach_timespec_t mts; 37 | 38 | host_get_clock_service(mach_host_self(), clk_id, &cclock); 39 | retval = clock_get_time(cclock, &mts); 40 | mach_port_deallocate(mach_task_self(), cclock); 41 | 42 | tp->tv_sec = mts.tv_sec; 43 | tp->tv_nsec = mts.tv_nsec; 44 | } 45 | 46 | return retval; 47 | } 48 | 49 | #endif 50 | 51 | #endif 52 | 53 | /* Copyright (c) 2015-2018 Alf Watt - Open Source - https://opensource.org/licenses/MIT */ 54 | -------------------------------------------------------------------------------- /src/vcf_vblock.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // vcf_vblock.c 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | // vb stands for VBlock - it started its life as VBlockVCF when genozip could only compress VCFs, but now 10 | // it means a block of lines from the text file. 11 | 12 | #include "vcf_private.h" 13 | // sizes of the VCF-specific VBlock struct and ZIP data-line struct (dt is not used) 14 | unsigned vcf_vb_size (DataType dt) { return sizeof (VBlockVCF); } 15 | unsigned vcf_vb_zip_dl_size (void) { return sizeof (ZipDataLineVCF); } 16 | 17 | // ZIP/PIZ: called before segging / reconstructing each line 18 | void vcf_reset_line (VBlockP vb_) 19 | { 20 | VBlockVCFP vb = (VBlockVCFP)vb_; 21 | // per-line state of the VB 22 | vb->sample_i = 0; 23 | vb->deferred_q_len = 0; 24 | vb->mate_line_i = NO_LINE; 25 | ii_buf.len32 = 0; 26 | 27 | N_ALTS = 0; // = ALT not parsed yet 28 | CTX(FORMAT_GT_HT)->use_HT_matrix = false; 29 | CTX(FORMAT_RGQ)->line_has_RGQ = unknown; 30 | // contexts whose ctx_specific field is zeroed at the start of every line 31 | Did reset_ctx_specific[] = { 32 | FORMAT_SB, INFO_AN, INFO_DP, INFO_QD, INFO_RU, VCF_QUAL, 33 | }; 34 | for (int i=0; i < ARRAY_LEN(reset_ctx_specific); i++) 35 | CTX(reset_ctx_specific[i])->ctx_specific = 0; 36 | // ZIP-only resets: every byte from first_idx up to (not including) after_idx is set to 0xff 37 | if (IS_ZIP) { 38 | memset (B1ST16(CTX(VCF_FORMAT)->sf_i), 0xff, MAX_DICTS * sizeof (uint16_t)); // set all sf_i to NO_SF_I 39 | memset (&vb->first_idx, 0xff, (char*)&vb->after_idx - (char*)&vb->first_idx); // set all idx's to -1 40 | } 41 | // otherwise (not ZIP): clear all the is_deferred_ flags 42 | else { 43 | memset (vb->is_deferred_, 0, sizeof (vb->is_deferred_)); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/aligner.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // aligner.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | #define declare_seq_contexts ContextP __attribute__((unused)) \ 14 | bitmap_ctx = CTX(SAM_SQBITMAP), \ 15 | nonref_ctx = CTX(SAM_NONREF), \ 16 | gpos_ctx = CTX(SAM_GPOS), \ 17 | gpos_d_ctx = CTX(SAM_GPOS_DELTA), \ 18 | gpos_r2_ctx = CTX(SAM_GPOS_R2), \ 19 | strand_ctx = CTX(SAM_STRAND), \ 20 | strand_r2_ctx = CTX(SAM_STRAND_R2), \ 21 | seqmis_ctx = CTX(SAM_SEQMIS_A), \ 22 | seqins_ctx = CTX(SAM_SEQINS_A) 23 | 24 | 25 | typedef enum { MAPPING_NO_MAPPING, MAPPING_ALIGNED, MAPPING_PERFECT } MappingType; 26 | 27 | extern MappingType aligner_seg_seq (VBlockP vb, STRp(seq), bool is_pair_2, PosType64 pair_gpos, bool pair_is_forward); 28 | extern void aligner_seg_gpos_and_fwd (VBlockP vb, PosType64 gpos, bool is_forward, bool is_pair_2, PosType64 pair_gpos, bool pair_is_forward); 29 | extern void aligner_reconstruct_seq (VBlockP vb, uint32_t seq_len, bool is_pair_2, bool is_perfect_alignment, ReconType reconstruct, int mismatches_len, char *mismatch_base, uint32_t *mismatch_offset, uint32_t *num_mismatches); 30 | extern void aligner_recon_get_gpos_and_fwd (VBlockP vb, bool is_pair_2, PosType64 *gpos, bool *is_forward); 31 | -------------------------------------------------------------------------------- /src/arch.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // arch.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | extern void arch_initialize (rom argv0); 12 | extern unsigned arch_get_num_cores (void); 13 | extern double arch_get_physical_mem_size (void); 14 | extern StrText arch_get_filesystem_type (FileP file); 15 | extern StrText arch_get_txt_filesystem (void); 16 | extern StrText arch_get_z_filesystem (void); 17 | 18 | extern rom arch_get_endianity (void); 19 | extern void arch_set_locale (void); 20 | 21 | #define NET_ID_SIZE 32 22 | extern rom arch_get_os (void); 23 | extern rom arch_get_scheduler (void); 24 | extern rom arch_get_glibc (void); 25 | extern StrTextSuperLong arch_get_executable (void); 26 | extern StrTextSuperLong arch_get_genozip_executable (void); 27 | extern rom arch_get_argv0 (void); 28 | extern bool arch_is_valgrind (void); 29 | extern bool arch_is_docker (void); 30 | extern bool arch_is_first_compression (void); 31 | extern Timestamp arch_timestamp (void); 32 | extern bool arch_is_process_alive (uint32_t pid); 33 | extern uint64_t arch_get_max_resident_set (void); 34 | extern bool wget_available (void); 35 | extern bool curl_available (void); 36 | 37 | extern rom get_distribution (void); 38 | extern bool dist_is_conda (void); 39 | extern bool dist_is_installforge (void); 40 | extern bool dist_is_github (void); 41 | 42 | static inline uint32_t arch_time_lap (uint128_t ts_start) // in msec 43 | { return (uint32_t)((arch_timestamp() - ts_start) / 1000000); } 44 | 45 | extern Timestamp arch_start_time; 46 | -------------------------------------------------------------------------------- /src/chrom.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // chrom.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. 
Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "file.h" 12 | 13 | 14 | 15 | // seg: chrom_seg segs into CHROM accounting for chrom_len+1 bytes ; chrom_seg_by_did_i segs into did_i with the add_bytes provided by the caller 16 | extern WordIndex chrom_seg_ex (VBlockP vb, Did did_i, STRp(chrom), PosType64 LN, int add_bytes, bool *is_new); 17 | static inline WordIndex chrom_seg (VBlockP vb, STRp(chrom)) { return chrom_seg_ex (vb, CHROM, STRa(chrom), 0, chrom_len+1, NULL); } 18 | static inline WordIndex chrom_seg_by_did_i (VBlockP vb, Did did_i, STRp(chrom), unsigned add_bytes) 19 | { return chrom_seg_ex (vb, did_i, STRa(chrom), 0, add_bytes, NULL); } 20 | extern bool chrom_seg_cb (VBlockP vb, ContextP ctx, STRp (chrom), uint32_t repeat); 21 | extern WordIndex chrom_seg_no_b250 (VBlockP vb, STRp(chrom), bool *is_new); 22 | 23 | // sorter 24 | extern void chrom_index_by_name (Did chrom_did_i); // ZIP and PIZ 25 | extern WordIndex chrom_get_by_name (STRp (chrom_name)); // ZIP and PIZ 26 | 27 | // chrom2ref 28 | extern void chrom_calculate_ref2chrom (uint64_t num_ref_contigs); 29 | extern void chrom_2ref_compress (void); 30 | extern WordIndex chrom_2ref_seg_get (ConstVBlockP vb, WordIndex chrom_index); 31 | #define chrom_2ref_seg_is_needed(did_i) ((did_i) == CHROM && IS_REF_CHROM2REF) 32 | // chrom_2ref_piz_get: maps chrom_index through ZCTX(CHROM)->chrom2ref_map if the map has entries, otherwise returns chrom_index unchanged 33 | extern void chrom_2ref_load (void); 34 | static inline WordIndex chrom_2ref_piz_get (WordIndex chrom_index) 35 | { return ZCTX(CHROM)->chrom2ref_map.len ? *B(WordIndex, ZCTX(CHROM)->chrom2ref_map, chrom_index) : chrom_index; } 36 | 37 | extern void chrom_finalize (void); -------------------------------------------------------------------------------- /src/stream.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // stream.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending.
4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include 12 | #include "genozip.h" 13 | 14 | typedef struct stream_ *StreamP; 15 | 16 | #define MAX_ARGC 64 // maximum number of arguments including exec_name 17 | 18 | #define DEFAULT_PIPE_SIZE 65536 19 | #define SKIP_ARG ((rom )-1) 20 | extern StreamP stream_create (StreamP parent_stream, uint32_t from_stream_stdout, uint32_t from_stream_stderr, uint32_t to_stream, 21 | FILE *redirect_stdout_file, rom input_url_name, bool input_stdin, rom reason, 22 | rom exec_name, ...); 23 | 24 | extern void stream_set_inheritability (int fd, bool is_inheritable); 25 | 26 | extern void stream_close_pipes (StreamP stream); 27 | 28 | typedef enum { STREAM_KILL_PROCESS, STREAM_WAIT_FOR_PROCESS, STREAM_DONT_WAIT_FOR_PROCESS } StreamCloseMode; 29 | extern int stream_close (StreamP *stream, StreamCloseMode close_mode); 30 | 31 | extern int stream_wait_for_exit (StreamP stream, bool killed); 32 | 33 | extern void stream_abort_if_cannot_run (rom exec_name, rom reason); 34 | 35 | extern FILE *stream_from_stream_stdout (StreamP stream); 36 | extern FILE *stream_from_stream_stderr (StreamP stream); 37 | extern FILE *stream_to_stream_stdin (StreamP stream); 38 | 39 | extern rom stream_get_exec_name (StreamP stream); 40 | 41 | extern bool stream_exec_is (StreamP stream, rom exec_name); 42 | 43 | extern rom genozip_exit_code_name (ExitCode exit_code); 44 | 45 | -------------------------------------------------------------------------------- /src/bed.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // bed.h 3 | // Copyright (C) 2023-2025 Genozip Limited. Patent Pending. 
4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | #pragma GENDICT_PREFIX BED 14 | 15 | #pragma GENDICT BED_CHROM=DTYPE_FIELD=CHROM // must be 1st as this is BED's CHROM. 16 | #pragma GENDICT BED_START=DTYPE_FIELD=START 17 | #pragma GENDICT BED_END=DTYPE_FIELD=END 18 | #pragma GENDICT BED_NAME=DTYPE_FIELD=NAME 19 | #pragma GENDICT BED_SCORE=DTYPE_FIELD=SCORE 20 | #pragma GENDICT BED_STRAND=DTYPE_FIELD=STRAND 21 | #pragma GENDICT BED_TSTART=DTYPE_FIELD=TSTART 22 | #pragma GENDICT BED_TEND=DTYPE_FIELD=TEND 23 | #pragma GENDICT BED_RGB=DTYPE_FIELD=RGB 24 | #pragma GENDICT BED_BCOUNT=DTYPE_FIELD=BCOUNT 25 | #pragma GENDICT BED_BSIZES=DTYPE_FIELD=BSIZES 26 | #pragma GENDICT BED_BSTARTS=DTYPE_FIELD=BSTARTS 27 | #pragma GENDICT BED_EOL=DTYPE_FIELD=EOL 28 | #pragma GENDICT BED_TOPLEVEL=DTYPE_FIELD=TOPLEVEL 29 | #pragma GENDICT BED_DEBUG_LINES=DTYPE_FIELD=DBGLINES // used by --debug-lines 30 | 31 | // ZIP Stuff 32 | extern void bed_zip_initialize (void); 33 | extern bool is_bed (STRp(header), bool *need_more); 34 | extern int32_t bed_is_header_done (bool is_eof); 35 | extern rom bed_seg_txt_line (VBlockP vb_, rom field_start_line, uint32_t remaining_txt_len, bool *has_special_eol); 36 | extern void bed_seg_initialize (VBlockP vb_); 37 | extern void bed_segconf_finalize (VBlockP vb); 38 | extern void bed_seg_finalize (VBlockP vb); 39 | extern bool bed_seg_is_small (ConstVBlockP vb, DictId dict_id); 40 | 41 | // SPECIALs 42 | SPECIAL (BED, 0, BCOUNT, bed_piz_special_BCOUNT); 43 | -------------------------------------------------------------------------------- /src/sam_hisat2.c: -------------------------------------------------------------------------------- 1 | // 
------------------------------------------------------------------ 2 | // sam_hisat2.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | // Compresses auxiliary fields generated by hisat2 mapper 10 | 11 | #include "sam_private.h" 12 | 13 | // HISAT2 Zs:Z field: "When the alignment of a read involves SNPs that are in the index, this option is used to indicate where exactly the read 14 | // involves the SNPs. This optional field is similar to the above MD:Z field. For example, Zs:Z:1|S|rs3747203,97|S|rs16990981 15 | // indicates the second base of the read corresponds to a known SNP (ID: rs3747203). 97 bases after the third base (the base 16 | // after the second one), the read at 100th base involves another known SNP (ID: rs16990981). ‘S’ indicates a single nucleotide 17 | // polymorphism. ‘D’ and ‘I’ indicate a deletion and an insertion, respectively."
(http://daehwankimlab.github.io/hisat2/manual/) 18 | void sam_seg_HISAT2_Zs_Z (VBlockSAMP vb, STRp(zs), unsigned add_bytes) 19 | { 20 | static const MediumContainer con = { // a ','-separated array (no final ',') of 3-item structs: POS|TYPE|RS 21 | .nitems_lo = 3, 22 | .drop_final_repsep = true, 23 | .repsep = {','}, 24 | .items = { { .dict_id.num = _OPTION_Zs_POS, .separator = {'|'} }, 25 | { .dict_id.num = _OPTION_Zs_TYPE, .separator = {'|'} }, 26 | { .dict_id.num = _OPTION_Zs_RS } } 27 | }; 28 | // POS and TYPE have no seg callback (0) ; RS is segged with seg_id_field_varlen_int_cb 29 | seg_array_of_struct (VB, CTX(OPTION_Zs_Z), con, STRa(zs), (SegCallback[]){0, 0, seg_id_field_varlen_int_cb}, NULL, add_bytes); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /src/threads.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // threads.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license.
8 | 9 | #pragma once 10 | 11 | extern void threads_initialize (void); 12 | extern void threads_finalize (void); 13 | 14 | extern ThreadId threads_create (void (*func)(VBlockP), VBlockP vb); 15 | extern void threads_join_do (ThreadId *thread_id, rom expected_task, rom expected_task2, rom func); 16 | #define threads_join(threads_id, expected_task) threads_join_do((threads_id), (expected_task), (expected_task), __FUNCTION__) 17 | #define threads_join2(threads_id, expected_task, expected_task2) threads_join_do((threads_id), (expected_task), (expected_task2), __FUNCTION__) 18 | extern void threads_cancel_other_threads (void); 19 | extern void threads_print_call_stack (void); 20 | // true if the calling thread is main_thread. pthread_t is an opaque type in POSIX, so it is compared with pthread_equal() rather than == 21 | extern pthread_t main_thread; 22 | static inline bool threads_am_i_main_thread (void) { return pthread_equal (pthread_self(), main_thread) != 0; } 23 | 24 | // writer / zriter thread stuff 25 | extern void threads_set_writer_thread (void); 26 | extern void threads_set_zriter_thread (void); 27 | extern void threads_unset_writer_thread (void); 28 | extern void threads_unset_zriter_thread (void); 29 | extern bool threads_am_i_writer_thread (void); 30 | extern bool threads_am_i_zriter_thread (void); 31 | 32 | // for debugging thread issues, activated with --debug-threads or --show-threads 33 | void threads_log_by_vb (ConstVBlockP vb, rom task_name, rom event, int time_usec); 34 | void threads_write_log (bool to_info_stream); 35 | 36 | #define ASSERTMAINTHREAD ASSERT (threads_am_i_main_thread(), "%s can only be called in main thread", __FUNCTION__) 37 | 38 | -------------------------------------------------------------------------------- /src/igzip/aarch64/igzip_inflate_multibinary_arm64.S: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2019 Arm Corporation All rights reserved.
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | **********************************************************************/ 29 | 30 | #include "aarch64_multibinary.h" 31 | 32 | mbin_interface decode_huffman_code_block_stateless 33 | -------------------------------------------------------------------------------- /src/ref_iupacs.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // ref_iupacs.h 3 | // Copyright (C) 2021-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | // make-reference side 14 | extern void ref_iupacs_compress (void); 15 | extern void ref_iupacs_after_compute (VBlockP vb); 16 | 17 | extern void ref_iupacs_add_do (VBlockP vb, uint64_t idx, char iupac); 18 | static inline void ref_iupacs_add (VBlockP vb, uint64_t idx, char base) 19 | { 20 | // true for multi-base iupac codes, except N: http://www.bioinformatics.org/sms/iupac.html 21 | static const char base2iupac[256] = { ['U']='U', ['R']='R', ['Y']='Y', ['S']='S', ['W']='W', ['K']='K', ['M']='M', ['B']='B', ['D']='D', ['H']='H', ['V']='V', 22 | ['u']='U', ['r']='R', ['y']='Y', ['s']='S', ['w']='W', ['k']='K', ['m']='M', ['b']='B', ['d']='D', ['h']='H', ['v']='V' }; 23 | if (base2iupac[(int)base]) ref_iupacs_add_do (vb, idx, base2iupac[(int)base]); 24 | } 25 | 26 | #define IUPAC_IS_INCLUDED(ref_base,vcf_base) hxcgcb 27 | 28 | // using with --chain side 29 | extern void ref_iupacs_load (void); 30 | 31 | #define ref_iupacs_is_included(vb, range, pos, vcf_base) \ 32 | (((range) == (vb)->iupacs_last_range && (pos) > (vb)->iupacs_last_pos && (pos) < (vb)->iupacs_next_pos) ? 
false /* quick negative */ \ 33 | : ref_iupacs_is_included_do ((vb), (range), (pos), (vcf_base))) 34 | extern bool ref_iupacs_is_included_do (VBlockP vb, const Range *range, PosType64 pos, char vcf_base); 35 | extern char ref_iupacs_get (const Range *r, PosType64 pos, bool reverse, PosType64 *next_pos); 36 | -------------------------------------------------------------------------------- /src/lzma/LzHash.h: -------------------------------------------------------------------------------- 1 | /* LzHash.h -- HASH functions for LZ algorithms 2 | 2015-04-12 : Igor Pavlov : Public domain */ 3 | 4 | #pragma once 5 | 6 | #define kHash2Size (1 << 10) 7 | #define kHash3Size (1 << 16) 8 | #define kHash4Size (1 << 20) 9 | 10 | #define kFix3HashSize (kHash2Size) 11 | #define kFix4HashSize (kHash2Size + kHash3Size) 12 | #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) 13 | 14 | #define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); 15 | 16 | #define HASH3_CALC { \ 17 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 18 | h2 = temp & (kHash2Size - 1); \ 19 | hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } 20 | 21 | #define HASH4_CALC { \ 22 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 23 | h2 = temp & (kHash2Size - 1); \ 24 | temp ^= ((UInt32)cur[2] << 8); \ 25 | h3 = temp & (kHash3Size - 1); \ 26 | hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } 27 | 28 | #define HASH5_CALC { \ 29 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 30 | h2 = temp & (kHash2Size - 1); \ 31 | temp ^= ((UInt32)cur[2] << 8); \ 32 | h3 = temp & (kHash3Size - 1); \ 33 | temp ^= (p->crc[cur[3]] << 5); \ 34 | h4 = temp & (kHash4Size - 1); \ 35 | hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } 36 | 37 | /* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ 38 | #define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; 39 | 40 | 41 | #define MT_HASH2_CALC \ 42 | h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); 43 | 44 | 
#define MT_HASH3_CALC { \ 45 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 46 | h2 = temp & (kHash2Size - 1); \ 47 | h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } 48 | 49 | #define MT_HASH4_CALC { \ 50 | UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 51 | h2 = temp & (kHash2Size - 1); \ 52 | temp ^= ((UInt32)cur[2] << 8); \ 53 | h3 = temp & (kHash3Size - 1); \ 54 | h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } 55 | 56 | -------------------------------------------------------------------------------- /src/sam_tmap.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // sam_tmap.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 
8 | 9 | // Compresses auxiliary fields generated by tmap (IonXpress) mapper 10 | 11 | #include <math.h> 12 | #include "sam_private.h" 13 | // segs XM:i as a special snip when it equals vb->ref_consumed, otherwise as an integer 14 | void sam_seg_tmap_XM_i (VBlockSAMP vb, int64_t xm, unsigned add_bytes) 15 | { 16 | decl_ctx (OPTION_XM_i); 17 | 18 | // xm is predicted to be ref_consumed 19 | if (xm == vb->ref_consumed) 20 | seg_special0 (VB, SAM_SPECIAL_REF_CONSUMED, ctx, add_bytes); 21 | 22 | else 23 | seg_integer (VB, ctx, xm, true, add_bytes); 24 | } 25 | // linear prediction of XT from AS, rounded to the nearest integer (round() is declared in math.h); called by both the seg side and the reconstructor below 26 | static int64_t xt_prediction (int64_t as) 27 | { 28 | return round ((double)as * 0.2510174851 - 3.192195876); // based on AS vs XT regression in test.ion-cardiomyopathy.bam 29 | } 30 | // segs XT:i as a delta from xt_prediction when has(AS_i) holds, otherwise as the integer itself 31 | void sam_seg_tmap_XT_i (VBlockSAMP vb, ZipDataLineSAMP dl, int64_t xt, unsigned add_bytes) 32 | { 33 | decl_ctx (OPTION_XT_i); 34 | 35 | if (has(AS_i)) { 36 | int64_t delta = xt - xt_prediction (dl->AS); 37 | 38 | seg_special1 (VB, SAM_SPECIAL_TMAP_XT, '1'/*method version*/, ctx, add_bytes); 39 | seg_integer (VB, ctx, delta, false, 0); 40 | } 41 | 42 | else 43 | seg_integer (VB, ctx, xt, true, add_bytes); 44 | } 45 | // reconstructs XT:i as xt_prediction of the peeked AS:i value plus the delta read from local 46 | SPECIAL_RECONSTRUCTOR (sam_piz_special_TMAP_XT) 47 | { 48 | int64_t as = reconstruct_peek (VB, CTX(OPTION_AS_i), 0, 0).i; 49 | int64_t delta = reconstruct_from_local_int (vb, ctx, 0, RECON_OFF); 50 | 51 | new_value->i = xt_prediction (as) + delta; 52 | 53 | if (reconstruct) RECONSTRUCT_INT (new_value->i); 54 | 55 | return HAS_NEW_VALUE; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/deflate_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * deflate_constants.h - constants for the DEFLATE compression format 3 | */ 4 | 5 | #ifndef LIB_DEFLATE_CONSTANTS_H 6 | #define LIB_DEFLATE_CONSTANTS_H 7 | 8 | /* Valid block types */ 9 | #define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0 10 | #define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1 11 | #define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2 12 | 13 | /*
Minimum and maximum supported match lengths (in bytes) */ 14 | #define DEFLATE_MIN_MATCH_LEN 3 15 | #define DEFLATE_MAX_MATCH_LEN 258 16 | 17 | /* Maximum supported match offset (in bytes) */ 18 | #define DEFLATE_MAX_MATCH_OFFSET 32768 19 | 20 | /* log2 of DEFLATE_MAX_MATCH_OFFSET */ 21 | #define DEFLATE_WINDOW_ORDER 15 22 | 23 | /* Number of symbols in each Huffman code. Note: for the literal/length 24 | * and offset codes, these are actually the maximum values; a given block 25 | * might use fewer symbols. */ 26 | #define DEFLATE_NUM_PRECODE_SYMS 19 27 | #define DEFLATE_NUM_LITLEN_SYMS 288 28 | #define DEFLATE_NUM_OFFSET_SYMS 32 29 | 30 | /* The maximum number of symbols across all codes */ 31 | #define DEFLATE_MAX_NUM_SYMS 288 32 | 33 | /* Division of symbols in the literal/length code */ 34 | #define DEFLATE_NUM_LITERALS 256 35 | #define DEFLATE_END_OF_BLOCK 256 36 | #define DEFLATE_FIRST_LEN_SYM 257 37 | 38 | /* Maximum codeword length, in bits, within each Huffman code */ 39 | #define DEFLATE_MAX_PRE_CODEWORD_LEN 7 40 | #define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15 41 | #define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15 42 | 43 | /* The maximum codeword length across all codes */ 44 | #define DEFLATE_MAX_CODEWORD_LEN 15 45 | 46 | /* Maximum possible overrun when decoding codeword lengths */ 47 | #define DEFLATE_MAX_LENS_OVERRUN 137 48 | 49 | /* 50 | * Maximum number of extra bits that may be required to represent a match 51 | * length or offset. 52 | */ 53 | #define DEFLATE_MAX_EXTRA_LENGTH_BITS 5 54 | #define DEFLATE_MAX_EXTRA_OFFSET_BITS 13 55 | 56 | #endif /* LIB_DEFLATE_CONSTANTS_H */ 57 | -------------------------------------------------------------------------------- /src/igzip/aarch64/crc32_gzip_refl_pmull.S: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | # Copyright(c) 2019 Arm Corporation All rights reserved. 
3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in 11 | # the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Arm Corporation nor the names of its 14 | # contributors may be used to endorse or promote products derived 15 | # from this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | ######################################################################### 29 | 30 | #include "aarch64_label.h" 31 | #include "crc32_gzip_refl_pmull.h" 32 | #include "crc32_refl_common_pmull.h" 33 | 34 | crc32_refl_func crc32_gzip_refl_pmull 35 | -------------------------------------------------------------------------------- /src/hash.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // hash.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | extern uint32_t hash_get_estimated_entries (VBlockP merging_vb, ContextP zctx, ConstContextP first_merging_vb_ctx); 14 | 15 | extern void hash_alloc_global (ContextP zctx, uint32_t estimated_entries); 16 | extern uint32_t hash_next_size_up (uint64_t size, bool allow_huge); 17 | 18 | extern WordIndex hash_global_get_entry (ContextP zctx, STRp(snip), bool allow_singleton, bool snip_is_definitely_new, CtxNodeP *please_update_index); 19 | 20 | extern WordIndex hash_find_snip_in_ol_nodes (VBlockP vb, ContextP vctx, STRp(snip), rom *snip_in_dict_out); 21 | 22 | extern WordIndex hash_get_entry_for_seg (VBlockP segging_vb, ContextP vctx, STRp(snip), rom *snip_in_dict_out); 23 | 24 | // tested hash table sizes up to 5M. turns out smaller tables (up to a point) are faster, despite having longer 25 | // average linked lists. 
probably bc the CPU can store the entire hash and nodes arrays in L1 or L2 26 | // memory cache during segmentation 27 | #define NO_NEXT 0xffffffff 28 | static inline uint32_t hash_do (uint32_t hash_len, STRp(snip)) 29 | { 30 | // a zero hash_len means the hash table does not exist 31 | if (!hash_len) return NO_NEXT; 32 | 33 | // rotate the 64bit accumulator left by 23 bits and xor in the next byte, so that the snip is spread throughout 34 | // the word before taking a mod - to ensure about-even distribution across the hash table 35 | uint64_t acc = 0; 36 | unsigned i = 0; 37 | while (i < snip_len) { 38 | uint64_t byte = (uint8_t)snip[i++]; 39 | acc = ((acc << 23) | (acc >> 41)) ^ byte; 40 | } 41 | 42 | uint32_t slot = acc % hash_len; 43 | return slot; 44 | } 45 | -------------------------------------------------------------------------------- /src/igzip/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright(c) 2011-2024 Intel Corporation All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in 10 | the documentation and/or other materials provided with the 11 | distribution. 12 | * Neither the name of Intel Corporation nor the names of its 13 | contributors may be used to endorse or promote products derived 14 | from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | SPDX-License-Identifier: BSD-3-Clause 29 | 30 | ---------------------- 31 | 32 | The original ISA-L source code was modified extensively to fit Genozip's needs. 33 | All these modifications are: 34 | 35 | Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 36 | Please see terms and conditions in the file LICENSE.txt 37 | -------------------------------------------------------------------------------- /src/piz.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // piz.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include "reconstruct.h" 12 | #include "file.h" 13 | 14 | extern bool piz_default_skip_section (SectionType st, DictId dict_id); 15 | // NOTE: piz_is_skip_section reads vb->data_type, where vb is not a macro parameter - a variable named vb must be in scope at the call site 16 | #define piz_is_skip_section(dt,st,comp_i,dict_id,f,preprocessing) \ 17 | (vb->data_type != DT_NONE && (piz_default_skip_section ((st), (dict_id)) || \ 18 | (dt_props[dt].is_skip_section && dt_props[dt].is_skip_section ((st), (comp_i), (dict_id), (f), (preprocessing))))) 19 | 20 | #define piz_is_skip_undicted_section(st) (z_file->data_type != DT_NONE && DTPZ(is_skip_section) && DTPZ(is_skip_section)((st), COMP_NONE, DICT_ID_NONE, 0, false)) 21 | 22 | extern Dispatcher piz_z_file_initialize (void); 23 | extern DataType piz_read_global_area (void); 24 | extern void piz_one_txt_file (Dispatcher dispatcher, bool is_first_z_file, bool is_last_z_file, CompIType first_comp_i, CompIType last_comp_i, bool allow_skip_cleaning); 25 | extern void piz_read_all_ctxs (VBlockP vb, Section *sec, bool is_pair_data); 26 | typedef enum { PUR_RECON, PUR_FASTA_WRITER_INIT, PUR_FASTQ_READ_R1, PUR_SAM_LOAD_SAG } PizUncompressReason; 27 | extern void piz_uncompress_all_ctxs (VBlockP vb, PizUncompressReason reason); 28 | extern SectionHeaderVbHeader piz_read_vb_header (VBlockP vb); 29 | extern bool piz_read_one_vb (VBlockP vb, bool for_reconstruction); 30 | extern void piz_set_main_dispatcher (Dispatcher dispatcher); 31 | extern void piz_allow_out_of_order (void); 32 | 33 | extern bool piz_grep_match (rom start, rom after); 34 | 35 | extern TRANSLATOR_FUNC (piz_obsolete_translator); 36 | // asserts that ctx is loaded; ctx is parenthesized in both uses so that any pointer expression can be passed 37 | #define ASSISLOADED(ctx) ASSPIZ((ctx)->is_loaded, "%s is not loaded", (ctx)->tag_name) 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | private 2 | docs 3 | test 4 | scripts 5 | launch.json 6 | *.back 7 | .conda-timestamp 8 | .archive.tar.gz 9 | autocomplete.sh 10 | *.o 11 | !src/objdir.*/secure/license.o
12 | !src/objdir.mac/secure/license.old_mac.o 13 | *.debug-o 14 | *.opt-o 15 | *.d 16 | *.S 17 | !src/igzip/aarch64/*.S 18 | *.genozip 19 | *.gz 20 | *.bgz 21 | *.bz2 22 | *.xz 23 | *.ora 24 | *.exe 25 | *.mac 26 | !installers/*.exe 27 | *.new 28 | *.crdownload 29 | genozip-linux-x86_64 30 | genozip-osx-x86 31 | genozip-osx-arm 32 | genozip-installer.ifp.save 33 | geno*-prod 34 | genozip 35 | genounzip* 36 | genocat* 37 | genols* 38 | !genols.[ch] 39 | genozip-debug* 40 | genozip-opt* 41 | genozip-latest* 42 | 1000genomes/* 43 | public/* 44 | support/* 45 | benchmark.txt 46 | notes.txt 47 | to do.txt 48 | junk* 49 | meta.yaml 50 | src/windows/genozip-installer.ifp 51 | src/windows/LICENSE.for-installer.txt 52 | src/mac/darwinpkg/Library/genozip/* 53 | !src/mac/darwinpkg/Library/genozip 54 | src/mac/Resources/* 55 | src/mac/scripts/* 56 | !.gitkeep 57 | .DS_Store 58 | *.vcf* 59 | *.bcf* 60 | *.sam* 61 | *.bam 62 | *.cram 63 | *.crai 64 | *.fastq* 65 | *.fq* 66 | *.fa 67 | *.fa.gz 68 | *.fasta* 69 | *.fna* 70 | *.gvf* 71 | *.gtf* 72 | *.gff* 73 | *.chain* 74 | *.bai 75 | *.fai 76 | *.gzi 77 | *.pac 78 | *.sa 79 | *.amb 80 | *.ann 81 | *.bwt 82 | *.overlaps 83 | *bug* 84 | *.piz 85 | *.zip 86 | *stackdump 87 | .altool_app_specifc_password 88 | src/mac/.[a-z_]* 89 | *.swp 90 | .notarize.out 91 | src/mac/*.pkg 92 | genome_*Full* 93 | unix-nl.* 94 | windows-nl.* 95 | test.* 96 | src/conda/README.md 97 | *.local 98 | *.b250 99 | *.bad 100 | *.bad.gz 101 | *.gcache 102 | *.hcache 103 | *.biopsy 104 | unused-code 105 | genozip.threads-log.* 106 | .genozip_license* 107 | dict_id_gen.c 108 | src/secure 109 | core 110 | SEC_*.body 111 | SEC_*.header 112 | digest.*.log 113 | failed-line.* 114 | *.tar 115 | !installers/*.tar 116 | *.log 117 | *.dict 118 | out 119 | *.backup 120 | vgcore.* 121 | P226339* -------------------------------------------------------------------------------- /src/igzip/noarch/crc_base_aliases.c: 
-------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2011-2017 Intel Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Intel Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | **********************************************************************/ 29 | 30 | #include "../crc.h" 31 | #include <stdint.h> 32 | /* Alias that forwards to crc32_gzip_refl_base; the cast drops const from buf, presumably to match that function's uint8_t * parameter */ 33 | uint32_t crc32_gzip_refl(uint32_t seed, const unsigned char *buf, uint64_t len) 34 | { 35 | return crc32_gzip_refl_base(seed, (uint8_t *) buf, len); 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/bzlib/LICENSE.bzlib: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------- 3 | 4 | This program, "bzip2", the associated library "libbzip2", and all 5 | documentation, are copyright (C) 1996-2019 Julian R Seward. All 6 | rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions 10 | are met: 11 | 12 | 1. Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 15 | 2. The origin of this software must not be misrepresented; you must 16 | not claim that you wrote the original software. If you use this 17 | software in a product, an acknowledgment in the product 18 | documentation would be appreciated but is not required. 19 | 20 | 3. Altered source versions must be plainly marked as such, and must 21 | not be misrepresented as being the original software. 22 | 23 | 4. The name of the author may not be used to endorse or promote 24 | products derived from this software without specific prior written 25 | permission. 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 28 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 | ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 31 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 33 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 35 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | 39 | Julian Seward, jseward@acm.org 40 | bzip2/libbzip2 version 1.0.8 of 13 July 2019 41 | 42 | -------------------------------------------------------------------------- 43 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/x86/decompress_impl.h: -------------------------------------------------------------------------------- 1 | #ifndef LIB_X86_DECOMPRESS_IMPL_H 2 | #define LIB_X86_DECOMPRESS_IMPL_H 3 | 4 | #include "cpu_features.h" 5 | 6 | /* 7 | * BMI2 optimized version 8 | * 9 | * FIXME: with MSVC, this isn't actually compiled with BMI2 code generation 10 | * enabled yet. That would require that this be moved to its own .c file. 11 | */ 12 | #if HAVE_BMI2_INTRIN 13 | # define deflate_decompress_bmi2 deflate_decompress_bmi2 14 | # define FUNCNAME deflate_decompress_bmi2 15 | # if !HAVE_BMI2_NATIVE 16 | # define ATTRIBUTES _target_attribute("bmi2") 17 | # endif 18 | /* 19 | * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the 20 | * bzhi instruction for 'word & BITMASK(count)'. So use the bzhi intrinsic 21 | * explicitly. EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)'; 22 | * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'. 23 | * Nevertheless, their implementation using the bzhi intrinsic is identical, 24 | * as the bzhi instruction truncates the count to 8 bits implicitly. 
25 | */ 26 | # ifndef __clang__ 27 | # include <immintrin.h> 28 | # ifdef ARCH_X86_64 29 | # define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count)) 30 | # define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count)) 31 | # else 32 | # define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count)) 33 | # define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count)) 34 | # endif 35 | # endif 36 | # include "../decompress_template.h" 37 | #endif /* HAVE_BMI2_INTRIN */ 38 | /* If the BMI2 version was built and BMI2 is native, make it DEFAULT_IMPL; otherwise return it at run time when the CPU reports BMI2, else NULL */ 39 | #if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE 40 | #define DEFAULT_IMPL deflate_decompress_bmi2 41 | #else 42 | static inline decompress_func_t 43 | arch_select_decompress_func(void) 44 | { 45 | #ifdef deflate_decompress_bmi2 46 | if (HAVE_BMI2(get_x86_cpu_features())) 47 | return deflate_decompress_bmi2; 48 | #endif 49 | return NULL; 50 | } 51 | #define arch_select_decompress_func arch_select_decompress_func 52 | #endif 53 | 54 | #endif /* LIB_X86_DECOMPRESS_IMPL_H */ 55 | -------------------------------------------------------------------------------- /src/windows/generate-ifp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash --norc 2 | 3 | # ------------------------------------------------------------------ 4 | # generate-ifp.sh 5 | # Copyright (C) 2024-2025 Genozip Limited. Patent Pending. 6 | # Please see terms and conditions in the file LICENSE.txt 7 | # 8 | # WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 9 | # and subject to penalties specified in the license. 10 | 11 | # This script is invoked by Makefile 12 | 13 | if [[ `pwd` != *windows* ]]; then 14 | echo "$0 must be run from the src/windows directory" 15 | exit 1 16 | fi 17 | 18 | template=genozip-installer.template.ifp 19 | 20 | if [ !
-f $template ]; then 21 | echo "$template: file not found" 22 | exit 1 23 | fi 24 | 25 | version=$(head -n1 ../version.h |cut -d\" -f2) 26 | 27 | lines=`wc -l $template | cut -d" " -f1` 28 | head=$(( `grep -n __LICENSE__ $template | cut -d: -f1` - 1 )) # __LICENSE__ must be on a stand-alone line 29 | tail=$(( $lines - $head - 1 )) 30 | windows_dir=`echo $PWD | sed "s/^\/mnt\/c/C:/g" | sed "s/\//\\\\\\\\\\\\\\\\/g"` 31 | 32 | cd ../.. 33 | root_dir=`echo $PWD | sed "s/^\/mnt\/c/C:/g" | sed "s/\//\\\\\\\\\\\\\\\\/g"` 34 | cd - > /dev/null 35 | 36 | cd ../../installers 37 | installers_dir=`echo $PWD | sed "s/^\/mnt\/c/C:/g" | sed "s/\//\\\\\\\\\\\\\\\\/g"` 38 | cd - > /dev/null 39 | 40 | exe_mb=5.4 # not sure if this needs to be accurate 41 | license_kb=11.0 42 | 43 | rm -f genozip-installer.ifp 44 | 45 | # NOTE: if making any change, verify that the Registry section shows up as expected in 46 | # InstallForge. It is very sensitive to any change in the License section. 47 | 48 | (head -$head $template ; sed "s/'/\\\\f1\\\\rquote\\\\f0 /g" LICENSE.for-installer.txt | sed "s/$/\\\\par\r/g"; tail -$tail $template) \ 49 | | sed s/__VERSION__/${version}/g \ 50 | | sed "s/__ROOT_DIR__/${root_dir}/g" \ 51 | | sed "s/__WINDOWS_DIR__/${windows_dir}/g" \ 52 | | sed "s/__INSTALLERS__/${installers_dir}/g" \ 53 | | sed "s/__EXE_MB__/${exe_mb}/g" \ 54 | | sed "s/__LICENSE_KB__/${license_kb}/g" \ 55 | > genozip-installer.ifp 56 | -------------------------------------------------------------------------------- /src/bsc/adler32.h: -------------------------------------------------------------------------------- 1 | /*-----------------------------------------------------------*/ 2 | /* Block Sorting, Lossless Data Compression Library. 
*/ 3 | /* Interface to Adler-32 checksum functions */ 4 | /*-----------------------------------------------------------*/ 5 | 6 | /*-- 7 | 8 | This file is a part of bsc and/or libbsc, a program and a library for 9 | lossless, block-sorting data compression. 10 | 11 | Copyright (c) 2009-2024 Ilya Grebnov 12 | 13 | Licensed under the Apache License, Version 2.0 (the "License"); 14 | you may not use this file except in compliance with the License. 15 | You may obtain a copy of the License at 16 | 17 | http://www.apache.org/licenses/LICENSE-2.0 18 | 19 | Unless required by applicable law or agreed to in writing, software 20 | distributed under the License is distributed on an "AS IS" BASIS, 21 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | See the License for the specific language governing permissions and 23 | limitations under the License. 24 | 25 | Please see the file LICENSE for full copyright information and file AUTHORS 26 | for full list of contributors. 27 | 28 | See also the bsc and libbsc web site: 29 | http://libbsc.com/ for more information. 30 | 31 | --*/ 32 | 33 | #ifndef _LIBBSC_ADLER32_H 34 | #define _LIBBSC_ADLER32_H 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | /** 41 | * Calculates Adler-32 checksum for input memory block. 42 | * @param T - the input memory block of n bytes. 43 | * @param n - the length of the input memory block. 44 | * @param features - the set of additional features. 45 | * @return the value of cyclic redundancy check. 
46 | */ 47 | unsigned int bsc_adler32(const unsigned char * T, int n, int features); 48 | 49 | #ifdef __cplusplus 50 | } 51 | #endif 52 | 53 | #endif 54 | 55 | /*-----------------------------------------------------------*/ 56 | /* End adler32.h */ 57 | /*-----------------------------------------------------------*/ 58 | -------------------------------------------------------------------------------- /src/text_help.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // help-text.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #include "website.h" 10 | 11 | static rom help_genozip[] = { 12 | "", 13 | "Try (for example): genozip myfile.bam", 14 | "", 15 | "Please see the genozip manual here: "WEBSITE_GENOZIP 16 | }; 17 | 18 | static rom help_genounzip[] = { 19 | "", 20 | "Try (for example): genounzip myfile.bam.genozip", 21 | "or: genounzip --replace myfile.bam.genozip", 22 | "", 23 | "Please see the genounzip manual here: "WEBSITE_GENOUNZIP 24 | }; 25 | 26 | static rom help_genocat[] = { 27 | "", 28 | "Please see the genocat manual here: "WEBSITE_GENOCAT 29 | }; 30 | 31 | static rom help_genols[] = { 32 | "", 33 | "Please see the genols manual here: "WEBSITE_GENOLS 34 | }; 35 | 36 | static rom help_attributions[] = { 37 | "", 38 | "Please see the attributions list here: "WEBSITE_ATTRIBUTIONS 39 | }; 40 | 41 | static rom help_footer[] = { 42 | "", 43 | "Technical questions, bug reports and feature requests: " EMAIL_SUPPORT, 44 | "License inquiries: " EMAIL_SALES, 45 | "", 46 | "THIS SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS, COPYRIGHT HOLDERS OR DISTRIBUTORS OF THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 47 | }; 48 | 49 | -------------------------------------------------------------------------------- /src/multiplexer.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // multiplexer.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | // note: separated from seg.h, to avoid including seg.h everywhere 10 | 11 | #pragma once 12 | 13 | #include "genozip.h" 14 | // MULTIPLEXER(n_channels) expands to an anonymous struct whose dict_ids, channel_ctx and snip arrays are sized for n_channels 15 | #define BASE64_DICT_ID_LEN 14 16 | #define MULTIPLEXER(n_channels) \ 17 | struct { \ 18 | /* all fields are aligned - padding only at end */ \ 19 | ContextP ctx; \ 20 | bool no_stons; \ 21 | uint8_t special_code; \ 22 | uint16_t num_channels; \ 23 | uint32_t snip_len; \ 24 | DictId dict_ids[n_channels]; \ 25 | ContextP channel_ctx[n_channels]; \ 26 | char snip[BASE64_DICT_ID_LEN * (n_channels) + 6]; \ 27 | } 28 | #define MUX ((MultiplexerP)mux) 29 | #define MUX_CAPACITY(mux) (sizeof((mux).dict_ids)/sizeof(DictId)) // max number of channels this mux can contain 30 | #define MUX_CHANNEL_CTX(mux) ((ContextP *)((mux)->dict_ids + (mux)->num_channels)) 31 | #define MUX_SNIP(mux) ((char *)(MUX_CHANNEL_CTX(mux) + (mux)->num_channels)) 32 | #define MUX_SIZEOF_SNIP(mux) (BASE64_DICT_ID_LEN * ((mux)->num_channels) + 6) 33 | // MUX_CHANNEL_CTX and MUX_SNIP compute addresses from num_channels rather than from the declared array sizes, so they can be applied through the generic MultiplexerP. NOTE(review): presumably num_channels must equal the n the concrete MultiplexerN was declared with - confirm 34 | typedef MULTIPLEXER(1000) *MultiplexerP; 35 | typedef const MULTIPLEXER(1000) *ConstMultiplexerP; 36 | 37 | typedef MULTIPLEXER(2) Multiplexer2, *Multiplexer2P; 38 | typedef MULTIPLEXER(3) Multiplexer3, *Multiplexer3P; 39 | typedef MULTIPLEXER(4) Multiplexer4, *Multiplexer4P; 40 | typedef MULTIPLEXER(5) Multiplexer5, *Multiplexer5P; 41 | typedef MULTIPLEXER(6) Multiplexer6, *Multiplexer6P; 42 | typedef MULTIPLEXER(7) Multiplexer7, *Multiplexer7P; 43 | typedef MULTIPLEXER(10) Multiplexer10, *Multiplexer10P; 44 | -------------------------------------------------------------------------------- /src/vcf_melt.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // vcf_melt.c 3 | // Copyright (C) 2024-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. 
Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #include "vcf_private.h" 10 | 11 | sSTRl(END_minus_SVLEN_snip, 32); 12 | sSTRl(copy_END_snip, 32); 13 | 14 | void vcf_me_zip_initialize (void) 15 | { 16 | DO_ONCE { 17 | seg_prepare_minus_snip (VCF, _VCF_POS, _INFO_SVLEN, END_minus_SVLEN_snip); 18 | seg_prepare_snip_other (SNIP_OTHER_DELTA, _VCF_POS, true, 0, copy_END_snip); // END is an alias of POS 19 | } 20 | } 21 | 22 | void vcf_seg_melt_ADJLEFT (VBlockVCFP vb, ContextP ctx, STRp(adjleft_str)) 23 | { 24 | int64_t svlen = CTX(INFO_SVLEN)->last_value.i; 25 | int64_t end = CTX(VCF_POS)->last_value.i; // END is an alias of POS 26 | int64_t adjleft; 27 | 28 | if (ctx_encountered_in_line (VB, INFO_SVLEN) && 29 | ctx_encountered_in_line (VB, INFO_END) && 30 | str_get_int (STRa(adjleft_str), &adjleft) && 31 | adjleft == end - svlen) 32 | 33 | seg_by_ctx (VB, STRa(END_minus_SVLEN_snip), ctx, adjleft_str_len); 34 | 35 | else if (str_issame_(STRa(adjleft_str), "0", 1)) 36 | seg_by_ctx (VB, "0", 1, ctx, 1); // seg as snip 37 | 38 | else 39 | seg_integer_or_not (VB, ctx, STRa(adjleft_str), adjleft_str_len); 40 | } 41 | 42 | void vcf_seg_melt_ADJRIGHT (VBlockVCFP vb, ContextP ctx, STRp(adjright_str)) 43 | { 44 | int64_t end = CTX(VCF_POS)->last_value.i; // END is an alias of POS 45 | int64_t adjright; 46 | 47 | if (ctx_encountered_in_line (VB, INFO_END) && 48 | str_get_int (STRa(adjright_str), &adjright) && 49 | adjright == end) 50 | 51 | seg_by_ctx (VB, STRa(copy_END_snip), ctx, adjright_str_len); 52 | 53 | else if (str_issame_(STRa(adjright_str), "0", 1)) 54 | seg_by_ctx (VB, "0", 1, ctx, 1); // seg as snip 55 | 56 | else 57 | seg_integer_or_not (VB, ctx, STRa(adjright_str), adjright_str_len); 58 | } 59 | -------------------------------------------------------------------------------- /src/igzip/aarch64/crc_multibinary_arm.S: -------------------------------------------------------------------------------- 1 | 
######################################################################## 2 | # Copyright(c) 2019 Arm Corporation All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in 11 | # the documentation and/or other materials provided with the 12 | # distribution. 13 | # * Neither the name of Arm Corporation nor the names of its 14 | # contributors may be used to endorse or promote products derived 15 | # from this software without specific prior written permission. 16 | # 17 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | ######################################################################### 29 | #include 30 | 31 | 32 | ; mbin_interface crc32_iscsi 33 | ; mbin_interface crc16_t10dif 34 | ; mbin_interface crc16_t10dif_copy 35 | ; mbin_interface crc32_ieee 36 | mbin_interface crc32_gzip_refl 37 | ; mbin_interface crc64_ecma_refl 38 | ; mbin_interface crc64_ecma_norm 39 | ; mbin_interface crc64_iso_refl 40 | ; mbin_interface crc64_iso_norm 41 | ; mbin_interface crc64_jones_refl 42 | ; mbin_interface crc64_jones_norm 43 | -------------------------------------------------------------------------------- /src/random_access.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // random_access.h 3 | // Copyright (C) 2020-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 
8 | 9 | #pragma once 10 | 11 | #include 12 | #include "genozip.h" 13 | #include "sections.h" 14 | 15 | #define RA_MISSING_RA_MIN (MAX_POS + 1) 16 | #define RA_MISSING_RA_MAX (-1) 17 | 18 | // ZIP 19 | extern void random_access_initialize(void); 20 | extern void random_access_update_chrom (VBlockP vb, WordIndex chrom_node_index, STRp(chrom_name)); 21 | extern void random_access_update_pos (VBlockP vb, Did did_i_pos); 22 | extern void random_access_increment_last_pos (VBlockP vb, PosType64 increment); 23 | extern void random_access_update_last_pos (VBlockP vb, PosType64 last_pos); 24 | extern void random_access_update_first_last_pos (VBlockP vb, WordIndex chrom_node_index, STRp (first_pos), STRp (last_pos)); 25 | extern void random_access_update_to_entire_chrom (VBlockP vb, PosType64 first_pos_of_chrom, PosType64 last_pos_of_chrom); 26 | extern void random_access_merge_in_vb (VBlockP vb); 27 | extern void random_access_finalize_entries (BufferP ra_buf); 28 | extern Codec random_access_compress (ConstBufferP ra_buf, SectionType sec_type, Codec codec, rom msg); 29 | extern void random_access_get_ra_info (VBIType vblock_i, WordIndex *chrom_index, PosType64 *min_pos, PosType64 *max_pos); 30 | 31 | // PIZ 32 | extern bool random_access_has_filter (void); 33 | extern bool random_access_is_vb_included (VBIType vb_i); 34 | extern uint32_t random_access_num_chroms_start_in_this_vb (VBIType vb_i); 35 | extern void random_access_alloc_ra_buf (VBlockP vb, WordIndex chrom_node_index); 36 | extern void random_access_load_ra_section (SectionType section_type, Did chrom_did_i, BufferP ra_buf, rom buf_name, rom show_index_msg); 37 | 38 | // PIZ - FASTA 39 | extern bool random_access_does_last_chrom_continue_in_next_vb (VBIType vb_i); 40 | 41 | #define RA_MSG_PRIM "Random-access index contents (result of --show-index)" 42 | #define RA_MSG_REF "Reference random-access index contents (result of --show-ref-index)" 43 | 
-------------------------------------------------------------------------------- /src/htscodecs/LICENSE.htscodecs.md: -------------------------------------------------------------------------------- 1 | All files except those explicitly listed below are copyright Genome 2 | Research Limited and are made available under the BSD license. 3 | 4 | > Redistribution and use in source and binary forms, with or without 5 | > modification, are permitted provided that the following conditions 6 | > are met: 7 | > 8 | > (1) Redistributions of source code must retain the above copyright 9 | > notice, this list of conditions and the following disclaimer. 10 | > 11 | > (2) Redistributions in binary form must reproduce the above copyright 12 | > notice, this list of conditions and the following disclaimer in 13 | > the documentation and/or other materials provided with the distribution. 14 | > 15 | > (3)The name of the author may not be used to endorse or promote 16 | > products derived from this software without specific prior written 17 | > permission. 18 | > 19 | > THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 | > IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | > WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | > DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 23 | > INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | > (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | > SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | > HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 27 | > STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 28 | > IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | > POSSIBILITY OF SUCH DAMAGE. 30 | 31 | c_range_coder.h is Public Domain, derived from work by Eugene 32 | Shelwien. 
33 | 34 | rANS_byte.h and rANS_word.h are derived from Fabien Giesen's work and 35 | is Public Domain. https://github.com/rygorous/ryg_rans This work was 36 | in turn based on the ANS family of entropy encoders as described by 37 | Jarek Duda's paper: http://arxiv.org/abs/1311.2540 38 | 39 | > To the extent possible under law, Fabian Giesen has waived all 40 | > copyright and related or neighboring rights to ryg_rans, as 41 | > per the terms of the CC0 license: 42 | > 43 | > https://creativecommons.org/publicdomain/zero/1.0 44 | > 45 | > This work is published from the United States. 46 | -------------------------------------------------------------------------------- /src/igzip/igzip_wrapper.h: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2011-2018 Intel Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Intel Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | **********************************************************************/ 29 | 30 | #ifndef IGZIP_WRAPPER_H 31 | #define IGZIP_WRAPPER_H /* completes the include guard: without this define the #ifndef above is always true */ 32 | #define DEFLATE_METHOD 8 33 | #define ZLIB_DICT_FLAG (1 << 5) 34 | #define TEXT_FLAG (1 << 0) 35 | #define HCRC_FLAG (1 << 1) 36 | #define EXTRA_FLAG (1 << 2) 37 | #define NAME_FLAG (1 << 3) 38 | #define COMMENT_FLAG (1 << 4) 39 | #define UNDEFINED_FLAG (-1) 40 | 41 | #define GZIP_HDR_BASE 10 42 | #define GZIP_EXTRA_LEN 2 43 | #define GZIP_HCRC_LEN 2 44 | #define GZIP_TRAILER_LEN 8 45 | 46 | #define ZLIB_HDR_BASE 2 47 | #define ZLIB_DICT_LEN 4 48 | #define ZLIB_INFO_OFFSET 4 49 | #define ZLIB_LEVEL_OFFSET 6 50 | #define ZLIB_TRAILER_LEN 4 51 | 52 | #endif /* IGZIP_WRAPPER_H */ 53 | -------------------------------------------------------------------------------- /src/igzip/x86_64/igzip_inflate_multibinary.asm: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 3 | ; 4 | ; Redistribution and use in source and binary forms, with or without 5 | ; modification, are permitted provided that the following conditions 6 | ; are met: 7 | ; * Redistributions of source code must retain the above copyright 8 | ; notice, this list of conditions and the following disclaimer. 
9 | ; * Redistributions in binary form must reproduce the above copyright 10 | ; notice, this list of conditions and the following disclaimer in 11 | ; the documentation and/or other materials provided with the 12 | ; distribution. 13 | ; * Neither the name of Intel Corporation nor the names of its 14 | ; contributors may be used to endorse or promote products derived 15 | ; from this software without specific prior written permission. 16 | ; 17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 29 | 30 | default rel 31 | [bits 64] 32 | 33 | %include "reg_sizes.asm" 34 | 35 | extern decode_huffman_code_block_stateless_base 36 | extern decode_huffman_code_block_stateless_01 37 | extern decode_huffman_code_block_stateless_04 38 | 39 | section .text 40 | 41 | %include "multibinary.asm" 42 | 43 | 44 | mbin_interface decode_huffman_code_block_stateless 45 | mbin_dispatch_init5 decode_huffman_code_block_stateless, decode_huffman_code_block_stateless_base, decode_huffman_code_block_stateless_01, decode_huffman_code_block_stateless_01, decode_huffman_code_block_stateless_04 46 | -------------------------------------------------------------------------------- /src/buf_list.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // buf_list.h 3 | // Copyright (C) 2019-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | 13 | typedef struct { 14 | BufferP buf; // address of Buffer structure (not the data!). 
Buffer is removed if removed bit is set with BL_SET_REMOVED 15 | rom func; // function in which this Buffer was first allocated (at which time it was added to the buffer_list) 16 | rom name; // buffer name 17 | uint32_t code_line; 18 | } BufListEnt; 19 | 20 | // buflist modifications 21 | extern void buflist_add_buf (VBlockP vb, BufferP buf, FUNCLINE); 22 | extern void buflist_remove_buf (BufferP buf, FUNCLINE); 23 | extern void buflist_move_buf (VBlockP vb, BufferP new_buf, rom new_name, ConstBufferP old_buf, FUNCLINE); 24 | extern void buflist_update_vb_addr_change (VBlockP new_vb, ConstVBlockP old_vb); 25 | extern void buflist_compact (VBlockP vb); 26 | extern void buflist_sort (VBlockP vb, bool already_locked); // ahead of destroying 27 | 28 | // iterators 29 | extern BufListEnt *buflist_find_buf (VBlockP vb, ConstBufferP buf, FailType soft_fail); 30 | extern void buflist_display (VBlockP vb); 31 | extern void buflist_who_is_locked (void); 32 | 33 | // freers & destroyers 34 | extern void buflist_free_vb (VBlockP vb); 35 | extern void buflist_free_ctx (VBlockP vb, ContextP ctx); 36 | extern void buflist_destroy_vb_bufs (VBlockP vb, bool only_if_unused); 37 | extern void buflist_destroy_private_and_context_vb_bufs (VBlockP vb); 38 | extern void buflist_destroy_file_bufs (FileP file); 39 | extern void buflist_destroy_bufs_by_name (rom name, bool compact_after_destroying); 40 | 41 | // verifiers 42 | extern bool buflist_test_overflows (VBlockP vb, rom msg); 43 | extern void buflist_test_overflows_all_other_vb (VBlockP caller_vb, rom msg, bool force, bool on_exit); 44 | extern void buflist_test_overflows_all_vbs (rom msg, bool on_exit); 45 | extern bool buflist_locate (ConstBufferP buf, rom prefix); 46 | 47 | // memory usage reporting 48 | extern uint64_t buflist_get_memory_usage (void); 49 | extern void buflist_show_memory (bool memory_full, unsigned max_threads, unsigned used_threads); 50 | 
-------------------------------------------------------------------------------- /src/vcf_local_alleles.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // vcf_local_alleles.c 3 | // Copyright (C) 2024-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #include "vcf_private.h" 10 | // predicts LAA from this sample's GT: leading "0" and "." haplotypes are skipped, the remaining alleles are written comma-separated without repeats, or "." if none remain. in: *pred_len is the longest GT accepted ; out: *pred_len is the length of the prediction (0 if GT is longer than that) 11 | static void vcf_LAA_predict (VBlockVCFP vb, qSTRp (pred)) 12 | { 13 | STRlast(gt, FORMAT_GT); 14 | 15 | if (gt_len > *pred_len) { 16 | *pred_len = 0; 17 | return; 18 | } 19 | 20 | char *out = pred; 21 | 22 | char sep = memchr (gt, '/', gt_len) ? '/' : '|'; 23 | str_split (gt, gt_len, vb->ploidy, sep, ht, false); 24 | 25 | // 0/1 → 1 ; 0/0 → ∅ ; ./. → ∅ 26 | int first_ht = 0; 27 | while (first_ht < n_hts && ht_lens[first_ht]==1 && (hts[first_ht][0] == '0' || hts[first_ht][0] == '.')) 28 | first_ht++; 29 | 30 | // ∅ → . 
31 | if (first_ht == n_hts) { 32 | pred[0] = '.'; 33 | *pred_len = 1; 34 | } 35 | 36 | // remove redundant alleles: 23/23 → 23 ; 2/500/2 → 2,500 37 | else { 38 | // copy alleles one by one to out, skipping 2nd+ occurrence of the same allele 39 | for (int i=first_ht; i < n_hts; i++) { 40 | for (int j=first_ht; j < i; j++) 41 | if (str_issame_(STRi(ht,i), STRi(ht,j))) 42 | goto skip_i; // case: the allele on ht=i was already encountered in an earlier ht=j 43 | 44 | if (i > first_ht) *out++ = ','; // separator precedes every allele except the first (ht=first_ht is never skipped) 45 | memcpy (out, hts[i], ht_lens[i]); 46 | out += ht_lens[i]; 47 | skip_i: {} 48 | } 49 | // no trailing ',' is ever written (previously one was appended after the last allele and then dropped, storing one byte more than the gt_len <= *pred_len guard accounts for) 50 | 51 | *pred_len = out - pred; 52 | } 53 | } 54 | 55 | void vcf_seg_FORMAT_LAA (VBlockVCFP vb, ContextP ctx, STRp(laa)) 56 | { 57 | STRlic(prediction, 100); 58 | vcf_LAA_predict (vb, qSTRa (prediction)); 59 | 60 | if (str_issame (laa, prediction)) 61 | seg_special0 (VB, VCF_SPECIAL_LAA, ctx, laa_len); 62 | else 63 | seg_by_ctx (VB, STRa(laa), ctx, laa_len); 64 | } 65 | 66 | SPECIAL_RECONSTRUCTOR (vcf_piz_special_LAA) 67 | { 68 | if (reconstruct) { 69 | STRlic(prediction, 100); 70 | vcf_LAA_predict (VB_VCF, qSTRa (prediction)); 71 | 72 | RECONSTRUCT_str (prediction); 73 | } 74 | 75 | return NO_NEW_VALUE; 76 | } 77 | 78 | -------------------------------------------------------------------------------- /src/igzip/x86_64/crc_multibinary.asm: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 3 | ; 4 | ; Redistribution and use in source and binary forms, with or without 5 | ; modification, are permitted provided that the following conditions 6 | ; are met: 7 | ; * Redistributions of source code must retain the above copyright 8 | ; notice, this list of conditions and the following disclaimer. 
9 | ; * Redistributions in binary form must reproduce the above copyright 10 | ; notice, this list of conditions and the following disclaimer in 11 | ; the documentation and/or other materials provided with the 12 | ; distribution. 13 | ; * Neither the name of Intel Corporation nor the names of its 14 | ; contributors may be used to endorse or promote products derived 15 | ; from this software without specific prior written permission. 16 | ; 17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 29 | 30 | default rel 31 | [bits 64] 32 | 33 | %include "reg_sizes.asm" 34 | 35 | extern crc32_gzip_refl_by8 36 | extern crc32_gzip_refl_by8_02 37 | extern crc32_gzip_refl_base 38 | 39 | %if (AS_FEATURE_LEVEL) >= 10 40 | extern crc32_gzip_refl_by16_10 41 | %endif 42 | 43 | %include "multibinary.asm" 44 | 45 | section .data 46 | ;;; *_mbinit are initial values for *_dispatched; is updated on first call. 47 | ;;; Therefore, *_dispatch_init is only executed on first call. 
48 | 49 | section .text 50 | 51 | mbin_interface crc32_gzip_refl 52 | mbin_dispatch_init_clmul crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8, crc32_gzip_refl_by8_02, crc32_gzip_refl_by16_10 53 | -------------------------------------------------------------------------------- /src/igzip/adler32_base.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2011-2017 Intel Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Intel Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | **********************************************************************/ 29 | 30 | #include 31 | #include "igzip_checksums.h" 32 | 33 | uint32_t adler32_base(uint32_t adler32, uint8_t * start, uint32_t length) 34 | { 35 | uint8_t *end, *next = start; 36 | uint64_t A, B; 37 | 38 | A = adler32 & 0xffff; 39 | B = adler32 >> 16; 40 | 41 | while (length > MAX_ADLER_BUF) { 42 | end = next + MAX_ADLER_BUF; 43 | for (; next < end; next++) { 44 | A += *next; 45 | B += A; 46 | } 47 | 48 | A = A % ADLER_MOD; 49 | B = B % ADLER_MOD; 50 | length -= MAX_ADLER_BUF; 51 | } 52 | 53 | end = next + length; 54 | for (; next < end; next++) { 55 | A += *next; 56 | B += A; 57 | } 58 | 59 | A = A % ADLER_MOD; 60 | B = B % ADLER_MOD; 61 | 62 | return B << 16 | A; 63 | } 64 | -------------------------------------------------------------------------------- /src/libdeflate_1.7/deflate_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * deflate_constants.h - constants for the DEFLATE compression format 3 | */ 4 | 5 | #ifndef LIB_DEFLATE_CONSTANTS_H 6 | #define LIB_DEFLATE_CONSTANTS_H 7 | 8 | /* Valid block types */ 9 | #define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0 10 | #define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1 11 | #define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2 12 | 13 | /* Minimum and maximum supported match lengths (in bytes) */ 14 | #define DEFLATE_MIN_MATCH_LEN 3 15 | #define DEFLATE_MAX_MATCH_LEN 
258 16 | 17 | /* Minimum and maximum supported match offsets (in bytes) */ 18 | #define DEFLATE_MIN_MATCH_OFFSET 1 19 | #define DEFLATE_MAX_MATCH_OFFSET 32768 20 | 21 | #define DEFLATE_MAX_WINDOW_SIZE 32768 22 | 23 | /* Number of symbols in each Huffman code. Note: for the literal/length 24 | * and offset codes, these are actually the maximum values; a given block 25 | * might use fewer symbols. */ 26 | #define DEFLATE_NUM_PRECODE_SYMS 19 27 | #define DEFLATE_NUM_LITLEN_SYMS 288 28 | #define DEFLATE_NUM_OFFSET_SYMS 32 29 | 30 | /* The maximum number of symbols across all codes */ 31 | #define DEFLATE_MAX_NUM_SYMS 288 32 | 33 | /* Division of symbols in the literal/length code */ 34 | #define DEFLATE_NUM_LITERALS 256 35 | #define DEFLATE_END_OF_BLOCK 256 36 | #define DEFLATE_NUM_LEN_SYMS 31 37 | 38 | /* Maximum codeword length, in bits, within each Huffman code */ 39 | #define DEFLATE_MAX_PRE_CODEWORD_LEN 7 40 | #define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15 41 | #define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15 42 | 43 | /* The maximum codeword length across all codes */ 44 | #define DEFLATE_MAX_CODEWORD_LEN 15 45 | 46 | /* Maximum possible overrun when decoding codeword lengths */ 47 | #define DEFLATE_MAX_LENS_OVERRUN 137 48 | 49 | /* 50 | * Maximum number of extra bits that may be required to represent a match 51 | * length or offset. 52 | * 53 | * TODO: are we going to have full DEFLATE64 support? If so, up to 16 54 | * length bits must be supported. 55 | */ 56 | #define DEFLATE_MAX_EXTRA_LENGTH_BITS 5 57 | #define DEFLATE_MAX_EXTRA_OFFSET_BITS 14 58 | 59 | /* The maximum number of bits in which a match can be represented. This 60 | * is the absolute worst case, which assumes the longest possible Huffman 61 | * codewords and the maximum numbers of extra bits. 
*/ 62 | #define DEFLATE_MAX_MATCH_BITS \ 63 | (DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \ 64 | DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS) 65 | 66 | #endif /* LIB_DEFLATE_CONSTANTS_H */ 67 | -------------------------------------------------------------------------------- /src/igzip/aarch64/stdmac_aarch64.h: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2019 Arm Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | **********************************************************************/ 29 | 30 | #ifndef __STDMAC_AARCH64_H__ 31 | #define __STDMAC_AARCH64_H__ 32 | 33 | #ifdef __ASSEMBLY__ 34 | 35 | #define DEBUG_STACK 144 36 | 37 | .macro push_stack 38 | stp x29, x30,[sp,0-DEBUG_STACK]! 39 | mov x29, sp 40 | stp x19, x20, [sp, 16] 41 | stp x21, x22, [sp, 32] 42 | stp x23, x24, [sp, 48] 43 | stp x25, x26, [sp, 64] 44 | stp x27, x28, [sp, 80] 45 | .endm 46 | .macro pop_stack 47 | ldp x19, x20, [sp, 16] 48 | ldp x21, x22, [sp, 32] 49 | ldp x23, x24, [sp, 48] 50 | ldp x25, x26, [sp, 64] 51 | ldp x27, x28, [sp, 80] 52 | 53 | ldp x29, x30, [sp], DEBUG_STACK 54 | .endm 55 | 56 | #endif 57 | #endif 58 | -------------------------------------------------------------------------------- /src/lzma/LzmaEnc.h: -------------------------------------------------------------------------------- 1 | /* LzmaEnc.h -- LZMA Encoder 2 | 2017-07-27 : Igor Pavlov : Public domain */ 3 | 4 | #pragma once 5 | 6 | #include "7zTypes.h" 7 | 8 | #define LZMA_PROPS_SIZE 5 9 | 10 | typedef struct _CLzmaEncProps 11 | { 12 | int level; /* 0 <= level <= 9 */ 13 | UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version 14 | (1 << 12) <= dictSize <= (3 << 29) for 64-bit version 15 | default = (1 << 24) */ 16 | int lc; /* 0 <= lc <= 8, default = 3 */ 17 | int lp; /* 0 <= lp <= 4, default = 0 */ 18 | int pb; /* 0 <= pb <= 4, default = 2 */ 19 | int algo; /* 0 
- fast, 1 - normal, default = 1 */ 20 | int fb; /* 5 <= fb <= 273, default = 32 */ 21 | int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ 22 | int numHashBytes; /* 2, 3 or 4, default = 4 */ 23 | UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ 24 | unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ 25 | int numThreads; /* 1 or 2, default = 2 */ 26 | 27 | UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. 28 | Encoder uses this value to reduce dictionary size */ 29 | } CLzmaEncProps; 30 | 31 | extern void LzmaEncProps_Init(CLzmaEncProps *p); 32 | 33 | /* ---------- CLzmaEncHandle Interface ---------- */ 34 | 35 | /* LzmaEnc* functions can return the following exit codes: 36 | SRes: 37 | SZ_OK - OK 38 | SZ_ERROR_MEM - Memory allocation error 39 | SZ_ERROR_PARAM - Incorrect parameter in props 40 | SZ_ERROR_WRITE - ISeqOutStream write callback error 41 | SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output 42 | SZ_ERROR_PROGRESS - some break from progress callback 43 | SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) 44 | */ 45 | 46 | typedef void * CLzmaEncHandle; 47 | 48 | extern size_t LzmaEnc_LzmaHandleSize (void); // divon 49 | 50 | extern void LzmaEnc_Create(CLzmaEncHandle p, VBlockP vb, ContextP ctx); 51 | extern void LzmaEnc_Destroy(CLzmaEncHandle p); 52 | 53 | extern SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); 54 | extern SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); 55 | 56 | extern SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream); 57 | extern SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, int writeEndMark); 58 | 59 | -------------------------------------------------------------------------------- /src/igzip/aarch64/igzip_multibinary_arm64.S:
-------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2019 Arm Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | **********************************************************************/ 29 | 30 | #include "aarch64_multibinary.h" 31 | 32 | 33 | mbin_interface isal_deflate_icf_body_lvl1 34 | mbin_interface isal_deflate_icf_body_lvl2 35 | mbin_interface isal_deflate_icf_body_lvl3 36 | mbin_interface isal_deflate_icf_finish_lvl1 37 | mbin_interface isal_deflate_icf_finish_lvl2 38 | mbin_interface isal_deflate_icf_finish_lvl3 39 | mbin_interface isal_update_histogram 40 | mbin_interface encode_deflate_icf 41 | mbin_interface set_long_icf_fg 42 | mbin_interface gen_icf_map_lh1 43 | mbin_interface isal_deflate_hash_lvl0 44 | mbin_interface isal_deflate_hash_lvl1 45 | mbin_interface isal_deflate_hash_lvl2 46 | mbin_interface isal_deflate_hash_lvl3 47 | 48 | mbin_interface isal_deflate_body 49 | mbin_interface isal_deflate_finish 50 | mbin_interface isal_adler32 51 | -------------------------------------------------------------------------------- /src/igzip/aarch64/bitbuf2_aarch64.h: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2019 Arm Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | **********************************************************************/ 29 | 30 | #ifndef __BITBUF2_AARCH64_H__ 31 | #define __BITBUF2_AARCH64_H__ 32 | #include "options_aarch64.h" 33 | 34 | #ifdef __ASSEMBLY__ 35 | .macro update_bits stream:req,code:req,code_len:req,m_bits:req,m_bit_count:req \ 36 | m_out_buf:req 37 | 38 | lsl x_\code,x_\code,x_\m_bit_count 39 | orr x_\m_bits,x_\code,x_\m_bits 40 | add x_\m_bit_count,x_\code_len,x_\m_bit_count 41 | 42 | str x_\m_bits,[x_\m_out_buf] 43 | 44 | and w_\code,w_\m_bit_count,-8 45 | lsr w_\code_len,w_\m_bit_count,3 46 | add x_\m_out_buf,x_\m_out_buf,w_\code_len,uxtw 47 | sub w_\m_bit_count,w_\m_bit_count,w_\code 48 | lsr x_\m_bits,x_\m_bits,x_\code 49 | 50 | str x_\m_bits,[stream,_internal_state_bitbuf_m_bits] 51 | str w_\m_bit_count,[stream,_internal_state_bitbuf_m_bit_count] 52 | str x_\m_out_buf,[stream,_internal_state_bitbuf_m_out_buf] 53 | 54 | 55 | .endm 56 | #endif 57 | #endif 58 | -------------------------------------------------------------------------------- /src/igzip/x86_64/bitbuf2.asm: -------------------------------------------------------------------------------- 1 | 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 3 | ; 4 | ; Redistribution and use in source and binary forms, with or without 5 | ; modification, are permitted provided that the following conditions 6 | ; are met: 7 | ; * Redistributions of source code must retain the above copyright 8 | ; notice, this list of conditions and the following disclaimer. 9 | ; * Redistributions in binary form must reproduce the above copyright 10 | ; notice, this list of conditions and the following disclaimer in 11 | ; the documentation and/or other materials provided with the 12 | ; distribution. 13 | ; * Neither the name of Intel Corporation nor the names of its 14 | ; contributors may be used to endorse or promote products derived 15 | ; from this software without specific prior written permission. 16 | ; 17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 29 | 30 | %include "options.asm" 31 | %include "stdmac.asm" 32 | 33 | ; Assumes m_out_buf is a register 34 | ; Clobbers RCX 35 | ; code is clobbered 36 | ; write_bits_always m_bits, m_bit_count, code, count, m_out_buf 37 | %macro write_bits 5 38 | %define %%m_bits %1 39 | %define %%m_bit_count %2 40 | %define %%code %3 41 | %define %%count %4 42 | %define %%m_out_buf %5 43 | 44 | SHLX %%code, %%code, %%m_bit_count 45 | 46 | or %%m_bits, %%code 47 | add %%m_bit_count, %%count 48 | 49 | mov [%%m_out_buf], %%m_bits 50 | mov rcx, %%m_bit_count 51 | shr rcx, 3 ; rcx = bytes 52 | add %%m_out_buf, rcx 53 | shl rcx, 3 ; rcx = bits 54 | and %%m_bit_count, 0x7 55 | 56 | SHRX %%m_bits, %%m_bits, rcx 57 | %endm 58 | 59 | %macro write_dword 2 60 | %define %%data %1d 61 | %define %%addr %2 62 | mov [%%addr], %%data 63 | add %%addr, 4 64 | %endm 65 | -------------------------------------------------------------------------------- /src/htscodecs/rANS_static4x16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #pragma once 35 | 36 | #include "../genozip.h" 37 | 38 | #ifndef X_PACK 39 | #define X_PACK 0x80 // Pack 2,4,8 or infinite symbols into a byte. 40 | #define X_RLE 0x40 // Run length encoding with runs & lits encoded separately 41 | #define X_CAT 0x20 // Nop; for tiny segments where rANS overhead is too big 42 | #define X_NOSZ 0x10 // Don't store the original size; used by STRIPE mode 43 | #define X_STRIPE 0x08 // For N-byte integer data; rotate & encode N streams. 
44 | #endif 45 | 46 | unsigned int rans_compress_bound_4x16(unsigned int size, int order); 47 | unsigned char *rans_compress_to_4x16(VBlockP vb, unsigned char *in, unsigned int in_size, 48 | unsigned char *out, unsigned int *out_size, 49 | int order); 50 | unsigned char *rans_uncompress_to_4x16(VBlockP vb, unsigned char *in, unsigned int in_size, 51 | unsigned char *out, unsigned int *out_size); 52 | -------------------------------------------------------------------------------- /src/lzma/LzFind.h: -------------------------------------------------------------------------------- 1 | /* LzFind.h -- Match finder for LZ algorithms 2 | 2017-06-10 : Igor Pavlov : Public domain */ 3 | 4 | #pragma once 5 | 6 | #include "7zTypes.h" 7 | 8 | typedef UInt32 CLzRef; 9 | 10 | typedef struct _CMatchFinder 11 | { 12 | Byte *buffer; 13 | UInt32 pos; 14 | UInt32 posLimit; 15 | UInt32 streamPos; 16 | UInt32 lenLimit; 17 | 18 | UInt32 cyclicBufferPos; 19 | UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ 20 | 21 | Byte streamEndWasReached; 22 | Byte btMode; 23 | Byte bigHash; 24 | Byte directInput; 25 | 26 | UInt32 matchMaxLen; 27 | CLzRef *hash; 28 | CLzRef *son; 29 | UInt32 hashMask; 30 | UInt32 cutValue; 31 | 32 | Byte *bufferBase; 33 | ISeqInStream *stream; 34 | 35 | UInt32 blockSize; 36 | UInt32 keepSizeBefore; 37 | UInt32 keepSizeAfter; 38 | 39 | UInt32 numHashBytes; 40 | size_t directInputRem; 41 | UInt32 historySize; 42 | UInt32 fixedHashSize; 43 | UInt32 hashSizeSum; 44 | SRes result; 45 | UInt32 crc[256]; 46 | size_t numRefs; 47 | 48 | UInt64 expectedDataSize; 49 | } CMatchFinder; 50 | 51 | #define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) 52 | 53 | #define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) 54 | 55 | #define Inline_MatchFinder_IsFinishedOK(p) \ 56 | ((p)->streamEndWasReached \ 57 | && (p)->streamPos == (p)->pos \ 58 | && (!(p)->directInput || (p)->directInputRem == 0)) 59 | 60 | void 
MatchFinder_Construct(CMatchFinder *p); 61 | 62 | /* Conditions: 63 | historySize <= 3 GB 64 | keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB 65 | */ 66 | int MatchFinder_Create(VBlockP vb, CMatchFinder *p, UInt32 historySize, 67 | UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter); 68 | void MatchFinder_Free(VBlockP vb, CMatchFinder *p); 69 | 70 | /* 71 | Conditions: 72 | Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 73 | Mf_GetPointerToCurrentPos_Func's result must be used only before any other function 74 | */ 75 | 76 | typedef void (*Mf_Init_Func)(void *object); 77 | typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); 78 | typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); 79 | typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); 80 | typedef void (*Mf_Skip_Func)(void *object, UInt32); 81 | 82 | typedef struct _IMatchFinder 83 | { 84 | Mf_Init_Func Init; 85 | Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; 86 | Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; 87 | Mf_GetMatches_Func GetMatches; 88 | Mf_Skip_Func Skip; 89 | } IMatchFinder; 90 | 91 | void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); 92 | 93 | -------------------------------------------------------------------------------- /src/igzip/aarch64/options_aarch64.h: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2019 Arm Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 
9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | **********************************************************************/ 29 | 30 | #ifndef __OPTIONS_AARCH64_H__ 31 | #define __OPTIONS_AARCH64_H__ 32 | 33 | 34 | #ifdef __ASSEMBLY__ 35 | 36 | /// Options:dir 37 | /// m - reschedule mem reads 38 | /// e b - bitbuff style 39 | /// t s x - compare style 40 | /// h - limit hash updates 41 | /// l - use longer huffman table 42 | /// f - fix cache read 43 | 44 | #ifndef IGZIP_HIST_SIZE 45 | #define IGZIP_HIST_SIZE (32 * 1024) 46 | #endif 47 | 48 | #if (IGZIP_HIST_SIZE > (32 * 1024)) 49 | #undef IGZIP_HIST_SIZE 50 | #define IGZIP_HIST_SIZE (32 * 1024) 51 | #endif 52 | 53 | #ifdef LONGER_HUFFTABLE 54 | #if (IGZIP_HIST_SIZE > 8 * 1024) 55 | #undef IGZIP_HIST_SIZE 56 | #define IGZIP_HIST_SIZE (8 * 1024) 57 | #endif 58 | #endif 59 | 60 | /// (h) limit hash update 61 | #define LIMIT_HASH_UPDATE 62 | 63 | /// (f) fix cache read problem 64 | #define FIX_CACHE_READ 65 | 66 | #define ISAL_DEF_MAX_HDR_SIZE 328 67 | 68 | 69 | 70 | #endif 71 | #endif 72 | -------------------------------------------------------------------------------- /src/igzip/aarch64/crc_aarch64_dispatcher.c: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2019-2020 Arm Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 
13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | **********************************************************************/ 29 | #include "aarch64_multibinary.h" 30 | 31 | DEFINE_INTERFACE_DISPATCHER(crc32_gzip_refl) 32 | { 33 | #if defined(__linux__) 34 | unsigned long auxval = getauxval(AT_HWCAP); 35 | 36 | if (auxval & HWCAP_CRC32) { 37 | switch (get_micro_arch_id()) { 38 | case MICRO_ARCH_ID(ARM, NEOVERSE_N1): 39 | case MICRO_ARCH_ID(ARM, CORTEX_A57): 40 | case MICRO_ARCH_ID(ARM, CORTEX_A72): 41 | return PROVIDER_INFO(crc32_gzip_refl_crc_ext); 42 | } 43 | } 44 | if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { 45 | return PROVIDER_INFO(crc32_gzip_refl_3crc_fold); 46 | } 47 | 48 | if (auxval & HWCAP_PMULL) 49 | return PROVIDER_INFO(crc32_gzip_refl_pmull); 50 | #elif defined(__APPLE__) 51 | if (sysctlEnabled(SYSCTL_CRC32_KEY)) 52 | return PROVIDER_INFO(crc32_gzip_refl_3crc_fold); 53 | if (sysctlEnabled(SYSCTL_PMULL_KEY)) 54 | return PROVIDER_INFO(crc32_gzip_refl_pmull); 55 | #endif 56 | return PROVIDER_BASIC(crc32_gzip_refl); 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/igzip/aarch64/crc32_gzip_refl_crc_ext.S: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | Copyright(c) 2020 Arm Corporation All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 
13 | * Neither the name of Arm Corporation nor the names of its 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | **********************************************************************/ 29 | 30 | .text 31 | .align 6 32 | .arch armv8-a+crc 33 | 34 | 35 | #include "crc32_aarch64_common.h" 36 | 37 | BUF .req x1 38 | LEN .req x2 39 | wCRC .req w0 40 | data0 .req x4 41 | data1 .req x5 42 | data2 .req x6 43 | data3 .req x7 44 | wdata .req w3 45 | .macro crc32_u64 dst,src,data 46 | crc32x \dst,\src,\data 47 | .endm 48 | .macro crc32_u32 dst,src,data 49 | crc32w \dst,\src,\data 50 | .endm 51 | .macro crc32_u16 dst,src,data 52 | crc32h \dst,\src,\data 53 | .endm 54 | .macro crc32_u8 dst,src,data 55 | crc32b \dst,\src,\data 56 | .endm 57 | 58 | /** 59 | * uint32_t crc32_gzip_refl_crc_ext(const unsigned char *BUF, 60 | * uint64_t LEN,uint32_t wCRC); 61 | */ 62 | .global cdecl(crc32_gzip_refl_crc_ext) 63 | #ifndef __APPLE__ 64 | .type crc32_gzip_refl_crc_ext, %function 65 | #endif 66 | cdecl(crc32_gzip_refl_crc_ext): 67 | crc32_hw_common crc32 68 | #ifndef __APPLE__ 69 | .size crc32_gzip_refl_crc_ext, .-crc32_gzip_refl_crc_ext 70 | #endif 71 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. Hover to view descriptions of existing attributes. 
3 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 4 | "version": "0.2.0", 5 | "configurations": [ 6 | { 7 | "name": "Run Debug", 8 | "type": "cppdbg", 9 | "request": "launch", 10 | "program": "${workspaceFolder}/src/genozip-debug.exe", 11 | // "program": "${workspaceFolder}/src/genounzip-debug.exe", 12 | // "program" : "${workspaceFolder}/src/genocat-debug.exe", 13 | // "program": "${workspaceFolder}/src/genozip-opt.exe", 14 | // "program": "${workspaceFolder}/src/genocat-opt.exe", 15 | // "program": "${workspaceFolder}/src/genozip.exe", 16 | // "program": "${workspaceFolder}/src/genounzip.exe", 17 | // "program": "${workspaceFolder}/src/genols-debug.exe", 18 | 19 | "args" : ["--echo", "-fX", "--truncate", "./private/test/gz.bgzf.truncated.fq.gz"], 20 | 21 | "environment": [ 22 | { "name": "GENOZIP_REFERENCE", "value": "c:\\Users\\divon\\genozip/public", }, 23 | ], 24 | 25 | "stopAtEntry": false, 26 | "cwd": "${workspaceFolder}", 27 | "externalConsole": false, 28 | "MIMode": "gdb", 29 | "miDebuggerPath": "c:\\msys64\\mingw64\\bin\\gdb.exe", 30 | "setupCommands": [ 31 | { 32 | 33 | "description": "Enable pretty-printing for gdb", 34 | "text": "-enable-pretty-printing", 35 | "ignoreFailures": true 36 | }, 37 | { 38 | "text": "source ${workspaceFolder}/.vscode/.gdbinit" 39 | } 40 | ], 41 | }, 42 | 43 | // { 44 | // "name": "(gdb) Attach", 45 | // "type": "cppdbg", 46 | // "request": "attach", 47 | // "program": "enter program name, for example ${workspaceFolder}/a.exe", 48 | // "MIMode": "gdb", 49 | // "miDebuggerPath": "/path/to/gdb", 50 | // "setupCommands": [ 51 | // { 52 | // "description": "Enable pretty-printing for gdb", 53 | // "text": "-enable-pretty-printing", 54 | // "ignoreFailures": true 55 | // }, 56 | // { 57 | // "description": "Set Disassembly Flavor to Intel", 58 | // "text": "-gdb-set disassembly-flavor intel", 59 | // "ignoreFailures": true 60 | // } 61 | // ] 62 | // }, 63 | 64 | ] 65 | } 66 | 
-------------------------------------------------------------------------------- /src/sam_gem3.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // sam_gem3.c 3 | // Copyright (C) 2022-2025 Genozip Limited. Patent pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited, 7 | // under penalties specified in the license. 8 | 9 | // Compresses auxilliary fields generated by gem3 mapper 10 | 11 | #include "sam_private.h" 12 | #include "lookback.h" 13 | 14 | // ---------------------------------------------------------------------------------------------- 15 | // XB:A gem3: the version of the reference to which the read is mapped (either CT or GA) 16 | // Two possible values: 'C' or 'G' http://dcc.blueprint-epigenome.eu/#/md/bs_seq_grch38 17 | // ---------------------------------------------------------------------------------------------- 18 | 19 | // 2 possible values: G C 20 | void sam_seg_gem3_XB_A (VBlockSAMP vb, ZipDataLineSAMP dl, STRp(xb), unsigned add_bytes) 21 | { 22 | decl_ctx (OPTION_XB_A); 23 | 24 | ASSSEG (xb_len==1 && (xb[0]=='C' || xb[0]=='G'), "Invalid XB:A=%.*s, expecting C or G", STRf(xb)); 25 | 26 | if (vb->bisulfite_strand) 27 | seg_special0 (VB, SAM_SPECIAL_GEM3_XB, ctx, add_bytes); 28 | 29 | else 30 | seg_by_ctx (VB, xb, 1, ctx, add_bytes); 31 | } 32 | 33 | SPECIAL_RECONSTRUCTOR (sam_piz_special_GEM3_XB) 34 | { 35 | ASSPIZ0 (VB_SAM->bisulfite_strand, "XB:A cannot be reconstructed because bisulfite_strand is not known - likely SEQ was not reconstructed"); 36 | 37 | if (reconstruct) 38 | RECONSTRUCT1 (VB_SAM->bisulfite_strand); 39 | 40 | return NO_NEW_VALUE; 41 | } 42 | 43 | // ------------------------------------------------------------------------ 44 | // XA:Z "Alternative alignments" (a BWA, gem3 
feature) 45 | // In gem3, when used with --bisulfite XA includes the XB of each alignment 46 | // Example: XA:Z:chr1,+G10006,103M1I36M1D11M,9; 47 | // ^ 48 | // ------------------------------------------------------------------------ 49 | 50 | // split the pos strand-XB-pos string (generated with --bisulfite), eg "-C10000" to strand "-" XB "C" and pos "10000" 51 | bool sam_seg_gem3_XA_strand_cb (VBlockP vb, ContextP ctx, STRp(field), uint32_t rep) 52 | { 53 | char strand = field[0]; 54 | char xb = field[1]; 55 | 56 | if (field_len != 2 || (strand != '+' && strand != '-') || (xb != 'C' && xb != 'G')) 57 | return false; // invalid XA format 58 | 59 | // seg normally for now, we might change it in sam_seg_BWA_XA_pos 60 | WordIndex strand_index = (2 + ((strand=='+') | (xb=='G') << 1)); 61 | seg_known_node_index (vb, CTX (OPTION_XA_STRAND), strand_index, 2); 62 | 63 | return true; // segged successfully 64 | } 65 | -------------------------------------------------------------------------------- /src/libdeflate_1.19/arm/matchfinder_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * arm/matchfinder_impl.h - ARM implementations of matchfinder functions 3 | * 4 | * Copyright 2016 Eric Biggers 5 | * 6 | * Permission is hereby granted, free of charge, to any person 7 | * obtaining a copy of this software and associated documentation 8 | * files (the "Software"), to deal in the Software without 9 | * restriction, including without limitation the rights to use, 10 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the 12 | * Software is furnished to do so, subject to the following 13 | * conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 
17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | * OTHER DEALINGS IN THE SOFTWARE. 26 | */ 27 | 28 | #ifndef LIB_ARM_MATCHFINDER_IMPL_H 29 | #define LIB_ARM_MATCHFINDER_IMPL_H 30 | 31 | #include "cpu_features.h" 32 | 33 | #if HAVE_NEON_NATIVE 34 | # include 35 | static forceinline void 36 | matchfinder_init_neon(mf_pos_t *data, size_t size) 37 | { 38 | int16x8_t *p = (int16x8_t *)data; 39 | int16x8_t v = vdupq_n_s16(MATCHFINDER_INITVAL); 40 | 41 | STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); 42 | STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); 43 | STATIC_ASSERT(sizeof(mf_pos_t) == 2); 44 | 45 | do { 46 | p[0] = v; 47 | p[1] = v; 48 | p[2] = v; 49 | p[3] = v; 50 | p += 4; 51 | size -= 4 * sizeof(*p); 52 | } while (size != 0); 53 | } 54 | #define matchfinder_init matchfinder_init_neon 55 | 56 | static forceinline void 57 | matchfinder_rebase_neon(mf_pos_t *data, size_t size) 58 | { 59 | int16x8_t *p = (int16x8_t *)data; 60 | int16x8_t v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE); 61 | 62 | STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); 63 | STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); 64 | STATIC_ASSERT(sizeof(mf_pos_t) == 2); 65 | 66 | do { 67 | p[0] = vqaddq_s16(p[0], v); 68 | p[1] = vqaddq_s16(p[1], v); 69 | p[2] = vqaddq_s16(p[2], v); 70 | p[3] = vqaddq_s16(p[3], v); 71 | p += 4; 72 | size -= 4 * sizeof(*p); 73 | } while (size != 0); 74 | } 75 | #define matchfinder_rebase matchfinder_rebase_neon 76 | 77 | #endif /* HAVE_NEON_NATIVE */ 78 | 79 | #endif /* 
LIB_ARM_MATCHFINDER_IMPL_H */ 80 | -------------------------------------------------------------------------------- /src/huffman.h: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // huffman.h 3 | // Copyright (C) 2023-2025 Genozip Limited. Patent Pending. 4 | // Please see terms and conditions in the file LICENSE.txt 5 | // 6 | // WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 7 | // and subject to penalties specified in the license. 8 | 9 | #pragma once 10 | 11 | #include "genozip.h" 12 | #include "context.h" 13 | 14 | // compression 15 | extern void huffman_start_chewing (Did did_i, STRp(master), uint8_t max_prefix_len, int n_channels); 16 | extern void huffman_chew_one_sample (Did did_i, STRp(sample), bool skip_if_same_as_prev); 17 | extern uint32_t huffman_get_num_chewed (Did did_i); 18 | typedef bool HuffmanMask[256]; 19 | extern void huffman_produce_compressor (Did did_i, const HuffmanMask *mask); 20 | extern void huffman_get_master (Did did_i, pSTRp(master)); 21 | extern uint32_t huffman_compress (VBlockP vb, Did did_i, STRp (uncomp), qSTR8p(comp)); 22 | extern uint32_t huffman_get_theoretical_max_comp_len (Did did_i, uint32_t uncomp_len); 23 | extern uint32_t huffman_compressed_len (Did did_i, STRp(uncomp)); 24 | 25 | extern int huffman_uncompress (Did did_i, bytes comp, STRc(uncomp)); 26 | extern int RECONSTRUCT_huffman (VBlockP vb, Did did_i, uint32_t uncomp_len, bytes comp); 27 | extern int huffman_uncompress_len (Did did_i, bytes comp, uint32_t uncomp_len); 28 | extern bool huffman_issame (Did did_i, bytes comp, uint32_t uncomp_len, STRp(uncomp_compare_to)); 29 | extern void huffman_compress_section (Did did_i); 30 | extern void huffman_piz_read_all (void); 31 | extern void huffman_piz_backcomp_produce_qual (STRp(qual)); 32 | 33 | typedef enum { HUFF_NOT_PRODUCED, 
HUFF_PRODUCED_BY_ZIP, HUFF_PRODUCED_BY_PIZ } HuffProducedStatus; // values of huffman.param 34 | #define huffman_exists(did_i) (ZCTX(did_i)->huffman.param != HUFF_NOT_PRODUCED) 35 | 36 | // NICO stuff 37 | extern void nico_start_chewing (Did did_i); 38 | extern void nico_chew_one_cigar (Did did_i, BamCigarOpP cigar, uint32_t n_cigar_op); 39 | extern void nico_chew_one_textual_cigar (VBlockP vb, Did did_i, STRp(textual_cigar)); 40 | extern void nico_produce_compressor (Did did_i, bool is_deep_cigar); 41 | extern uint32_t nico_max_comp_len (Did did_i, uint32_t n_cigar_op); 42 | extern uint32_t nico_compress_cigar (VBlockP vb, Did did_i, BamCigarOpP cigar, uint32_t n_cigar_op, STR8c(comp)); 43 | extern uint32_t nico_compress_textual_cigar (VBlockP vb, Did did_i, STRp(textual_cigar), STR8c(comp)); 44 | extern uint32_t nico_compressed_len_textual_cigar (VBlockP vb, Did did_i, STRp(textual_cigar)); 45 | extern uint32_t nico_uncompress_cigar (VBlockP vb, Did did_i, bytes comp, BufferP cigar, rom buf_name); 46 | extern uint32_t nico_uncompress_textual_cigar (Did did_i, bytes comp, BufferP textual_cigar, uint32_t textual_len, bool do_htos); 47 | -------------------------------------------------------------------------------- /src/mac-pkg-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash --norc 2 | 3 | # ------------------------------------------------------------------ 4 | # mac-pkg-build.sh 5 | # Copyright (C) 2020-2025 Genozip Limited where applies 6 | # Please see terms and conditions in the file LICENSE.txt 7 | # 8 | # WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited 9 | # and subject to penalties specified in the license. 
#!/bin/bash --norc

# ------------------------------------------------------------------
#   mac-pkg-build.sh
#   Copyright (C) 2020-2025 Genozip Limited where applies
#   Please see terms and conditions in the file LICENSE.txt
#
#   WARNING: Genozip is proprietary, not open source software. Modifying the source code is strictly prohibited
#   and subject to penalties specified in the license.

# loosely based on https://github.com/KosalaHerath/macos-installer-builder which is licensed under Apache 2.0 license

# Builds and signs the macOS installer package (${MAC_DIR}/genozip-installer.pkg).
# All paths are relative to the current directory, which must contain version.h, the built executables,
# README.md, LICENSE.txt and the ${MAC_DIR} directory. Exits with 1 on the first failing step.

if [[ "$(uname)" != 'Darwin' ]]; then
    echo " "
    echo "To create the Mac installer, you must be on a Mac:"
    echo " 1. git pull OR git clone"
    echo " 2. make clean"
    echo " 3. make macos"
    echo " "
    exit 1
fi

MAC_DIR=mac
TARGET_DIR="${MAC_DIR}/target"

# the version is the text between the first pair of double quotes on the first line of version.h
VERSION=$(head -n1 version.h | cut -d\" -f2)
if [[ -z "${VERSION}" ]]; then
    echo "Error: failed to read the version from the first line of version.h"
    exit 1
fi

FILES=(genozip genounzip genols genocat) # array
FILES_STR="${FILES[*]}"                  # string (space-separated)

# copy files to target directory
rm -rf "${TARGET_DIR}" || exit 1
mkdir -p "${TARGET_DIR}/darwinpkg/Library/genozip" "${TARGET_DIR}/Resources" "${TARGET_DIR}/scripts" || exit 1

# NOTE(review): this script reads ${MAC_DIR}/welcome.html, ${MAC_DIR}/postinstall and ${MAC_DIR}/uninstall.sh, while
# the repository tree lists welcome.template.html, postinstall.template.sh and uninstall.template.sh - confirm whether
# an earlier build step produces the names used here, or whether these paths are stale.
cp "${FILES[@]}" "${TARGET_DIR}/darwinpkg/Library/genozip" || exit 1
cp "${MAC_DIR}/Distribution" "${TARGET_DIR}" || exit 1
cp "${MAC_DIR}/uninstall.sh" "${TARGET_DIR}/Resources" || exit 1
sed -e "s/__VERSION__/${VERSION}/g" "${MAC_DIR}/welcome.html" > "${TARGET_DIR}/Resources/welcome.html" || exit 1
cp README.md "${TARGET_DIR}/Resources/README.html" || exit 1
cp LICENSE.txt "${TARGET_DIR}/Resources/" || exit 1
sed -e "s/__FILES__/${FILES_STR}/g" "${MAC_DIR}/postinstall" > "${TARGET_DIR}/scripts/postinstall" || exit 1
sed -e "s/__VERSION__/${VERSION}/g" "${MAC_DIR}/uninstall.sh" | sed -e "s/__FILES__/${FILES_STR}/g" > "${TARGET_DIR}/darwinpkg/Library/genozip/uninstall.sh" || exit 1

chmod -R 755 "${TARGET_DIR}" || exit 1

# build package
pkgbuild --identifier "org.genozip.${VERSION}" --version "${VERSION}" --scripts "${TARGET_DIR}/scripts" --root "${TARGET_DIR}/darwinpkg" "${MAC_DIR}/genozip.pkg" || exit 1

PRODUCT="${MAC_DIR}/genozip-installer.pkg"
PRODUCT_SIGNED="${MAC_DIR}/genozip-installer.signed.pkg"
productbuild --distribution "${TARGET_DIR}/Distribution" --resources "${TARGET_DIR}/Resources" --package-path "${MAC_DIR}" "${PRODUCT}" || exit 1

# sign product - note: I need a "3rd party mac developer" certificate installed in the keychain to run this.
# see how to obtain a certificate: https://developer.apple.com/developer-id/
productsign --sign "Divon Lan" "${PRODUCT}" "${PRODUCT_SIGNED}" || exit 1
pkgutil --check-signature "${PRODUCT_SIGNED}" # doesn't exit with 0 on success

# replace the unsigned package with the signed one, so that the final product is ${PRODUCT}
rm "${PRODUCT}" || exit 1
mv "${PRODUCT_SIGNED}" "${PRODUCT}" || exit 1

# report the file that actually exists now: the signed package was just renamed to ${PRODUCT}
echo "Built ${PRODUCT}"

exit 0