├── .cirrus.yml ├── .gitattributes ├── .github └── workflows │ ├── unix-build.yml │ └── windows-build.yml ├── .gitignore ├── BENCHMARKS.md ├── LICENSE.md ├── MAINTAINERS.md ├── Makefile.am ├── NEWS.md ├── README.md ├── configure.ac ├── htscodecs ├── Makefile.am ├── arith_dynamic.c ├── arith_dynamic.h ├── c_range_coder.h ├── c_simple_model.h ├── fqzcomp_qual.c ├── fqzcomp_qual.h ├── htscodecs.c ├── htscodecs.h ├── htscodecs_endian.h ├── pack.c ├── pack.h ├── permute.h ├── pooled_alloc.h ├── rANS_byte.h ├── rANS_static.c ├── rANS_static.h ├── rANS_static16_int.h ├── rANS_static32x16pr.c ├── rANS_static32x16pr.h ├── rANS_static32x16pr_avx2.c ├── rANS_static32x16pr_avx512.c ├── rANS_static32x16pr_neon.c ├── rANS_static32x16pr_sse4.c ├── rANS_static4x16.h ├── rANS_static4x16pr.c ├── rANS_word.h ├── rle.c ├── rle.h ├── tokenise_name3.c ├── tokenise_name3.h ├── utils.c ├── utils.h ├── varint.h └── varint2.h ├── javascript ├── Makefile ├── README.md ├── arith_gen.js ├── arith_sh.js ├── byte_model.js ├── fqzcomp.js ├── index.js ├── iostream.js ├── main_arith_gen.js ├── main_fqzcomp.js ├── main_rans.js ├── main_rans4x16.js ├── main_tok3.js ├── rans.js ├── rans4x16.js └── tok3.js ├── m4 ├── ax_search_libs_rev.m4 ├── hts_check_compile_flags_needed.m4 └── vl_prog_warnings.m4 └── tests ├── Makefile.am ├── arith.test ├── arith_dynamic_fuzz.c ├── arith_dynamic_test.c ├── benchmark.sh ├── dat ├── arith │ ├── q4.0 │ ├── q4.1 │ ├── q4.128 │ ├── q4.129 │ ├── q4.192 │ ├── q4.193 │ ├── q4.64 │ ├── q4.65 │ ├── q4.8 │ ├── q4.9 │ ├── q40+dir.0 │ ├── q40+dir.1 │ ├── q40+dir.64 │ ├── q40+dir.65 │ ├── q40+dir.8 │ ├── q40+dir.9 │ ├── q8.0 │ ├── q8.1 │ ├── q8.128 │ ├── q8.129 │ ├── q8.192 │ ├── q8.193 │ ├── q8.64 │ ├── q8.65 │ ├── qvar.0 │ ├── qvar.1 │ ├── qvar.64 │ ├── qvar.65 │ ├── u32.1 │ ├── u32.4 │ ├── u32.65 │ └── u32.9 ├── fqzcomp │ ├── q4.0 │ ├── q4.1 │ ├── q4.2 │ ├── q4.3 │ ├── q40+dir.0 │ ├── q40+dir.1 │ ├── q40+dir.2 │ ├── q40+dir.3 │ ├── q8.0 │ ├── q8.1 │ ├── q8.2 │ ├── q8.3 │ ├── 
qvar.0 │ ├── qvar.1 │ ├── qvar.2 │ └── qvar.3 ├── q4 ├── q40+dir ├── q8 ├── qvar ├── r4x16 │ ├── q4.0 │ ├── q4.1 │ ├── q4.128 │ ├── q4.129 │ ├── q4.192 │ ├── q4.193 │ ├── q4.4 │ ├── q4.5 │ ├── q4.64 │ ├── q4.65 │ ├── q4.8 │ ├── q4.9 │ ├── q40+dir.0 │ ├── q40+dir.1 │ ├── q40+dir.4 │ ├── q40+dir.5 │ ├── q40+dir.8 │ ├── q40+dir.9 │ ├── q8.0 │ ├── q8.1 │ ├── q8.128 │ ├── q8.129 │ ├── q8.192 │ ├── q8.193 │ ├── q8.4 │ ├── q8.5 │ ├── q8.64 │ ├── q8.65 │ ├── qvar.0 │ ├── qvar.1 │ ├── qvar.4 │ └── qvar.5 ├── r4x8 │ ├── q4.0 │ ├── q4.1 │ ├── q40+dir.0 │ ├── q40+dir.1 │ ├── q8.0 │ ├── q8.1 │ ├── qvar.0 │ └── qvar.1 └── u32 ├── entropy.c ├── entropy.test ├── entropy_fuzz.c ├── fqzcomp.test ├── fqzcomp_qual_fuzz.c ├── fqzcomp_qual_fuzzrt.c ├── fqzcomp_qual_test.c ├── names ├── 01.names ├── 02.names ├── 03.names ├── 05.names ├── 08.names ├── 09.names ├── 10.names ├── 20.names ├── nv.names ├── nv2.names ├── rr.names └── tok3 │ ├── 01.names.1 │ ├── 01.names.11 │ ├── 01.names.13 │ ├── 01.names.15 │ ├── 01.names.17 │ ├── 01.names.19 │ ├── 01.names.3 │ ├── 01.names.5 │ ├── 01.names.7 │ ├── 01.names.9 │ ├── 02.names.1 │ ├── 02.names.11 │ ├── 02.names.13 │ ├── 02.names.15 │ ├── 02.names.17 │ ├── 02.names.19 │ ├── 02.names.3 │ ├── 02.names.5 │ ├── 02.names.7 │ ├── 02.names.9 │ ├── 03.names.1 │ ├── 03.names.11 │ ├── 03.names.13 │ ├── 03.names.15 │ ├── 03.names.17 │ ├── 03.names.19 │ ├── 03.names.3 │ ├── 03.names.5 │ ├── 03.names.7 │ ├── 03.names.9 │ ├── 05.names.1 │ ├── 05.names.11 │ ├── 05.names.13 │ ├── 05.names.15 │ ├── 05.names.17 │ ├── 05.names.19 │ ├── 05.names.3 │ ├── 05.names.5 │ ├── 05.names.7 │ ├── 05.names.9 │ ├── 08.names.1 │ ├── 08.names.11 │ ├── 08.names.13 │ ├── 08.names.15 │ ├── 08.names.17 │ ├── 08.names.19 │ ├── 08.names.3 │ ├── 08.names.5 │ ├── 08.names.7 │ ├── 08.names.9 │ ├── 09.names.1 │ ├── 09.names.11 │ ├── 09.names.13 │ ├── 09.names.15 │ ├── 09.names.17 │ ├── 09.names.19 │ ├── 09.names.3 │ ├── 09.names.5 │ ├── 09.names.7 │ ├── 09.names.9 │ ├── 10.names.1 │ ├── 
10.names.11 │ ├── 10.names.13 │ ├── 10.names.15 │ ├── 10.names.17 │ ├── 10.names.19 │ ├── 10.names.3 │ ├── 10.names.5 │ ├── 10.names.7 │ ├── 10.names.9 │ ├── 20.names.1 │ ├── 20.names.11 │ ├── 20.names.13 │ ├── 20.names.15 │ ├── 20.names.17 │ ├── 20.names.19 │ ├── 20.names.3 │ ├── 20.names.5 │ ├── 20.names.7 │ ├── 20.names.9 │ ├── nv.names.1 │ ├── nv.names.11 │ ├── nv.names.13 │ ├── nv.names.15 │ ├── nv.names.17 │ ├── nv.names.19 │ ├── nv.names.3 │ ├── nv.names.5 │ ├── nv.names.7 │ ├── nv.names.9 │ ├── nv2.names.1 │ ├── nv2.names.11 │ ├── nv2.names.13 │ ├── nv2.names.15 │ ├── nv2.names.17 │ ├── nv2.names.19 │ ├── nv2.names.3 │ ├── nv2.names.5 │ ├── nv2.names.7 │ ├── nv2.names.9 │ ├── rr.names.1 │ ├── rr.names.11 │ ├── rr.names.13 │ ├── rr.names.15 │ ├── rr.names.17 │ ├── rr.names.19 │ ├── rr.names.3 │ ├── rr.names.5 │ ├── rr.names.7 │ └── rr.names.9 ├── rANS_static4x16pr_fuzz.c ├── rANS_static4x16pr_test.c ├── rANS_static_fuzz.c ├── rANS_static_test.c ├── rans4x16.test ├── rans4x8.test ├── tok3.test ├── tokenise_name3_fuzz.c ├── tokenise_name3_fuzzrt.c ├── tokenise_name3_test.c └── varint_test.c /.cirrus.yml: -------------------------------------------------------------------------------- 1 | # Standard environment setup. 2 | env_template: &ENVIRONMENT 3 | environment: 4 | LC_ALL: C 5 | CIRRUS_CLONE_DEPTH: 1 6 | timeout_in: 10m 7 | 8 | # Standard compilation and testing rules 9 | compile_template: &COMPILE 10 | compile_script: 11 | - autoreconf -i 12 | - ./configure 13 | - make -j4 14 | 15 | test_script: 16 | - make check 17 | 18 | # on_failure: 19 | # - cat tests/test-suite.log 20 | 21 | # ---------- 22 | # Linux 23 | 24 | task: 25 | << : *ENVIRONMENT 26 | name: linux-arm 27 | arm_container: 28 | image: ubuntu:latest 29 | cpu: 2 30 | memory: 1G 31 | 32 | # NB: we could consider building a docker image with these 33 | # preinstalled and specifying that instead, to speed up testing. 
34 | install_script: | 35 | apt-get update 36 | apt-get install -y --no-install-suggests --no-install-recommends \ 37 | ca-certificates clang libc-dev make git autoconf automake libtool \ 38 | libbz2-dev 39 | 40 | compile_script: 41 | - autoreconf -i 42 | - ./configure CC="clang" --disable-shared 43 | - make -j4 CFLAGS="-g -O3 -Wall -Werror -Wextra -Wno-sign-compare -Wno-unused-parameter" 44 | 45 | test_script: 46 | - make check CFLAGS="-g -O3 -Wall -Werror -Wextra -Wno-sign-compare -Wno-unused-parameter" 47 | - make distcheck 48 | 49 | # Rocky Linux 50 | rocky_task: 51 | << : *ENVIRONMENT 52 | name: rockylinux-gcc 53 | container: 54 | image: rockylinux:9 55 | cpu: 2 56 | memory: 1G 57 | 58 | # # See https://cirrus-ci.org/guide/writing-tasks/#cache-instruction 59 | # # Not worth it as it takes longer to unpack the /usr updates than 60 | # # to just install the bits we need. No easy way to install elsewhere 61 | # # either. :/ 62 | # install_cache: 63 | # folder: /usr # Will this work!? It's ~150Mb. Yes, but don't bother 64 | # populate_script: | 65 | # yum install -y \ 66 | # autoconf automake libtool make gcc zlib-devel \ 67 | # bzip2 bzip2-devel git diffutils 68 | 69 | install_script: | 70 | yum install -y autoconf automake libtool make gcc zlib-devel \ 71 | bzip2 bzip2-devel git diffutils 72 | 73 | << : *COMPILE 74 | 75 | # ---------- 76 | # FreeBSD 77 | task: 78 | name: freebsd 79 | freebsd_instance: 80 | image_family: freebsd-14-2 81 | 82 | pkginstall_script: 83 | - pkg update -f 84 | - pkg install -y gcc autoconf automake libdeflate libtool 85 | 86 | compile_script: 87 | - autoreconf -i 88 | - ./configure 89 | - make -j4 CFLAGS="-g -O3 -Wall -Werror" 90 | 91 | test_script: 92 | - make check CFLAGS="-g -O3 -Wall -Werror" 93 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Github's git changes NL to CR-NL on Windows. 
2 | # We pretend everything is binary so any comparisons of test files 3 | # work. 4 | tests/** -text 5 | tests/*.c text 6 | tests/*.sh text 7 | tests/*.test text 8 | -------------------------------------------------------------------------------- /.github/workflows/unix-build.yml: -------------------------------------------------------------------------------- 1 | # Various unix builds 2 | name: unix CI 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | os: [ubuntu-latest, macos-latest] 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | 17 | # MacOS. We validate compilation of x86_64 and arm64, but only 18 | # test arm64. This will also be using clang by default 19 | - name: macOS specific 20 | if: runner.os == 'macOS' 21 | run: | 22 | brew install autoconf automake libtool 23 | autoreconf -i 24 | ./configure CFLAGS="-g -O3 -Wall -Werror -arch arm64 -arch x86_64" 25 | 26 | # A slower build and test with address and undefined behaviour sanitizers 27 | - name: Ubuntu-latest using gcc with sanitizers 28 | if: runner.os == 'Linux' 29 | run: | 30 | sudo apt-get update 31 | sudo apt-get install -y --no-install-suggests --no-install-recommends libbz2-dev 32 | autoreconf -i 33 | ./configure CC="gcc -fsanitize=address,undefined" 34 | 35 | - name: Compile 36 | run: | 37 | make -j6 38 | 39 | - name: Check 40 | run: | 41 | make check || (cat tests/test-suite.log; false) 42 | 43 | -------------------------------------------------------------------------------- /.github/workflows/windows-build.yml: -------------------------------------------------------------------------------- 1 | # A faster alternative to AppVeyor 2 | name: Windows/MinGW-W64 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: windows-latest 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v4 11 | - name: Set up MSYS2 MinGW-W64 12 | uses: msys2/setup-msys2@v2 13 | with: 14 | 
msystem: mingw64 15 | update: false 16 | install: >- 17 | zlib-devel 18 | libbz2-devel 19 | liblzma-devel 20 | mingw-w64-x86_64-toolchain 21 | mingw-w64-x86_64-autotools 22 | mingw-w64-x86_64-tools-git 23 | - name: Compile htscodecs 24 | shell: msys2 {0} 25 | run: | 26 | export PATH=/mingw64/bin:$PATH 27 | export MSYSTEM=MINGW64 28 | autoreconf -i 29 | ./configure --disable-shared CFLAGS='-g -O3 -D_XOPEN_SOURCE=600 -Wall -Werror' 30 | make -j8 31 | - name: Check htscodecs 32 | shell: msys2 {0} 33 | run: | 34 | export PATH=/mingw64/bin:$PATH 35 | export MSYSTEM=MINGW64 36 | make check || (cat tests/test-suite.log; false) 37 | 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.in 2 | *.lo 3 | *.la 4 | *.o 5 | *.pico 6 | 7 | aclocal.m4 8 | autom4te.cache 9 | compile 10 | config.* 11 | configure 12 | depcomp 13 | missing 14 | install-sh 15 | libtool 16 | ltmain.sh 17 | m4/libtool.m4 18 | m4/lt*.m4 19 | Makefile 20 | stamp-h1 21 | test-driver 22 | 23 | htscodecs/.deps/ 24 | htscodecs/.libs/ 25 | htscodecs/Makefile 26 | 27 | htscodecs/version.h 28 | 29 | tests/*.log 30 | tests/*.trs 31 | tests/.deps/ 32 | tests/.libs/ 33 | tests/arith_dynamic 34 | tests/arith_dynamic_fuzz 35 | tests/entropy 36 | tests/entropy_fuzz 37 | tests/fqzcomp_qual 38 | tests/fqzcomp_qual_fuzz 39 | tests/Makefile 40 | tests/rans4x16pr 41 | tests/rans4x16pr_fuzz 42 | tests/rans4x8 43 | tests/rans4x8_fuzz 44 | tests/test.out/ 45 | tests/tokenise_name3 46 | tests/tokenise_name3_fuzz 47 | tests/varint 48 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | All files except those explicitly listed below are copyright Genome 2 | Research Limited and are made available under the BSD license. 
3 | 4 | > Redistribution and use in source and binary forms, with or without 5 | > modification, are permitted provided that the following conditions 6 | > are met: 7 | > 8 | > (1) Redistributions of source code must retain the above copyright 9 | > notice, this list of conditions and the following disclaimer. 10 | > 11 | > (2) Redistributions in binary form must reproduce the above copyright 12 | > notice, this list of conditions and the following disclaimer in 13 | > the documentation and/or other materials provided with the distribution. 14 | > 15 | > (3)The name of the author may not be used to endorse or promote 16 | > products derived from this software without specific prior written 17 | > permission. 18 | > 19 | > THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 | > IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | > WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | > DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 23 | > INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | > (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | > SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 | > HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 27 | > STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 28 | > IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | > POSSIBILITY OF SUCH DAMAGE. 30 | 31 | c_range_coder.h is Public Domain, derived from work by Eugene 32 | Shelwien. 33 | 34 | rANS_byte.h and rANS_word.h are derived from Fabian Giesen's work and 35 | are Public Domain. 
https://github.com/rygorous/ryg_rans This work was 36 | in turn based on the ANS family of entropy encoders as described by 37 | Jarek Duda's paper: http://arxiv.org/abs/1311.2540 38 | 39 | > To the extent possible under law, Fabian Giesen has waived all 40 | > copyright and related or neighboring rights to ryg_rans, as 41 | > per the terms of the CC0 license: 42 | > 43 | > https://creativecommons.org/publicdomain/zero/1.0 44 | > 45 | > This work is published from the United States. 46 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | Notes to maintainers for building releases. 2 | This is best done as a release PR so we can check it first. 3 | 4 | 1. Places to update the version number include: 5 | 6 | - htscodecs/htscodecs.h (used for program introspection) 7 | 8 | - configure.ac AC_INIT macro 9 | 10 | - configure.ac VERS_CURRENT, VERS_REVISION and VERS_AGE variables. 11 | See the long comment above for instructions of how these change. 12 | 13 | - NEWS files. 14 | 15 | 16 | 2. Ensure NEWS and README files are up to date. NEWS is a git log 17 | summary. README likely doesn't change unless something major needs 18 | mentioning. 19 | 20 | - At time of merging, set the date at the top of NEWS. 21 | 22 | 23 | 3. Test it all. 24 | - Push to github PR so the CI can validate for us. 25 | 26 | - make distcheck 27 | This also makes the tarball htscodecs-${vers}.tar.gz. 28 | 29 | 30 | 4. Merge into master 31 | 32 | 33 | 5. Add an annotated tag with minimal message, eg: 34 | 35 | - git tag -a v1.1 -m v1.1 36 | 37 | 38 | 6. Push master and --tags upstream to github 39 | 40 | 41 | 7. Make a new release on github. 42 | 43 | - Title: "htscodecs ${vers}" 44 | 45 | - Message: this is just a copy of NEWS. 46 | It's already in Markdown format, but double check the preview panel. 47 | 48 | - Upload the tarball produced from distcheck to the assets. 
49 | 50 | 51 | 8. Finally, consider updating any packages that use this as a 52 | submodule to ensure they have the latest tagged release. 53 | 54 | This will invariably help OS distributions keep their package 55 | dependencies neatly in sync. 56 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2003, 2005-2007, 2009-2010, 2013 Genome Research Ltd. 2 | # Author(s): James Bonfield 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above 11 | # copyright notice, this list of conditions and the following 12 | # disclaimer in the documentation and/or other materials provided 13 | # with the distribution. 14 | # 15 | # 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 16 | # Institute nor the names of its contributors may be used to endorse 17 | # or promote products derived from this software without specific 18 | # prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 21 | # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 | # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 23 | # PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 24 | # LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | # 32 | AUTOMAKE_OPTIONS = foreign no-dependencies 33 | ACLOCAL_AMFLAGS = -I m4 34 | 35 | SUBDIRS = htscodecs . tests 36 | 37 | nobase_include_HEADERS = \ 38 | htscodecs/arith_dynamic.h \ 39 | htscodecs/fqzcomp_qual.h \ 40 | htscodecs/rANS_static.h \ 41 | htscodecs/rANS_static4x16.h \ 42 | htscodecs/tokenise_name3.h \ 43 | htscodecs/pack.h \ 44 | htscodecs/rle.h \ 45 | htscodecs/varint.h \ 46 | htscodecs/htscodecs.h 47 | 48 | EXTRA_DIST = README.md LICENSE.md NEWS.md 49 | -------------------------------------------------------------------------------- /htscodecs/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2003, 2005-2007, 2009-2010, 2013, 2019, 2022-2023 2 | # Genome Research Ltd. 3 | # Author(s): James Bonfield 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, 9 | # this list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above 12 | # copyright notice, this list of conditions and the following 13 | # disclaimer in the documentation and/or other materials provided 14 | # with the distribution. 15 | # 16 | # 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | # Institute nor the names of its contributors may be used to endorse 18 | # or promote products derived from this software without specific 19 | # prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | # LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | # 33 | lib_LTLIBRARIES = libhtscodecs.la 34 | 35 | libhtscodecs_base_src = \ 36 | pack.c \ 37 | pack.h \ 38 | rle.c \ 39 | rle.h \ 40 | fqzcomp_qual.c \ 41 | fqzcomp_qual.h \ 42 | rANS_static.c \ 43 | rANS_static.h \ 44 | rANS_byte.h \ 45 | rANS_static4x16pr.c \ 46 | rANS_static4x16.h \ 47 | rANS_word.h \ 48 | rANS_static32x16pr.c \ 49 | rANS_static32x16pr.h \ 50 | rANS_static32x16pr_neon.c \ 51 | rANS_static16_int.h \ 52 | permute.h \ 53 | tokenise_name3.c \ 54 | tokenise_name3.h \ 55 | pooled_alloc.h \ 56 | arith_dynamic.c \ 57 | arith_dynamic.h \ 58 | c_range_coder.h \ 59 | c_simple_model.h \ 60 | varint.h \ 61 | htscodecs.c \ 62 | htscodecs.h \ 63 | htscodecs_endian.h \ 64 | utils.c \ 65 | utils.h 66 | 67 | libhtscodecs_la_SOURCES = $(libhtscodecs_base_src) 68 | libhtscodecs_la_LIBADD = 69 | noinst_LTLIBRARIES = 70 | 71 | # SIMD optional extras 72 | if RANS_32x16_SSE4 73 | noinst_LTLIBRARIES += librANS_static32x16pr_sse4.la 74 | librANS_static32x16pr_sse4_la_SOURCES = rANS_static32x16pr_sse4.c 75 | librANS_static32x16pr_sse4_la_CFLAGS = @MSSE4_1@ 76 | libhtscodecs_la_LIBADD += librANS_static32x16pr_sse4.la 77 | endif 78 | if RANS_32x16_AVX2 79 | noinst_LTLIBRARIES += librANS_static32x16pr_avx2.la 80 | librANS_static32x16pr_avx2_la_SOURCES = rANS_static32x16pr_avx2.c 81 | librANS_static32x16pr_avx2_la_CFLAGS = @MAVX2@ 82 | libhtscodecs_la_LIBADD += librANS_static32x16pr_avx2.la 83 | endif 84 | if RANS_32x16_AVX512 85 | noinst_LTLIBRARIES += librANS_static32x16pr_avx512.la 86 | librANS_static32x16pr_avx512_la_SOURCES = rANS_static32x16pr_avx512.c 87 | librANS_static32x16pr_avx512_la_CFLAGS = @MAVX512@ 88 | libhtscodecs_la_LIBADD += librANS_static32x16pr_avx512.la 89 | endif 90 | 91 | libhtscodecs_la_LDFLAGS = -version-info @VERS_CURRENT@:@VERS_REVISION@:@VERS_AGE@ 92 | libhtscodecs_la_LIBADD += -lm 93 | 94 | # Fuzz testing version of the library. 
This is built using -fsanitize=fuzzer 95 | # and defines FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION to prevent things like 96 | # large allocations that would otherwise upset the fuzzer. 97 | fuzz: $(EXTRA_LIBRARIES) 98 | 99 | # Note that we build several libraries here, so we can get automake to 100 | # use the right options for the various parts. 101 | # See https://www.gnu.org/software/automake/manual/html_node/Per_002dObject-Flags.html 102 | EXTRA_LIBRARIES = libcodecsfuzz.a libcodecsfuzz_sse4.a libcodecsfuzz_avx2.a libcodecsfuzz_avx512.a 103 | libcodecsfuzz_a_SOURCES = $(libhtscodecs_base_src) 104 | libcodecsfuzz_a_CFLAGS = -fsanitize=fuzzer -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 105 | libcodecsfuzz_a-htscodecs.$(OBJEXT): version.h 106 | libcodecsfuzz_sse4_a_SOURCES = rANS_static32x16pr_sse4.c 107 | libcodecsfuzz_sse4_a_CFLAGS = $(libcodecsfuzz_a_CFLAGS) @MSSE4_1@ @MSSSE3@ @MPOPCNT@ 108 | libcodecsfuzz_avx2_a_SOURCES = rANS_static32x16pr_avx2.c 109 | libcodecsfuzz_avx2_a_CFLAGS = $(libcodecsfuzz_a_CFLAGS) @MAVX2@ 110 | libcodecsfuzz_avx512_a_SOURCES = rANS_static32x16pr_avx512.c 111 | libcodecsfuzz_avx512_a_CFLAGS = $(libcodecsfuzz_a_CFLAGS) @MAVX512@ 112 | 113 | version.h: force 114 | @ if `git describe 2>/dev/null >/dev/null`; then \ 115 | echo '#define HTSCODECS_VERSION_TEXT "'`git describe --match 'v[0-9]\.[0-9]*' --dirty|sed 's/^v//'`'"' > _version.h; \ 116 | else \ 117 | echo '#define HTSCODECS_VERSION_TEXT "@PACKAGE_VERSION@"' > _version.h; \ 118 | fi; 119 | @ if ( [ ! -e version.h ] || ! cmp -s version.h _version.h); then \ 120 | echo "Update version.h: `cat _version.h`"; \ 121 | mv _version.h version.h; \ 122 | else \ 123 | rm _version.h; \ 124 | fi 125 | 126 | force: 127 | 128 | .PHONY: force 129 | 130 | # Manually added as the auto-dependency generation won't pick up a file that 131 | # may not yet exist. 
132 | BUILT_SOURCES = version.h 133 | 134 | distclean-local: 135 | rm -f version.h 136 | -------------------------------------------------------------------------------- /htscodecs/arith_dynamic.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef ARITH_DYNAMIC_H 35 | #define ARITH_DYNAMIC_H 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | unsigned char *arith_compress(unsigned char *in, unsigned int in_size, 42 | unsigned int *out_size, int order); 43 | 44 | unsigned char *arith_uncompress(unsigned char *in, unsigned int in_size, 45 | unsigned int *out_size); 46 | 47 | unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, 48 | unsigned char *out, unsigned int *out_size, 49 | int order); 50 | 51 | unsigned char *arith_uncompress_to(unsigned char *in, unsigned int in_size, 52 | unsigned char *out, unsigned int *out_sz); 53 | 54 | unsigned int arith_compress_bound(unsigned int size, int order); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif /* ARITH_DYNAMIC_H */ 61 | -------------------------------------------------------------------------------- /htscodecs/c_range_coder.h: -------------------------------------------------------------------------------- 1 | // Copyright Eugene Shelwien. 2 | // Release into public domain. 3 | 4 | // Modifications by James Bonfield (2019) 5 | 6 | 7 | /* 8 | * Note it is up to the calling code to ensure that no overruns on input and 9 | * output buffers occur. 10 | * 11 | * Call the input() and output() functions to set and query the current 12 | * buffer locations. 
13 | * 14 | 15 | */ 16 | 17 | #ifndef C_RANGER_CODER_H 18 | #define C_RANGER_CODER_H 19 | 20 | #define DO(n) int _;for (_=0; _out_buf = rc->in_buf = (uc *)in; 40 | rc->in_end = (uc *)in_end; 41 | } 42 | 43 | // NB: call RC_SetOutput first, and then RC_SetOutputEnd 44 | static inline void RC_SetOutput(RangeCoder *rc, char *out) { rc->in_buf = rc->out_buf = (uc *)out; rc->out_end = NULL;} 45 | static inline void RC_SetOutputEnd(RangeCoder *rc, char *out_end) { rc->out_end = (uc *)out_end; } 46 | static inline char *RC_GetInput(RangeCoder *rc) { return (char *)rc->in_buf; } 47 | static inline char *RC_GetOutput(RangeCoder *rc) { return (char *)rc->out_buf; } 48 | static inline size_t RC_OutSize(RangeCoder *rc) { return rc->out_buf - rc->in_buf; } 49 | static inline size_t RC_InSize(RangeCoder *rc) { return rc->in_buf - rc->out_buf; } 50 | 51 | static inline void RC_StartEncode(RangeCoder *rc) 52 | { 53 | rc->range = 0xFFFFFFFF; 54 | rc->low = 0; 55 | rc->FFNum = 0; 56 | rc->Carry = 0; 57 | rc->Cache = 0; 58 | rc->code = 0; 59 | rc->err = 0; 60 | } 61 | 62 | static inline void RC_StartDecode(RangeCoder *rc) 63 | { 64 | rc->range = 0xFFFFFFFF; 65 | rc->low = 0; 66 | rc->FFNum = 0; 67 | rc->Carry = 0; 68 | rc->Cache = 0; 69 | rc->code = 0; 70 | rc->err = 0; 71 | if (rc->in_buf+5 > rc->in_end) { 72 | rc->in_buf = rc->in_end; // prevent decode 73 | return; 74 | } 75 | DO(5) rc->code = (rc->code<<8) | *rc->in_buf++; 76 | } 77 | 78 | static inline void RC_ShiftLowCheck(RangeCoder *rc) { 79 | if (rc->low < Thres || rc->Carry) { 80 | if (rc->out_end && rc->FFNum >= rc->out_end - rc->out_buf) { 81 | rc->err = -1; 82 | return; 83 | } 84 | 85 | *rc->out_buf++ = rc->Cache + rc->Carry; 86 | 87 | // Flush any stored FFs 88 | while (rc->FFNum) { 89 | *rc->out_buf++ = rc->Carry-1; // (Carry-1)&255; 90 | rc->FFNum--; 91 | } 92 | 93 | // Take copy of top byte ready for next flush 94 | rc->Cache = rc->low >> 24; 95 | rc->Carry = 0; 96 | } else { 97 | // Low if FFxx xxxx. 
Bump FF count and shift in as before 98 | rc->FFNum++; 99 | } 100 | rc->low = rc->low<<8; 101 | } 102 | 103 | static inline void RC_ShiftLow(RangeCoder *rc) { 104 | if (rc->low < Thres || rc->Carry) { 105 | *rc->out_buf++ = rc->Cache + rc->Carry; 106 | 107 | // Flush any stored FFs 108 | while (rc->FFNum) { 109 | *rc->out_buf++ = rc->Carry-1; // (Carry-1)&255; 110 | rc->FFNum--; 111 | } 112 | 113 | // Take copy of top byte ready for next flush 114 | rc->Cache = rc->low >> 24; 115 | rc->Carry = 0; 116 | } else { 117 | // Low if FFxx xxxx. Bump FF count and shift in as before 118 | rc->FFNum++; 119 | } 120 | rc->low = rc->low<<8; 121 | } 122 | 123 | static inline int RC_FinishEncode(RangeCoder *rc) 124 | { 125 | DO(5) RC_ShiftLowCheck(rc); 126 | return rc->err; 127 | } 128 | 129 | static inline int RC_FinishDecode(RangeCoder *rc) { 130 | return rc->err; 131 | } 132 | 133 | static inline void RC_Encode (RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t totFreq) 134 | { 135 | uint32_t tmp = rc->low; 136 | rc->low += cumFreq * (rc->range/= totFreq); 137 | rc->range*= freq; 138 | 139 | rc->Carry += rc->lowrange < TOP) { 142 | rc->range <<= 8; 143 | RC_ShiftLowCheck(rc); 144 | } 145 | } 146 | 147 | static inline uint32_t RC_GetFreq (RangeCoder *rc, uint32_t totFreq) { 148 | //return rc->code/(rc->range/=totFreq); 149 | return (totFreq && rc->range >= totFreq) ? 
rc->code/(rc->range/=totFreq) : 0; 150 | } 151 | 152 | static inline void RC_Decode (RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t totFreq) 153 | { 154 | rc->code -= cumFreq * rc->range; 155 | rc->range *= freq; 156 | while (rc->range < TOP) { 157 | if (rc->in_buf >= rc->in_end) { 158 | rc->err = -1; 159 | return; 160 | } 161 | rc->code = (rc->code<<8) + *rc->in_buf++; 162 | rc->range <<= 8; 163 | } 164 | } 165 | 166 | #endif /* C_RANGER_CODER_H */ 167 | -------------------------------------------------------------------------------- /htscodecs/c_simple_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012, 2018-2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include <stdint.h> 35 | #include "c_range_coder.h" 36 | 37 | /* 38 | *-------------------------------------------------------------------------- 39 | * A simple frequency model. 40 | * 41 | * Define NSYM to be an integer value before including this file. 42 | * It will then generate types and functions specific to that 43 | * maximum number of symbols. 44 | * 45 | * This keeps a list of symbols and their frequencies, approximately 46 | * sorted by symbol frequency. We allow for a single symbol to periodically 47 | * move up the list when emitted, effectively doing a single step of 48 | * bubble sort periodically. This means it's largely the same complexity 49 | * irrespective of alphabet size. 50 | * It's more efficient on strongly biased distributions than random data. 51 | * 52 | * There is no escape symbol, so the model is tailored to relatively 53 | * stationary samples (although we do have occasional normalisation to 54 | * avoid frequency counters getting too high). 
55 | *-------------------------------------------------------------------------- 56 | */ 57 | 58 | //----------------------------------------------------------------------------- 59 | // Bits we want included once only - constants, types, etc 60 | #ifndef C_SIMPLE_MODEL_H 61 | #define C_SIMPLE_MODEL_H 62 | 63 | #define MAX_FREQ (1<<16)-17 64 | #define PASTE3(a,b,c) a##b##c 65 | #define SIMPLE_MODEL(a,b) PASTE3(SIMPLE_MODEL,a,b) 66 | #define STEP 16 67 | typedef struct { 68 | uint16_t Freq; 69 | uint16_t Symbol; 70 | } SymFreqs; 71 | #endif /* C_SIMPLE_MODEL_H */ 72 | 73 | 74 | //----------------------------------------------------------------------------- 75 | // Bits we regenerate for each NSYM value. 76 | 77 | typedef struct { 78 | uint32_t TotFreq; // Total frequency 79 | 80 | // Array of Symbols approximately sorted by Freq. 81 | SymFreqs sentinel, F[NSYM+1], terminal; 82 | } SIMPLE_MODEL(NSYM,_); 83 | 84 | 85 | static inline void SIMPLE_MODEL(NSYM,_init)(SIMPLE_MODEL(NSYM,_) *m, int max_sym) { 86 | int i; 87 | 88 | for (i=0; i<max_sym; i++) { 89 | m->F[i].Symbol = i; 90 | m->F[i].Freq = 1; 91 | } 92 | for (; i<NSYM; i++) { 93 | m->F[i].Symbol = i; 94 | m->F[i].Freq = 0; 95 | } 96 | 97 | m->TotFreq = max_sym; 98 | m->sentinel.Symbol = 0; 99 | m->sentinel.Freq = MAX_FREQ; // Always first; simplifies sorting. 100 | m->terminal.Symbol = 0; 101 | m->terminal.Freq = MAX_FREQ; 102 | m->F[NSYM].Freq = 0; // terminates normalize() loop. See below. 
103 | } 104 | 105 | 106 | static inline void SIMPLE_MODEL(NSYM,_normalize)(SIMPLE_MODEL(NSYM,_) *m) { 107 | SymFreqs *s; 108 | 109 | /* Faster than F[i].Freq for 0 <= i < NSYM */ 110 | m->TotFreq=0; 111 | for (s = m->F; s->Freq; s++) { 112 | s->Freq -= s->Freq>>1; 113 | m->TotFreq += s->Freq; 114 | } 115 | } 116 | 117 | static inline void SIMPLE_MODEL(NSYM,_encodeSymbol)(SIMPLE_MODEL(NSYM,_) *m, 118 | RangeCoder *rc, uint16_t sym) { 119 | SymFreqs *s = m->F; 120 | uint32_t AccFreq = 0; 121 | 122 | while (s->Symbol != sym) 123 | AccFreq += s++->Freq; 124 | 125 | RC_Encode(rc, AccFreq, s->Freq, m->TotFreq); 126 | s->Freq += STEP; 127 | m->TotFreq += STEP; 128 | 129 | if (m->TotFreq > MAX_FREQ) 130 | SIMPLE_MODEL(NSYM,_normalize)(m); 131 | 132 | /* Keep approx sorted */ 133 | if (s[0].Freq > s[-1].Freq) { 134 | SymFreqs t = s[0]; 135 | s[0] = s[-1]; 136 | s[-1] = t; 137 | } 138 | } 139 | 140 | static inline uint16_t SIMPLE_MODEL(NSYM,_decodeSymbol)(SIMPLE_MODEL(NSYM,_) *m, RangeCoder *rc) { 141 | SymFreqs* s = m->F; 142 | uint32_t freq = RC_GetFreq(rc, m->TotFreq); 143 | uint32_t AccFreq; 144 | 145 | if (freq > MAX_FREQ) 146 | return 0; // error 147 | 148 | for (AccFreq = 0; (AccFreq += s->Freq) <= freq; s++) 149 | ; 150 | if (s - m->F > NSYM) 151 | return 0; // error 152 | 153 | AccFreq -= s->Freq; 154 | 155 | RC_Decode(rc, AccFreq, s->Freq, m->TotFreq); 156 | s->Freq += STEP; 157 | m->TotFreq += STEP; 158 | 159 | if (m->TotFreq > MAX_FREQ) 160 | SIMPLE_MODEL(NSYM,_normalize)(m); 161 | 162 | /* Keep approx sorted */ 163 | if (s[0].Freq > s[-1].Freq) { 164 | SymFreqs t = s[0]; 165 | s[0] = s[-1]; 166 | s[-1] = t; 167 | return t.Symbol; 168 | } 169 | 170 | return s->Symbol; 171 | } 172 | -------------------------------------------------------------------------------- /htscodecs/fqzcomp_qual.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013, 2018-2019 Genome Research Ltd. 
3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #ifndef FQZ_COMP_QUAL_H 35 | #define FQZ_COMP_QUAL_H 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | #include <stdint.h> 42 | 43 | /* Bit flags, deliberately mirroring BAM ones */ 44 | #define FQZ_FREVERSE 16 45 | #define FQZ_FREAD2 128 46 | 47 | /* Current FQZ format version */ 48 | #define FQZ_VERS 5 49 | 50 | #define FQZ_MAX_STRAT 3 51 | 52 | /* 53 | * Minimal per-record information taken from a cram slice. 54 | * 55 | * To compress we need to know the junction from one quality string to 56 | * the next (len), whether it is first/second read and whether it is 57 | * reverse complemented (flags). 58 | */ 59 | typedef struct { 60 | int num_records; 61 | uint32_t *len; // of size num_records 62 | uint32_t *flags; // of size num_records 63 | } fqz_slice; 64 | 65 | 66 | // Global flags 67 | static const int GFLAG_MULTI_PARAM = 1; 68 | static const int GFLAG_HAVE_STAB = 2; 69 | static const int GFLAG_DO_REV = 4; 70 | 71 | // Param flags 72 | // Add PFLAG_HAVE_DMAP and a dmap[] for delta incr? 73 | static const int PFLAG_DO_DEDUP = 2; 74 | static const int PFLAG_DO_LEN = 4; 75 | static const int PFLAG_DO_SEL = 8; 76 | static const int PFLAG_HAVE_QMAP = 16; 77 | static const int PFLAG_HAVE_PTAB = 32; 78 | static const int PFLAG_HAVE_DTAB = 64; 79 | static const int PFLAG_HAVE_QTAB = 128; 80 | 81 | /* 82 | * FQZ parameters. These may be simply passed in as NULL to fqz_compress 83 | * and it'll automatically choose, but if we wish to have complete control 84 | * then this (long) struct contains all the details. 85 | * 86 | * TODO: document all this! 
87 | */ 88 | 89 | // A single parameter block 90 | typedef struct { 91 | // Starting context value 92 | uint16_t context; 93 | 94 | // flags 95 | unsigned int pflags; 96 | unsigned int do_sel, do_dedup, store_qmap, fixed_len; 97 | unsigned char use_qtab, use_dtab, use_ptab; 98 | 99 | // context bits and locations 100 | unsigned int qbits, qloc; 101 | unsigned int pbits, ploc; 102 | unsigned int dbits, dloc; 103 | unsigned int sbits, sloc; 104 | 105 | // models 106 | int max_sym, nsym, max_sel; 107 | 108 | // tables / maps 109 | unsigned int qmap[256]; 110 | unsigned int qtab[256]; 111 | unsigned int ptab[1024]; 112 | unsigned int dtab[256]; 113 | 114 | // Not stored parameters, but computed as part of encoder 115 | // parameterisation. 116 | int qshift; 117 | int pshift; 118 | int dshift; 119 | int sshift; 120 | unsigned int qmask; // (1<n to the expanded symbols. 73 | * The "out" buffer must be preallocated by the caller to be the correct 74 | * size. For error checking purposes, out_len is set to the size of 75 | * this buffer. 76 | * 77 | * Returns uncompressed data (out) on success, 78 | * NULL on failure. 79 | */ 80 | uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *map); 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | 86 | #endif /* HTS_PACK_H */ 87 | -------------------------------------------------------------------------------- /htscodecs/pooled_alloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009-2010, 2013 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. 
Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | // Defined static here as we only use in one file for now and don't 35 | // want to pollute the library name space (io_lib has the same named 36 | // functions). 37 | 38 | #ifndef _POOLED_ALLOC_H_ 39 | #define _POOLED_ALLOC_H_ 40 | 41 | #include 42 | #include 43 | #include 44 | 45 | /* 46 | * Implements a pooled block allocator where all items are the same size, 47 | * but we need many of them. 
48 | */ 49 | typedef struct { 50 | void *pool; 51 | size_t used; 52 | } pool_t; 53 | 54 | typedef struct { 55 | size_t dsize; 56 | size_t npools; 57 | pool_t *pools; 58 | void *free; 59 | } pool_alloc_t; 60 | 61 | #define PSIZE 1024*1024 62 | 63 | static pool_alloc_t *pool_create(size_t dsize) { 64 | pool_alloc_t *p; 65 | 66 | if (NULL == (p = (pool_alloc_t *)malloc(sizeof(*p)))) 67 | return NULL; 68 | 69 | /* Minimum size is a pointer, for free list */ 70 | dsize = (dsize + sizeof(void *) - 1) & ~(sizeof(void *)-1); 71 | if (dsize < sizeof(void *)) 72 | dsize = sizeof(void *); 73 | p->dsize = dsize; 74 | 75 | p->npools = 0; 76 | p->pools = NULL; 77 | p->free = NULL; 78 | 79 | return p; 80 | } 81 | 82 | static pool_t *new_pool(pool_alloc_t *p) { 83 | size_t n = PSIZE / p->dsize; 84 | pool_t *pool; 85 | 86 | pool = realloc(p->pools, (p->npools + 1) * sizeof(*p->pools)); 87 | if (NULL == pool) return NULL; 88 | p->pools = pool; 89 | pool = &p->pools[p->npools]; 90 | 91 | pool->pool = malloc(n * p->dsize); 92 | if (NULL == pool->pool) return NULL; 93 | 94 | pool->used = 0; 95 | 96 | p->npools++; 97 | 98 | return pool; 99 | } 100 | 101 | static void pool_destroy(pool_alloc_t *p) { 102 | size_t i; 103 | 104 | for (i = 0; i < p->npools; i++) { 105 | free(p->pools[i].pool); 106 | } 107 | free(p->pools); 108 | free(p); 109 | } 110 | 111 | static void *pool_alloc(pool_alloc_t *p) { 112 | pool_t *pool; 113 | void *ret; 114 | 115 | /* Look on free list */ 116 | if (NULL != p->free) { 117 | ret = p->free; 118 | p->free = *((void **)p->free); 119 | return ret; 120 | } 121 | 122 | /* Look for space in the last pool */ 123 | if (p->npools) { 124 | pool = &p->pools[p->npools - 1]; 125 | if (pool->used + p->dsize < PSIZE) { 126 | ret = ((char *) pool->pool) + pool->used; 127 | pool->used += p->dsize; 128 | return ret; 129 | } 130 | } 131 | 132 | /* Need a new pool */ 133 | pool = new_pool(p); 134 | if (NULL == pool) return NULL; 135 | 136 | pool->used = p->dsize; 137 | return 
pool->pool; 138 | } 139 | 140 | // static void pool_free(pool_alloc_t *p, void *ptr) { 141 | // *(void **)ptr = p->free; 142 | // p->free = ptr; 143 | // } 144 | 145 | #endif /*_POOLED_ALLOC_H_*/ 146 | -------------------------------------------------------------------------------- /htscodecs/rANS_static.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014-2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef RANS_STATIC_H 35 | #define RANS_STATIC_H 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | unsigned char *rans_compress(unsigned char *in, unsigned int in_size, 42 | unsigned int *out_size, int order); 43 | unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, 44 | unsigned int *out_size); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | 50 | #endif /* RANS_STATIC_H */ 51 | -------------------------------------------------------------------------------- /htscodecs/rANS_static4x16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef RANS_STATIC4x16_H 35 | #define RANS_STATIC4x16_H 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | unsigned int rans_compress_bound_4x16(unsigned int size, int order); 42 | unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, 43 | unsigned char *out, unsigned int *out_size, 44 | int order); 45 | unsigned char *rans_compress_4x16(unsigned char *in, unsigned int in_size, 46 | unsigned int *out_size, int order); 47 | unsigned char *rans_uncompress_to_4x16(unsigned char *in, unsigned int in_size, 48 | unsigned char *out, unsigned int *out_size); 49 | unsigned char *rans_uncompress_4x16(unsigned char *in, unsigned int in_size, 50 | unsigned int *out_size); 51 | 52 | // CPU detection control. Used for testing and benchmarking. 53 | // These bitfields control what methods are permitted to be used. 
54 | #define RANS_CPU_ENC_SSE4 (1<<0) 55 | #define RANS_CPU_ENC_AVX2 (2<<0) 56 | #define RANS_CPU_ENC_AVX512 (4<<0) 57 | #define RANS_CPU_ENC_NEON (8<<0) 58 | 59 | #define RANS_CPU_DEC_SSE4 (1<<8) 60 | #define RANS_CPU_DEC_AVX2 (2<<8) 61 | #define RANS_CPU_DEC_AVX512 (4<<8) 62 | #define RANS_CPU_DEC_NEON (8<<8) 63 | 64 | void rans_set_cpu(int opts); 65 | 66 | // "Order" byte options. ORed into the order byte. 67 | // The bottom bits are the order itself, currently 68 | // supporting order-0 and order-1 but with expansion room 69 | // up to order-3 (unlikely). 70 | 71 | //-- 72 | // The values below are stored in the file format 73 | 74 | // Pack 2,4,8 or infinite symbols into a byte. 75 | #define RANS_ORDER_PACK 0x80 76 | 77 | // Run length encoding with runs & lits encoded separately 78 | #define RANS_ORDER_RLE 0x40 79 | 80 | // Nop; for tiny segments where rANS overhead is too big 81 | #define RANS_ORDER_CAT 0x20 82 | 83 | // Don't store the original size; used by STRIPE mode 84 | #define RANS_ORDER_NOSZ 0x10 85 | 86 | // For N-byte integer data; rotate & encode N streams. 87 | #define RANS_ORDER_STRIPE 0x08 88 | 89 | // 32-way unrolling instead of 4-way 90 | #define RANS_ORDER_X32 0x04 91 | 92 | //-- 93 | // order values below are not directly part of the file format, but control 94 | // the behaviour of the encoder. 95 | 96 | // Bit 8-15 of order hold the stripe size (N). 97 | // Note: N is stored separately after the order byte 98 | 99 | // Used to disable order-0 in the STRIPE sub-methods. 
100 | #define RANS_ORDER_STRIPE_NO0 (1<<16) 101 | 102 | // Used to request automatic selection between 4-way and 32-way 103 | #define RANS_ORDER_SIMD_AUTO (1<<17) 104 | 105 | #ifdef __cplusplus 106 | } 107 | #endif 108 | 109 | #endif /* RANS_STATIC4x16_H */ 110 | -------------------------------------------------------------------------------- /htscodecs/rle.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef HTS_RLE_H 35 | #define HTS_RLE_H 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | /* 42 | * Performs run length encoding of a byte stream, turning it into a 43 | * list of lengths and a list of literals. 44 | * 45 | * The method used is a bit different to traditional run length 46 | * encoding. It always outputs run-lengths for symbols in the 47 | * 'rle_syms' list (even if that length is +0 more), and never outputs 48 | * lengths for symbols not in that list. 49 | * 50 | * "run" should be preallocated to be large enough; 51 | * e.g. at least data_len bytes long as a worst case. 52 | * "rle_syms" should be allocated to be at least 256 bytes. 53 | * 54 | * If *rle_nsyms is zero this function will survey the input data 55 | * first to choose symbols automatically, writing back to rle_syms and 56 | * rle_nsyms. 57 | * 58 | * The "out" buffer may be passed in as NULL in which case it is 59 | * allocated and returned (and is up to the caller to free). 60 | * Otherwise if specified as non-NULL it will be written to, but 61 | * it is up to the caller to ensure the buffer size is large enough. 62 | * A worst case scenario is 2*data_len. 63 | * 64 | * Returns the literal buffer on success with new length in out_len, 65 | * also fills out run buffer and run_len, and potentially 66 | * updates rle_syms / rle_nsyms too. 
67 | * Returns NULL on failure 68 | */ 69 | uint8_t *hts_rle_encode(uint8_t *data, uint64_t data_len, 70 | uint8_t *run, uint64_t *run_len, 71 | uint8_t *rle_syms, int *rle_nsyms, 72 | uint8_t *out, uint64_t *out_len); 73 | 74 | /* 75 | * Expands a run-length encoded data stream from a pair of literal and 76 | * run-length buffers. 77 | * 78 | * On input *out_len holds the length of the supplied out 79 | * buffer. On exit, it holds the used portion of this buffer. 80 | * 81 | * Returns uncompressed data (out) on success, 82 | * NULL on failure. 83 | */ 84 | uint8_t *hts_rle_decode(uint8_t *lit, uint64_t lit_len, 85 | uint8_t *run, uint64_t run_len, 86 | uint8_t *rle_syms, int rle_nsyms, 87 | uint8_t *out, uint64_t *out_len); 88 | 89 | // TODO: Add rle scanning func to compute rle_syms. 90 | 91 | #ifdef __cplusplus 92 | } 93 | #endif 94 | 95 | #endif /* HTS_RLE_H */ 96 | -------------------------------------------------------------------------------- /htscodecs/tokenise_name3.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, 2019 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 
20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef _TOKENISE_NAME3_H_ 35 | #define _TOKENISE_NAME3_H_ 36 | 37 | #ifdef __cplusplus 38 | extern "C" { 39 | #endif 40 | 41 | /* 42 | * Converts a line or \0 separated block of read names to a compressed buffer. 43 | * The code can only encode whole lines and will not attempt a partial line. 44 | * Use the "last_start_p" return value to identify the partial line start 45 | * offset, for continuation purposes. 46 | * 47 | * Returns a malloced buffer holding compressed data of size *out_len, 48 | * or NULL on failure 49 | */ 50 | uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, 51 | int *out_len, int *last_start_p); 52 | 53 | /* 54 | * Decodes a compressed block of read names into \0 separated names. 55 | * The size of the data returned (malloced) is in *out_len. 56 | * 57 | * Returns NULL on failure. 
58 | */ 59 | uint8_t *tok3_decode_names(uint8_t *in, uint32_t sz, uint32_t *out_len); 60 | 61 | #ifdef __cplusplus 62 | } 63 | #endif 64 | 65 | #endif /* _TOKENISE_NAME3_H_ */ 66 | -------------------------------------------------------------------------------- /javascript/Makefile: -------------------------------------------------------------------------------- 1 | all: test 2 | 3 | CBIN=../build/tests 4 | NODE=node 5 | NODE_OPTS=--use-strict 6 | 7 | TESTS=test_r4x8 test_r4x16 test_arith test_fqzcomp test_tok3 8 | CORPUS=../tests 9 | 10 | modules: node_modules/bzip2 node_modules/minimist 11 | 12 | node_modules/bzip2: 13 | -mkdir node_modules 14 | npm install bzip2 15 | 16 | node_modules/minimist: 17 | -mkdir node_modules 18 | npm install minimist 19 | 20 | test check: modules ${TESTS} 21 | 22 | test_r4x8: 23 | @echo 24 | @echo === Checking r4x8 25 | @if [ ! -e ${CBIN}/rans4x8 ]; then echo "Set CBIN if you wish to validate against C version"; fi 26 | @for i in ${CORPUS}/dat/r4x8/*; do \ 27 | echo $$i;\ 28 | base=`echo $$i | sed 's/\.[0-9]*$$//;s#/q#/../q#'`; \ 29 | level=`echo $$i | sed 's/.*\.//'`;\ 30 | # Decode predefined data set\ 31 | a=`${NODE} ${NODE_OPTS} main_rans.js -d -r $$i 2>/dev/null | md5sum`; \ 32 | b=`cut -f 1 < $$base | tr -d '\012' | md5sum`; \ 33 | test "$$a" = "$$b" || echo $<: Mismatch for $$i; \ 34 | # Round trip. \ 35 | cut -f 1 < $$base | tr -d '\012' > _nonl; \ 36 | ${NODE} ${NODE_OPTS} main_rans.js -o $$level -r _nonl 2>/dev/null > _; \ 37 | a=`${NODE} ${NODE_OPTS} main_rans.js -d -r _ 2>/dev/null | md5sum`; \ 38 | test "$$a" = "$$b" || echo $<: Fail round-trip for $$base with level $$level; \ 39 | if [ -e ${CBIN}/rans4x8 ]; \ 40 | then \ 41 | a=`${CBIN}/rans4x8 -d -r < _ 2>/dev/null | tr '\000' '\012' | md5sum`; \ 42 | test "$$a" = "$$b" || echo $<: Fail JS to C round-trip for $$base; \ 43 | fi; \ 44 | done 45 | 46 | test_r4x16: 47 | @echo 48 | @echo === Checking r4x16 49 | @if [ ! 
-e ${CBIN}/rans4x16pr ]; then echo "Set CBIN if you wish to validate against C version"; fi 50 | @for i in ${CORPUS}/dat/r4x16/*; do \ 51 | echo $$i;\ 52 | base=`echo $$i | sed 's/\.[0-9]*$$//;s#/q#/../q#'`; \ 53 | level=`echo $$i | sed 's/.*\.//'`;\ 54 | # Decode predefined data set\ 55 | a=`${NODE} ${NODE_OPTS} main_rans4x16.js -d -r $$i 2>/dev/null | md5sum`; \ 56 | b=`cut -f 1 < $$base | tr -d '\012' | md5sum`; \ 57 | test "$$a" = "$$b" || echo $<: Mismatch for $$i; \ 58 | # Round trip. \ 59 | cut -f 1 < $$base | tr -d '\012' > _nonl; \ 60 | ${NODE} ${NODE_OPTS} main_rans4x16.js -o $$level -r _nonl 2>/dev/null > _; \ 61 | a=`${NODE} ${NODE_OPTS} main_rans4x16.js -d -r _ 2>/dev/null | md5sum`; \ 62 | test "$$a" = "$$b" || echo $<: Fail round-trip for $$base with level $$level; \ 63 | if [ -e ${CBIN}/rans4x16pr ]; \ 64 | then \ 65 | a=`${CBIN}/rans4x16pr -d -r < _ 2>/dev/null | tr '\000' '\012' | md5sum`; \ 66 | test "$$a" = "$$b" || echo $<: Fail JS to C round-trip for $$base; \ 67 | fi; \ 68 | done 69 | 70 | test_arith: 71 | @echo 72 | @echo === Checking arith 73 | @if [ ! -e ${CBIN}/arith_dynamic ]; then echo "Set CBIN if you wish to validate against C version"; fi 74 | @for i in ${CORPUS}/dat/arith/*; do \ 75 | echo $$i;\ 76 | base=`echo $$i | sed 's/\.[0-9]*$$//;s#/q#/../q#'`; \ 77 | level=`echo $$i | sed 's/.*\.//'`;\ 78 | # Decode predefined data set\ 79 | a=`${NODE} ${NODE_OPTS} main_arith_gen.js -d -r $$i 2>/dev/null | md5sum`; \ 80 | b=`cut -f 1 < $$base | tr -d '\012' | md5sum`; \ 81 | test "$$a" = "$$b" || echo $<: Mismatch for $$i; \ 82 | # Round trip. 
\ 83 | cut -f 1 < $$base | tr -d '\012' > _nonl; \ 84 | ${NODE} ${NODE_OPTS} main_arith_gen.js -o $$level -r _nonl 2>/dev/null > _; \ 85 | a=`${NODE} ${NODE_OPTS} main_arith_gen.js -d -r _ 2>/dev/null | md5sum`; \ 86 | test "$$a" = "$$b" || echo $<: Fail round-trip for $$base with level $$level; \ 87 | if [ -e ${CBIN}/arith_dynamic ]; \ 88 | then \ 89 | a=`${CBIN}/arith_dynamic -d -r < _ 2>/dev/null | tr '\000' '\012' | md5sum`; \ 90 | test "$$a" = "$$b" || echo $<: Fail JS to C round-trip for $$base; \ 91 | fi; \ 92 | done 93 | 94 | test_fqzcomp: 95 | @echo 96 | @echo === Checking fqzcomp 97 | @if [ ! -e ${CBIN}/fqzcomp_qual ]; then echo "Set CBIN if you wish to validate against C version"; fi 98 | @for i in ${CORPUS}/dat/fqzcomp/q*; do \ 99 | echo $$i;\ 100 | base=`echo $$i | sed 's/\.[0-9]*$$//;s#/q#/../q#'`; \ 101 | level=`echo $$i | sed 's/.*\.//'`;\ 102 | # Decode predefined data set\ 103 | a=`${NODE} ${NODE_OPTS} main_fqzcomp.js -d -r $$i 2>/dev/null | md5sum`; \ 104 | b=`awk '{print $$1}' $$base | md5sum`; \ 105 | test "$$a" = "$$b" || echo $<: Mismatch for $$i; \ 106 | # Round trip. \ 107 | ${NODE} ${NODE_OPTS} main_fqzcomp.js -s $$level -r $$base 2>/dev/null > _; \ 108 | a=`${NODE} ${NODE_OPTS} main_fqzcomp -d -r _ 2>/dev/null | md5sum`; \ 109 | test "$$a" = "$$b" || echo $<: Fail round-trip for $$base with level $$level; \ 110 | if [ -e ${CBIN}/fqzcomp_qual ]; \ 111 | then \ 112 | a=`${CBIN}/fqzcomp_qual -d -r < _ 2>/dev/null | tr '\000' '\012' | md5sum`; \ 113 | test "$$a" = "$$b" || echo $<: Fail JS to C round-trip for $$base; \ 114 | fi; \ 115 | done 116 | 117 | test_tok3: 118 | @echo 119 | @echo === Checking tok3 120 | @if [ ! 
-e ${CBIN}/tokenise_name3 ]; then echo "Set CBIN if you wish to validate against C version"; fi 121 | @for base in ${CORPUS}/names/*.names; do \ 122 | echo -n "$$base ";\ 123 | ${NODE} ${NODE_OPTS} main_tok3.js -a -r $$base 2>/dev/null > _; \ 124 | a=`${NODE} ${NODE_OPTS} main_tok3 -d -r _ 2>/dev/null | md5sum`; \ 125 | cat _ | wc -c;\ 126 | b=`cat $$base | md5sum`; \ 127 | test "$$a" = "$$b" || echo $<: Fail round-trip for $$base; \ 128 | if [ -e ${CBIN}/tokenise_name3 ]; \ 129 | then \ 130 | a=`${CBIN}/tokenise_name3 -d -r < _ | tr '\000' '\012' | md5sum`; \ 131 | test "$$a" = "$$b" || echo $<: Fail JS to C round-trip for $$base; \ 132 | fi; \ 133 | done; 134 | @for i in ${CORPUS}/names/tok3/*; do \ 135 | echo $$i;\ 136 | base=`echo $$i | sed 's/\.[0-9]*$$//;s#/tok3##'`; \ 137 | level=`echo $$i | sed 's/.*\.//'`;\ 138 | # Decode predefined data set\ 139 | a=`${NODE} ${NODE_OPTS} main_tok3 -d -r $$i 2>/dev/null | md5sum`; \ 140 | b=`cat $$base | md5sum`; \ 141 | test "$$a" = "$$b" || echo $<: Mismatch for $$i; \ 142 | done; 143 | -------------------------------------------------------------------------------- /javascript/README.md: -------------------------------------------------------------------------------- 1 | Reference implementation files 2 | ============================== 3 | 4 | This directory contains javascript implementations of the custom 5 | codecs used in CRAM 3.1, capable of being run under node.js. 6 | 7 | These are not written for speed, but for clarity and as an exercise in 8 | checking the pseudocode in the CRAM specification. They are written as 9 | close to this pseudocode as is possible. 10 | 11 | 12 | Prerequisites: minimist package for command line parsing and bzip2 for 13 | part of the arith_gen.js code. 14 | 15 | npm install minimist 16 | npm install bzip2 17 | 18 | 19 | iostream.js 20 | ----------- 21 | 22 | Makes a buffer appear to be a stream with ReadByte, ReadITF8, etc 23 | functions.
24 | 25 | 26 | rans.js 27 | ------- 28 | 29 | Implements the order-0 and order-1 rans (4x8) decoder as used in CRAM3.0. 30 | 31 | 32 | main_rans.js 33 | ------------ 34 | 35 | A command line tool to exercise the rans.js code, included for debug 36 | purposes. 37 | 38 | 39 | rans4x16.js, main_rans4x16.js 40 | ----------------------------- 41 | 42 | A 16-bit renormalising variant of rANS above. This also includes 43 | transforms for RLE, bit-packing and 4-way interleaving. 44 | 45 | 46 | arith_sh.js 47 | ----------- 48 | 49 | Arithmetic (range) coding with Schindler carry handling. 50 | 51 | byte_model.js 52 | ------------- 53 | 54 | An adaptive model for keeping track of symbol frequencies. 55 | 56 | arith_gen.js, main_arith_gen.js 57 | ------------------------------- 58 | 59 | Wrapper around arith_sh.js to perform order-0/1 encoding with RLE and 60 | bit-packing. Plus debug command line tool. 61 | 62 | 63 | fqzcomp.js, main_fqzcomp.js 64 | --------------------------- 65 | 66 | Implements the fqzcomp quality compression codec. Plus debug command 67 | line tool. 68 | 69 | 70 | tok3.js, main_tok3.js 71 | --------------------- 72 | 73 | Implements the tokenise_name3 read identifier compression codec. 74 | Plus debug command line tool. 75 | 76 | 77 | Testing 78 | ======= 79 | 80 | The various main js files can be used for adhoc testing. There is 81 | also a Makefile which performs checks against known defined data 82 | streams and does round-trip testing in both Javascript and, if compiled, 83 | the C variant. You can set CORPUS make variable to a larger data set 84 | such as htscodecs-corpus. 85 | 86 | eg. 87 | 88 | make check CORPUS=../tests/htscodecs-corpus/ 89 | -------------------------------------------------------------------------------- /javascript/arith_sh.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 Genome Research Ltd.
3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | // An arithmetic coder, based on Eugene Shelwien's reimplementation of 35 | // Michael Schindler range coder. 
//
// Order-0 byte stream of ~/scratch/data/q40b
//      C:              3.1s decode  (approx same vs 32-bit and 64-bit)
//      Arith_sh.js     6.7s decode  (32-bit with carries)
//      Arith.js      317.0s decode  (64-bit no carries); int64 crippling it.

//----------------------------------------------------------------------
// Arithmetic (range) coder

// Once the range drops below this value a byte is shifted in (decoder)
// or out (encoder) and the range is scaled back up by 256.
const RANGE_MIN = 1 << 24;

class RangeCoder {
    // 'src' is part of the call signature but is not read here; the
    // decoder is primed separately through RangeStartDecode().
    constructor(src) {
        this.low   = 0;           // encoder: low end of the current interval
        this.range = 0xffffffff;  // width of the current interval
        this.code  = 0;           // decoder: bytes read so far, minus 'low'
        this.FFnum = 0;           // encoder: bytes held back pending a carry
        this.carry = 0;           // encoder: 1 when 'low' has overflowed
        this.cache = 0;           // encoder: last top byte not yet written
    }

    // Primes the decoder by shifting in the first five bytes of the stream.
    // The shift is a 32-bit operation, so only the last four bytes survive.
    RangeStartDecode(src) {
        let remaining = 5;
        while (remaining-- > 0)
            this.code = (this.code << 8) + src.ReadByte();

        // Mask, then force to an unsigned (+ve) 32-bit value
        this.code = (this.code & 0xffffffff) >>> 0;
    }

    // Returns the cumulative frequency the current code maps to, scaled
    // to 'tot_freq'.  As a side effect the range is divided by tot_freq;
    // RangeDecode() relies on that having already happened.
    RangeGetFrequency(tot_freq) {
        const unit = Math.floor(this.range / tot_freq);
        this.range = unit;
        return Math.floor(this.code / unit);
    }

    // Consumes the symbol occupying [sym_low, sym_low+sym_freq) and then
    // renormalises.  'tot_freq' is unused here because the division by it
    // was already applied in RangeGetFrequency().
    RangeDecode(src, sym_low, sym_freq, tot_freq) {
        this.code  -= sym_low * this.range;
        this.range *= sym_freq;

        for (; this.range < RANGE_MIN; this.range *= 256)
            this.code = this.code * 256 + src.ReadByte();
    }

    // Moves the top byte of 'low' towards the output, resolving carries.
    //
    // Three situations can apply:
    // 1. low < 0xff000000: adding the (small) range cannot change the
    //    cached byte, so emit it followed by any held-back 0xff bytes.
    // 2. A carry was flagged by RangeEncode: emit cached byte + 1 and the
    //    held-back bytes as 0x00.
    // 3. Neither: we cannot yet tell whether the held bytes end up as
    //    ff or 00, so just count one more of them.
    RangeShiftLow(dst) {
        const resolved = this.carry || this.low < 0xff000000;

        if (!resolved) {
            this.FFnum++;
        } else {
            dst.WriteByte(this.cache + this.carry);

            // carry-1 is -1 without a carry and 0 with one
            const filler = this.carry - 1;
            for (; this.FFnum; this.FFnum--)
                dst.WriteByte(filler);

            // Remember the new top byte for the next flush
            this.cache = this.low >>> 24;
            this.carry = 0;
        }

        // Drop the top byte; keep the result as an unsigned 32-bit value
        this.low = (this.low << 8) >>> 0;
    }

    // Narrows the interval to the symbol occupying
    // [sym_low, sym_low+sym_freq) out of tot_freq, writing any bytes that
    // become settled to 'dst'.
    RangeEncode(dst, sym_low, sym_freq, tot_freq) {
        const before = this.low;
        const unit = Math.floor(this.range / tot_freq);

        // Truncate to unsigned 32-bit so wrap-around can be spotted below
        this.low   = (before + sym_low * unit) >>> 0;
        this.range = unit * sym_freq;

        // A smaller value than before means the addition wrapped: carry.
        // NB: the original author claims, without proof, that this cannot
        // happen twice before the range drops below 1<<24.
        if (this.low < before) {
            if (this.carry != 0)
                console.log("ERROR: Multiple carry");
            this.carry = 1;
        }

        // Renormalise if range gets too small
        while (this.range < RANGE_MIN) {
            this.range *= 256;
            this.RangeShiftLow(dst);
        }
    }

    // Flushes the encoder state with five shifts of 'low'.
    RangeFinishEncode(dst) {
        let remaining = 5;
        while (remaining-- > 0)
            this.RangeShiftLow(dst);
    }
}

module.exports = RangeCoder;
3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | // An adaptive probability model for encoding and decoding of symbols 35 | // within a given alphabet, using the range coder to get/put the 36 | // compressed data. 
// Renormalise (halve) the frequencies once their sum exceeds this
const MAX_FREQ = ((1<<16)-17)
// Amount added to a symbol's frequency each time it is coded
const STEP = 16

class ByteModel {
    // Creates a model over the alphabet 0 .. max_sym-1 with every symbol
    // starting at frequency 1.
    //   S[i]  symbol held in slot i
    //   F[i]  frequency of the symbol in slot i
    constructor(max_sym = 256) {
        this.total_freq = max_sym;
        this.max_sym = max_sym-1;   // NB: stored as the largest symbol value
        this.S = new Array
        this.F = new Array

        for (var i = 0; i <= this.max_sym; i++) {
            this.S[i] = i;
            this.F[i] = 1;
        }
    }

    // Decodes and returns one symbol using range coder 'rc' reading from
    // 'src', then adapts the model.
    ModelDecode(src, rc) {
        // Find symbol
        var freq = rc.RangeGetFrequency(this.total_freq);

        // Linear scan to find the slot whose cumulative range holds 'freq'
        var acc = 0;
        var x = 0;
        while (acc + this.F[x] <= freq)
            acc += this.F[x++];

        // Update range coder
        rc.RangeDecode(src, acc, this.F[x], this.total_freq);

        // Read the symbol before the model update may move it to slot x-1
        var sym = this.S[x];
        this.ModelUpdate(x);

        return sym;
    }

    // Halve all the frequencies, being careful not to hit zero
    ModelRenormalise() {
        this.total_freq = 0;
        for (var i = 0; i <= this.max_sym; i++) {
            this.F[i] -= Math.floor(this.F[i] / 2);
            this.total_freq += this.F[i];
        }
    }

    // Adapts the model after the symbol in slot 'x' has been coded.
    // Shared by ModelDecode and ModelEncode so both sides stay in step.
    ModelUpdate(x) {
        this.F[x] += STEP;
        this.total_freq += STEP;
        if (this.total_freq > MAX_FREQ)
            this.ModelRenormalise();

        // Keep symbols approximately frequency sorted: swap with the
        // preceding slot if this one is now more frequent.
        if (x > 0 && this.F[x] > this.F[x-1]) {
            var tmp = this.F[x];
            this.F[x] = this.F[x-1];
            this.F[x-1] = tmp;

            tmp = this.S[x];
            this.S[x] = this.S[x-1];
            this.S[x-1] = tmp;
        }
    }

    // Encodes symbol 'sym' with range coder 'rc' writing to 'dst', then
    // adapts the model.
    //
    // Throws RangeError if 'sym' is not in the model's alphabet; scanning
    // past the last slot would otherwise never terminate.
    ModelEncode(dst, rc, sym) {
        // Find cumulative frequency of the slots preceding 'sym'
        var acc = 0;
        var x = 0;
        while (x <= this.max_sym && this.S[x] != sym)
            acc += this.F[x++];

        if (x > this.max_sym)
            throw new RangeError("ByteModel: symbol " + sym +
                                 " is outside the model alphabet");

        // Encode
        rc.RangeEncode(dst, acc, this.F[x], this.total_freq);

        // Update model
        this.ModelUpdate(x);
    }
}

module.exports = ByteModel;
IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | // This is an interface to the htscodecs reference implementation of 35 | // the CRAM 3.1 codecs. 36 | 37 | // This JavaScript file is not part of the reference implementation 38 | // and is simply and interface to get a consistent interface for cram-js. 39 | 40 | "use strict"; 41 | 42 | var r4x8 = require('./rans'); 43 | var r4x16 = require('./rans4x16'); 44 | var arith = require('./arith_gen'); 45 | var fqzcomp = require('./fqzcomp'); 46 | var tok3 = require('./tok3'); 47 | 48 | function r4x8_uncompress(inputBuffer, outputBuffer) { 49 | r4x8.decode(inputBuffer).copy(outputBuffer, 0, 0); 50 | } 51 | 52 | function r4x16_uncompress(inputBuffer, outputBuffer) { 53 | r4x16.decode(inputBuffer).copy(outputBuffer, 0, 0); 54 | } 55 | 56 | function arith_uncompress(inputBuffer, outputBuffer) { 57 | new arith().decode(inputBuffer).copy(outputBuffer, 0, 0); 58 | } 59 | 60 | function fqzcomp_uncompress(inputBuffer, outputBuffer) { 61 | var q_lens = new Array 62 | fqzcomp.decode(inputBuffer, q_lens).copy(outputBuffer, 0, 0); 63 | } 64 | 65 | function tok3_uncompress(inputBuffer, outputBuffer) { 66 | // Returns in string form instead of buffer 67 | var out = tok3.decode(inputBuffer, 0, '\0'); 68 | Buffer.from(out, 'binary').copy(outputBuffer, 0, 0); 69 | } 70 | 71 | module.exports = { 72 | r4x8_uncompress: r4x8_uncompress, 73 | r4x16_uncompress: r4x16_uncompress, 74 | arith_uncompress: arith_uncompress, 75 | 
fqzcomp_uncompress: fqzcomp_uncompress, 76 | tok3_uncompress: tok3_uncompress, 77 | }; 78 | -------------------------------------------------------------------------------- /javascript/main_arith_gen.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019,2020 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
// Command line tool to manually test the arith_gen.js code.
//
//   node main_arith_gen.js [-d] [-r] [-o order] input-file > output-file
//
//   -d        decode (default is to encode)
//   -r        raw: treat the whole file as a single block with no
//             4-byte compressed-size prefix
//   -o order  encoder flags; a fraction selects a second byte,
//             e.g. -o8.4 => 8+(256*4)

var fs = require("fs");
var RangeCoderGen = require("./arith_gen");

var argv = require('minimist')(process.argv.slice(2), { boolean: ["d", "r"] });

if (argv._.length != 1) {
    process.stderr.write("Usage: node main_arith_gen.js [-d] [-o order] input-file > output-file\n");
    process.exit(1);
}

var filein = argv._[0]

var buf = fs.readFileSync(filein);
var blk_size = 1024*1024;
var raw = argv.r

var arith = new RangeCoderGen()
if (!argv.d) {
    var order = argv.o != undefined ? argv.o : 0;

    // -o8.4 => 8+(256*4)
    // Split into integer and fractional parts so that the result is a
    // plain integer (1032) and does not keep the fraction (1032.4).
    var order_int = order >> 0;
    order = order_int + Math.round((order - order_int)*10)*256;

    var pos = 0;
    var out_len = 0;
    if (raw)
        blk_size = buf.length
    while (pos < buf.length) {
        var buf2 = arith.encode(buf.slice(pos, pos+blk_size), order);

        // Compressed buffer size.  Used in multi-block format.
        if (!raw) {
            // Buffer.allocUnsafe is a factory function, not a constructor
            var csize = Buffer.allocUnsafe(4);
            csize.writeInt32LE(buf2.length, 0);
            process.stdout.write(csize)
        }

        // Write compressed buffer itself
        process.stdout.write(buf2)

        pos += blk_size;
        out_len += buf2.length;
    }
    process.stderr.write("Compress order "+order+", "+buf.length+" => " + out_len + "\n");

} else {
    var pos = 0;
    var out_len = 0;
    var len = buf.length   // raw mode: the whole file is one block
    while (pos < buf.length) {
        if (!raw) {
            // Multi-block format: 4-byte little-endian compressed size
            len = buf.readInt32LE(pos);
            pos += 4;
        }
        var buf2 = arith.decode(buf.slice(pos, pos+len));
        process.stdout.write(buf2);
        out_len += buf2.length;
        pos += len;
    }
    process.stderr.write("Decompress " + buf.length + " => " + out_len + "\n");
}
// Command line tool to manually test the fqzcomp.js code.
//
//   node main_fqzcomp.js [-d] [-r] input-file > output-file
//
// Encoder input is text: one quality string per line in ASCII phred+33,
// optionally followed by a tab and a second token giving the read1/read2
// status.  The decoder writes the quality strings back out, one per line.
//
//   -d  decode (default is to encode)
//   -r  raw: no 8-byte (uncompressed size, compressed size) header

var fs = require("fs");
var fqz = require("./fqzcomp");
var argv = require('minimist')(process.argv.slice(2), { boolean: ["d", "r"] });

if (argv._.length != 1) {
    process.stderr.write("Usage: node main_fqzcomp.js [-d] input-file > output-file\n");
    process.exit(1);
}

var filein = argv._[0]

var buf = fs.readFileSync(filein);
var raw = argv.r

var NL  = "\n".charCodeAt(0)
var TAB = "\t".charCodeAt(0)

if (!argv.d) {
    // Line breaks to get sequence length, but then stitch together into
    // a single non-breaking buffer.
    var len = 0;        // length of the record currently being read
    var j = 0;          // write position of the stitched buffer
    var q_lens = new Array
    var q_dirs = new Array
    var q_len = 0       // common record length; -1 once lengths differ

    // Closes the current record, noting its length and direction token
    function end_record(dir) {
        q_lens.push(len)
        if (q_len == 0)
            q_len = len
        else if (q_len != len)
            q_len = -1 // marker for multiple lengths
        len = 0;
        q_dirs.push(dir)
    }

    for (var i = 0; i < buf.length; i++) {
        if (buf[i] == NL) {
            end_record(0)
        } else if (buf[i] == TAB) {
            // parse 2nd token for read1/read2 status
            var dir = ""
            for (i++; i < buf.length && buf[i] != NL; i++)
                dir += String.fromCharCode(buf[i])
            end_record(dir)
        } else {
            buf[j++] = buf[i] - 33; // ASCII phred to raw
            len++;
        }
    }

    // A final line with no trailing newline is still a record; without
    // this its bytes would be in buf but absent from q_lens / q_dirs.
    if (len > 0)
        end_record(0)

    buf = buf.slice(0, j)

    // All records the same length: a single length is passed instead
    if (q_len > 0)
        q_lens = [q_lens[0]]

    var buf2 = fqz.encode(buf, q_lens, q_dirs);
    process.stderr.write("Compress " +buf.length + " => " + buf2.length + "\n");
    if (!raw) {
        // Buffer.allocUnsafe is a factory function, not a constructor
        var hdr = Buffer.allocUnsafe(8);
        hdr.writeInt32LE(buf.length, 0);
        hdr.writeInt32LE(buf2.length, 4);
        process.stdout.write(hdr);
    }
    process.stdout.write(buf2);

} else {
    var q_lens = new Array
    // Consume ulen and clen from outer test harness (pointless as non-blocking atm)
    var buf2
    if (raw)
        buf2 = fqz.decode(buf, q_lens);
    else
        buf2 = fqz.decode(buf.slice(8), q_lens);

    // Split into newlines so we can do easy data comparison.
    // One extra byte per record for its newline.
    var buf3 = Buffer.allocUnsafe(buf2.length + q_lens.length)
    var rec = 0;
    var len = q_lens[rec++]
    var j = 0;
    for (var i = 0; i < buf2.length; i++) {
        buf3[j++] = buf2[i] + 33;   // raw to ASCII phred
        if (--len == 0) {
            buf3[j++] = NL
            len = q_lens[rec++]
        }
    }

    process.stderr.write("Decompress " + buf.length + " => " + buf3.length + "\n");
    process.stdout.write(buf3);
}
-------------------------------------------------------------------------------- /javascript/main_rans.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019-2020 Genome Research Ltd. 3 | * Author(s): James Bonfield 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * 16 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Command line tool to manually test the rans.js (rANS 4x8) code.
//
//   node main_rans.js [-d] [-r] [-o order] input-file > output-file
//
//   -d        decode (default is to encode)
//   -r        raw: treat the whole file as a single block with no
//             5-byte (order, compressed size) block header
//   -o order  order passed to the encoder (default 0)

var fs = require("fs");
var rans = require("./rans");
var argv = require('minimist')(process.argv.slice(2), { boolean: ["d", "r"] });

if (argv._.length != 1) {
    process.stderr.write("Usage: node main_rans.js [-d] [-o order] input-file > output-file\n");
    process.exit(1);
}

var filein = argv._[0]

var buf = fs.readFileSync(filein);
var blk_size = 1024*1024;
var raw = argv.r

if (!argv.d) {
    var order = argv.o != undefined ? argv.o : 0;
    var pos = 0;
    var out_len = 0;
    if (raw)
        blk_size = buf.length
    while (pos < buf.length) {
        var buf2 = rans.encode(buf.slice(pos, pos+blk_size), order);
        if (!raw) {
            // Block header: 1 byte order + 4 byte little-endian size.
            // Buffer.allocUnsafe is a factory function, not a constructor.
            var header = Buffer.allocUnsafe(5);
            header[0] = order;
            header.writeInt32LE(buf2.length, 1);
            process.stdout.write(header)
        }
        process.stdout.write(buf2)
        pos += blk_size;
        out_len += buf2.length;
    }
    process.stderr.write("Compress order "+order+", "+buf.length+" => " + out_len + "\n");

} else {
    var pos = 0;
    var out_len = 0;
    var len = buf.length   // raw mode: the whole file is one block
    while (pos < buf.length) {
        if (!raw) {
            // Order byte of *this* block (not of the first block).  It is
            // not passed to rans.decode() here.
            var order = buf[pos];
            pos++;
            len = buf.readInt32LE(pos);
            pos += 4;
        }
        var buf2 = rans.decode(buf.slice(pos, pos+len));
        process.stdout.write(buf2)
        out_len += buf2.length;
        pos += len;
    }
    process.stderr.write("Decompress " + buf.length + " => " + out_len + "\n");
}
// Command line tool to manually test the rans4x16.js code.
//
//   node main_rans4x16.js [-d] [-r] [-o order] input-file > output-file
//
//   -d        decode (default is to encode)
//   -r        raw: treat the whole file as a single block with no
//             4-byte compressed-size prefix
//   -o order  encoder flags; a fraction selects a second byte,
//             e.g. -o8.4 => 8+(256*4)

var fs = require("fs");
var rans = require("./rans4x16");
var argv = require('minimist')(process.argv.slice(2), { boolean: ["d", "r"] });

if (argv._.length != 1) {
    process.stderr.write("Usage: node main_rans4x16.js [-d] [-o order] input-file > output-file\n");
    process.exit(1);
}

var filein = argv._[0]

var buf = fs.readFileSync(filein);
var blk_size = 1024*1024;
var raw = argv.r

if (!argv.d) {
    var order = argv.o != undefined ? argv.o : 0;

    // -o8.4 => 8+(256*4)
    // Split into integer and fractional parts so that the result is a
    // plain integer (1032) and does not keep the fraction (1032.4).
    var order_int = order >> 0;
    order = order_int + Math.round((order - order_int)*10)*256;

    var pos = 0;
    var out_len = 0;
    if (raw)
        blk_size = buf.length
    while (pos < buf.length) {
        var buf2 = rans.encode(buf.slice(pos, pos+blk_size), order);
        if (!raw) {
            // 4-byte little-endian compressed size.
            // Buffer.allocUnsafe is a factory function, not a constructor.
            var header = Buffer.allocUnsafe(4);
            header.writeInt32LE(buf2.length, 0);
            process.stdout.write(header)
        }
        process.stdout.write(buf2)
        pos += blk_size;
        // Only count the size prefix when it was actually written
        out_len += buf2.length + (raw ? 0 : 4);
    }
    process.stderr.write("Compress order "+order+", "+buf.length+" => " + out_len + "\n");

} else {
    var pos = 0;
    var out_len = 0;
    var len = buf.length   // raw mode: the whole file is one block
    while (pos < buf.length) {
        if (!raw) {
            len = buf.readInt32LE(pos);
            pos += 4;
        }
        var buf2 = rans.decode(buf.slice(pos, pos+len));
        process.stdout.write(buf2)
        out_len += buf2.length;
        pos += len;
    }
    process.stderr.write("Decompress " + buf.length + " => " + out_len + "\n");
}
Neither the names Genome Research Ltd and Wellcome Trust Sanger 17 | * Institute nor the names of its contributors may be used to endorse 18 | * or promote products derived from this software without specific 19 | * prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 22 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 25 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Command-line driver for the name tokeniser (tok3).
//
//   node main_tok3.js [-a] [-d] [-r] input-file > output-file
//
//   -a   passed through to tok3.encode as its second argument
//   -d   decompress instead of compress
//   -r   raw mode: the whole input is one block with no length header
//
// Without -r the input is cut into blocks of at most about 1 MiB, each
// ending on a newline, and every compressed block is preceded by its
// length as a 32-bit little-endian integer.

var fs = require("fs");
var tok3 = require("./tok3");
var argv = require('minimist')(process.argv.slice(2), { boolean: ["d","a", "r"] });

if (argv._.length != 1) {
    process.stderr.write("Usage: node main_tok3.js [-a] [-d] input-file > output-file\n");
    process.exit(1);
}

var filein = argv._[0];

var buf = fs.readFileSync(filein);
var blk_size = 1024*1024;
var raw = argv.r;

if (!argv.d) {
    var pos = 0;
    var out_len = 0;
    if (raw)
        blk_size = buf.length;
    while (pos < buf.length) {
        // Shrink the block so that it ends just after a newline.
        var blk_end = blk_size;
        while (blk_end > 0 && pos+blk_end < buf.length && buf[pos+blk_end-1] != 10)
            blk_end--;
        if (blk_end == 0) {
            // No newline anywhere in this window.  Searching backwards
            // further would never advance pos, so instead extend the
            // block forwards to the next newline (or to end of input).
            var nl = buf.indexOf(10, pos+blk_size);
            blk_end = nl < 0 ? buf.length - pos : nl - pos + 1;
        }
        var buf2 = tok3.encode(buf.slice(pos, pos+blk_end), argv.a);
        if (!raw) {
            // Per-block header: compressed size, 32-bit little endian.
            var header = Buffer.allocUnsafe(4);
            header.writeInt32LE(buf2.length, 0);
            process.stdout.write(header);
            out_len += 4;
        }
        process.stdout.write(buf2);
        pos += blk_end;
        out_len += buf2.length;
    }
    process.stderr.write("Compress "+buf.length+" => " + out_len + "\n");

} else {
    var pos = 0;
    var out_len = 0;
    // In raw mode the entire file is a single compressed block.
    var len = buf.length;
    while (pos < buf.length) {
        if (!raw) {
            len = buf.readInt32LE(pos);
            pos += 4;
        }
        var buf2 = tok3.decode(buf.slice(pos, pos+len), len);
        process.stdout.write(buf2);
        out_len += buf2.length;
        pos += len;
    }
    process.stderr.write("Decompress " + buf.length + " => " + out_len + "\n");
}
# An example of
# this is FreeBSD's pthread functionality, which has stub functions in the C
# library that do not work. See htscodecs issue#64

# AX_SEARCH_LIBS_REV(FUNCTION, SEARCH-LIB-PATH)
#
#   FUNCTION         name of the function to look for.
#   SEARCH-LIB-PATH  whitespace-separated library names.  Each is tried in
#                    order as -l<name>; linking with no additional library
#                    is tried last.
#
# The first combination that links wins and LIBS is left set to it, i.e.
# the matching -l option is prepended to the original LIBS, or LIBS is
# unchanged when no library is needed.  If nothing links, configure stops
# with an error.
AC_DEFUN([AX_SEARCH_LIBS_REV],
[
dnl Create an input to test linking a file calling $1.
dnl Used by AC_LINK_IFELSE.
AC_LANG_CONFTEST([AC_LANG_CALL([], [$1])])
_found=no
_LIBS=$LIBS
AC_MSG_CHECKING([for function $1])
dnl The trailing empty entry is the no-library case, deliberately last.
for xlib in $2 ""
do
    if test "x$xlib" != "x"
    then
        LIBS="-l$xlib $_LIBS"
        _res="-l$xlib"
    else
        LIBS="$_LIBS"
        _res="(no library needed)"
    fi

dnl An empty input makes AC_LINK_IFELSE reuse the conftest written above.
    AC_LINK_IFELSE([], [_found=yes; break])
done

if test "$_found" = "yes"
then
    AC_MSG_RESULT([$_res])
else
    AC_MSG_RESULT([not found])
    AC_MSG_ERROR([Function $1 not found])
fi

dnl Drop the temporaries so they do not leak into the rest of configure.
unset _found
unset _res
unset _LIBS
])

# hts_check_compile_flags_needed.m4
#
# SYNOPSIS
#
#   HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# DESCRIPTION
#
#   Check whether the given FLAGS are required to build and link INPUT with
#   the current language's compiler.  Compilation and linking are first
#   tried without FLAGS.  If that fails it then tries to compile and
#   link again with FLAGS.
#
#   FEATURE describes the feature being tested, and is used when printing
#   messages and to name the cache entry (along with the tested flags).
#
#   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
#   success/failure.  In ACTION-SUCCESS, $flags_needed will be set to
#   either an empty string or FLAGS depending on the test results.
#
#   If EXTRA-FLAGS is defined, it is added to the current language's default
#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
#   force the compiler to issue an error when a bad flag is given.
#
#   If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably
#   isn't very useful.
#
#   NOTE: Implementation based on AX_CHECK_COMPILE_FLAG.
#
# LICENSE
#
#   Copyright (c) 2008 Guido U. Draheim
#   Copyright (c) 2011 Maarten Bosmans
#   Copyright (c) 2023 Robert Davies
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved.  This file is offered as-is, without any
#   warranty.

# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# Positional arguments as used below:
#   1 FEATURE, 2 FLAGS, 3 INPUT, 4 ACTION-SUCCESS, 5 ACTION-FAILURE,
#   6 EXTRA-FLAGS.
#
# The cached result is one of:
#   none         INPUT links without any additional flags
#   <FLAGS>      INPUT links only once EXTRA-FLAGS and FLAGS are added
#   unsupported  INPUT does not link either way
# ACTION-FAILURE runs for "unsupported".  Otherwise flags_needed is set
# (empty for "none") and ACTION-SUCCESS runs.

AC_DEFUN([HTS_CHECK_COMPILE_FLAGS_NEEDED],
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
AS_VAR_PUSHDEF([CACHEVAR],[hts_cv_check_[]_AC_LANG_ABBREV[]flags_needed_$1_$6_$2])dnl
AC_CACHE_CHECK([_AC_LANG compiler flags needed for $1], CACHEVAR, [
  AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])],
    [AS_VAR_SET(CACHEVAR,[none])],
    [ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
     _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $6 $2"
     AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])],
       [AS_VAR_SET(CACHEVAR,["$2"])],
       [AS_VAR_SET(CACHEVAR,[unsupported])])
     _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])])
AS_VAR_IF(CACHEVAR,unsupported, [
  m4_default([$5], :)
], [
  AS_VAR_IF(CACHEVAR,none,[flags_needed=""], [flags_needed="$CACHEVAR"])
  m4_default([$4], :)
])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl HTS_CHECK_COMPILE_FLAGS_NEEDED
-------------------------------------------------------------------------------- /m4/vl_prog_warnings.m4: -------------------------------------------------------------------------------- 1 | dnl @synopsis VL_PROG_CC_WARNINGS([ANSI]) 2 | dnl 3 | dnl From http://ac-archive.sourceforge.net/ac-archive/vl_prog_cc_warnings.html 4 | dnl 5 | dnl Enables a reasonable set of warnings for the C compiler. 6 | dnl Optionally, if the first argument is nonempty, turns on flags which 7 | dnl enforce and/or enable proper ANSI C if such are known with the 8 | dnl compiler used. 9 | dnl 10 | dnl Currently this macro knows about GCC, Solaris C compiler, Digital 11 | dnl Unix C compiler, C for AIX Compiler, HP-UX C compiler, IRIX C 12 | dnl compiler, NEC SX-5 (Super-UX 10) C compiler, and Cray J90 (Unicos 13 | dnl 10.0.0.8) C compiler. 14 | dnl 15 | dnl @category C 16 | dnl @author Ville Laurikari 17 | dnl Updated by Rob Davies 18 | dnl @version 2002-04-04 19 | dnl @license AllPermissive 20 | dnl Copying and distribution of this file, with or without modification, 21 | dnl are permitted in any medium without royalty provided the copyright notice 22 | dnl and this notice are preserved. Users of this software should generally 23 | dnl follow the principles of the MIT License including its disclaimer. 
24 | dnl Original Copyright (c) Ville Laurikari 2002 25 | dnl Modifications Copyright (c) Genome Research Limited 2015 26 | 27 | AC_DEFUN([VL_PROG_CC_WARNINGS], [ 28 | AC_ARG_ENABLE([warnings], 29 | [AS_HELP_STRING([--disable-warnings], [turn off compiler warnings])], 30 | [], 31 | [enable_warnings=yes]) 32 | 33 | AS_IF([test "x$enable_warnings" != xno],[ 34 | AC_PROG_GREP 35 | 36 | ansi="$1" 37 | AS_IF([test "x$ansi" = "x"], 38 | [msg="for C compiler warning flags"], 39 | [msg="for C compiler warning and ANSI conformance flags"]) 40 | 41 | AC_CACHE_CHECK($msg, vl_cv_prog_cc_warnings, [ 42 | vl_cv_prog_cc_warnings="" 43 | AS_IF([test "x$CC" != "x"],[ 44 | cat > conftest.c <&1 | $GREP -i "WorkShop" > /dev/null 2>&1 && 65 | "$CC" -c -v -Xc conftest.c > /dev/null 2>&1 && 66 | test -f conftest.o],[ 67 | AS_IF([test "x$ansi" = "x"], 68 | [vl_cv_prog_cc_warnings="-v"], 69 | [vl_cv_prog_cc_warnings="-v -Xc"]) 70 | ], 71 | dnl Digital Unix C compiler 72 | ["$CC" -V 2>&1 | $GREP -i "Digital UNIX Compiler" > /dev/null 2>&1 && 73 | "$CC" -c -verbose -w0 -warnprotos -std1 conftest.c > /dev/null 2>&1 && 74 | test -f conftest.o], [ 75 | AS_IF([test "x$ansi" = "x"], 76 | [vl_cv_prog_cc_warnings="-verbose -w0 -warnprotos"], 77 | [vl_cv_prog_cc_warnings="-verbose -w0 -warnprotos -std1"]) 78 | ], 79 | dnl C for AIX Compiler 80 | ["$CC" 2>&1 | $GREP -i "C for AIX Compiler" > /dev/null 2>&1 && 81 | "$CC" -c -qlanglvl=ansi -qinfo=all conftest.c > /dev/null 2>&1 && 82 | test -f conftest.o],[ 83 | AS_IF([test "x$ansi" = "x"], 84 | [vl_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd"], 85 | [vl_cv_prog_cc_warnings="-qsrcmsg -qinfo=all:noppt:noppc:noobs:nocnd -qlanglvl=ansi"]) 86 | ], 87 | dnl IRIX C compiler 88 | ["$CC" -version 2>&1 | $GREP -i "MIPSpro Compilers" > /dev/null 2>&1 && 89 | "$CC" -c -fullwarn -ansi -ansiE conftest.c > /dev/null 2>&1 && 90 | test -f conftest.o],[ 91 | AS_IF([test "x$ansi" = "x"], 92 | [vl_cv_prog_cc_warnings="-fullwarn"], 93 | 
[vl_cv_prog_cc_warnings="-fullwarn -ansi -ansiE"]) 94 | ], 95 | dnl HP-UX C compiler 96 | [what "$CC" 2>&1 | $GREP -i "HP C Compiler" > /dev/null 2>&1 && 97 | "$CC" -c -Aa +w1 conftest.c > /dev/null 2>&1 && 98 | test -f conftest.o],[ 99 | AS_IF([test "x$ansi" = "x"], 100 | [vl_cv_prog_cc_warnings="+w1"], 101 | [vl_cv_prog_cc_warnings="+w1 -Aa"]) 102 | ], 103 | dnl The NEC SX-5 (Super-UX 10) C compiler 104 | ["$CC" -V 2>&1 | $GREP "/SX" > /dev/null 2>&1 && 105 | "$CC" -c -pvctl[,]fullmsg -Xc conftest.c > /dev/null 2>&1 && 106 | test -f conftest.o],[ 107 | AS_IF([test "x$ansi" = "x"], 108 | [vl_cv_prog_cc_warnings="-pvctl[,]fullmsg"], 109 | [vl_cv_prog_cc_warnings="-pvctl[,]fullmsg -Xc"]) 110 | ], 111 | dnl The Cray C compiler (Unicos) 112 | ["$CC" -V 2>&1 | $GREP -i "Cray" > /dev/null 2>&1 && 113 | "$CC" -c -h msglevel 2 conftest.c > /dev/null 2>&1 && 114 | test -f conftest.o],[ 115 | AS_IF([test "x$ansi" = "x"], 116 | [vl_cv_prog_cc_warnings="-h msglevel 2"], 117 | [vl_cv_prog_cc_warnings="-h msglevel 2 -h conform"]) 118 | ]) 119 | rm -f conftest.* 120 | ]) 121 | ]) 122 | AS_IF([test "x$vl_cv_prog_cc_warnings" != "x"], 123 | [CFLAGS="$vl_cv_prog_cc_warnings $CFLAGS"]) 124 | ]) 125 | ])dnl 126 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Genome Research Ltd. 2 | # Author(s): James Bonfield 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, 8 | # this list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above 11 | # copyright notice, this list of conditions and the following 12 | # disclaimer in the documentation and/or other materials provided 13 | # with the distribution. 14 | # 15 | # 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 16 | # Institute nor the names of its contributors may be used to endorse 17 | # or promote products derived from this software without specific 18 | # prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 21 | # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 | # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 23 | # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 24 | # LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | # 32 | 33 | # Standalone test programs 34 | noinst_PROGRAMS = rans4x16pr tokenise_name3 arith_dynamic rans4x8 rans4x16pr fqzcomp_qual varint entropy 35 | 36 | LDADD = $(top_builddir)/htscodecs/libhtscodecs.la 37 | AM_CPPFLAGS = -I$(top_srcdir) 38 | 39 | fqzcomp_qual_SOURCES = fqzcomp_qual_test.c 40 | rans4x8_SOURCES = rANS_static_test.c 41 | rans4x16pr_SOURCES = rANS_static4x16pr_test.c 42 | arith_dynamic_SOURCES = arith_dynamic_test.c 43 | tokenise_name3_SOURCES = tokenise_name3_test.c 44 | varint_SOURCES = varint_test.c 45 | entropy_SOURCES = entropy.c 46 | 47 | test_scripts = \ 48 | entropy.test \ 49 | rans4x8.test \ 50 | rans4x16.test \ 51 | arith.test \ 52 | tok3.test \ 53 | fqzcomp.test 54 | 55 | TESTS = $(test_scripts) \ 56 | varint 57 | 58 | EXTRA_DIST = $(test_scripts) dat names 59 | 60 | test_outdir = test.out 61 | 62 | distclean-local: 63 | -rm -rf $(test_outdir) 64 | 65 | # Fuzz testing programs, for local use. 66 | # 67 | # Best configure the package in a build subdirectory 68 | # ../configure --disable-shared CFLAGS="-g -gdwarf-2 -Wall -O3 -fsanitize=address,undefined" 69 | # Then "make fuzz" to recompile these tools. 70 | # 71 | # We can get non-fuzzing code for valgrind use, having found a bug, with e.g. 72 | # clang70 -I. -I../../tests -I.. -I../.. -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -Wall -g -Wall -gdwarf-2 ../../tests/tokenise_name3_fuzz.c -DNOFUZZ -lbz2 -lpthread 73 | 74 | 75 | # Our fuzz tools are built against a version of the library compiled 76 | # with -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION. 77 | # This limits the maximum size of some data blocks to speed up 78 | # fuzz testing, avoiding small inputs uncompressing to huge 79 | # outputs. 
80 | 81 | fuzz: $(EXTRA_PROGRAMS) 82 | 83 | # Additional flags 84 | fuzzer_cflags = -fsanitize=fuzzer -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 85 | fuzzer_ldflags = -fsanitize=fuzzer 86 | fuzzer_ldadd = $(top_builddir)/htscodecs/libcodecsfuzz.a \ 87 | $(top_builddir)/htscodecs/libcodecsfuzz_sse4.a \ 88 | $(top_builddir)/htscodecs/libcodecsfuzz_avx2.a \ 89 | $(top_builddir)/htscodecs/libcodecsfuzz_avx512.a 90 | 91 | EXTRA_PROGRAMS = \ 92 | rans4x8_fuzz \ 93 | rans4x16pr_fuzz \ 94 | arith_dynamic_fuzz \ 95 | tokenise_name3_fuzz \ 96 | tokenise_name3_fuzzrt \ 97 | fqzcomp_qual_fuzz \ 98 | fqzcomp_qual_fuzzrt \ 99 | entropy_fuzz 100 | 101 | rans4x8_fuzz_SOURCES = rANS_static_fuzz.c 102 | rans4x8_fuzz_CFLAGS = $(fuzzer_cflags) 103 | rans4x8_fuzz_LDFLAGS = $(fuzzer_ldflags) 104 | rans4x8_fuzz_LDADD = $(fuzzer_ldadd) 105 | 106 | rans4x16pr_fuzz_SOURCES = rANS_static4x16pr_fuzz.c 107 | rans4x16pr_fuzz_CFLAGS = $(fuzzer_cflags) 108 | rans4x16pr_fuzz_LDFLAGS = $(fuzzer_ldflags) 109 | rans4x16pr_fuzz_LDADD = $(fuzzer_ldadd) 110 | 111 | arith_dynamic_fuzz_SOURCES = arith_dynamic_fuzz.c 112 | arith_dynamic_fuzz_CFLAGS = $(fuzzer_cflags) 113 | arith_dynamic_fuzz_LDFLAGS = $(fuzzer_ldflags) 114 | arith_dynamic_fuzz_LDADD = $(fuzzer_ldadd) 115 | 116 | tokenise_name3_fuzz_SOURCES = tokenise_name3_fuzz.c 117 | tokenise_name3_fuzz_CFLAGS = $(fuzzer_cflags) 118 | tokenise_name3_fuzz_LDFLAGS = $(fuzzer_ldflags) 119 | tokenise_name3_fuzz_LDADD = $(fuzzer_ldadd) 120 | 121 | tokenise_name3_fuzzrt_SOURCES = tokenise_name3_fuzzrt.c 122 | tokenise_name3_fuzzrt_CFLAGS = $(fuzzer_cflags) 123 | tokenise_name3_fuzzrt_LDFLAGS = $(fuzzer_ldflags) 124 | tokenise_name3_fuzzrt_LDADD = $(fuzzer_ldadd) 125 | 126 | fqzcomp_qual_fuzz_SOURCES = fqzcomp_qual_fuzz.c 127 | fqzcomp_qual_fuzz_CFLAGS = $(fuzzer_cflags) 128 | fqzcomp_qual_fuzz_LDFLAGS = $(fuzzer_ldflags) 129 | fqzcomp_qual_fuzz_LDADD = $(fuzzer_ldadd) 130 | 131 | entropy_fuzz_SOURCES = entropy_fuzz.c 132 | entropy_fuzz_CFLAGS = 
#!/bin/sh
# Regression test for the arith_dynamic codec.
#
# For every input under $srcdir/dat (and the optional htscodecs-corpus
# checkout) and every order that has a reference file in dat/arith/:
#   1. compress and decompress the input, checking the round trip;
#   2. decompress the stored reference file and compare with the input.
# Exits non-zero on the first failure.  Tool stderr is appended to
# test.out/arith.stderr.

out=test.out
if test ! -d "$out"
then
    mkdir "$out" || exit 1
fi

for f in "$srcdir"/dat/q* "$srcdir"/dat/u32* "$srcdir"/htscodecs-corpus/dat/q*
do
    # A pattern that matches nothing is left as a literal string; skip it.
    test -f "$f" || continue

    base=${f##*/}
    comp=${f%/*/*}/dat/arith/$base

    # Decide on the file name only.  Matching the whole path would treat
    # any input below a directory starting with "q" as quality data.
    case $base in
        q*)
            # Quality files: keep the first column, joined into one line.
            cut -f 1 < "$f" | tr -d '\012' > "$out/arith-nl" || exit 1
            ;;
        *)
            cp "$f" "$out/arith-nl" || exit 1
            ;;
    esac

    for o in 0 1 64 65 128 129 192 193 8 9 4
    do
        # Only orders with a stored reference file are tested.
        if [ ! -e "$comp.$o" ]
        then
            continue
        fi
        printf 'Testing arith_dynamic -r -o%s on %s\t' "$o" "$f"

        # Round trip
        ./arith_dynamic -r -o"$o" "$out/arith-nl" "$out/arith.comp" 2>>"$out/arith.stderr" || exit 1
        wc -c < "$out/arith.comp"
        ./arith_dynamic -r -d "$out/arith.comp" "$out/arith.uncomp" 2>>"$out/arith.stderr" || exit 1
        cmp "$out/arith-nl" "$out/arith.uncomp" || exit 1

        # Precompressed data
        ./arith_dynamic -r -d "$comp.$o" "$out/arith.uncomp" 2>>"$out/arith.stderr" || exit 1
        cmp "$out/arith-nl" "$out/arith.uncomp" || exit 1
    done
done
4 | * Author(s): James Bonfield 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 18 | * Institute nor the names of its contributors may be used to endorse 19 | * or promote products derived from this software without specific 20 | * prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 23 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 26 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | */ 34 | #include "config.h" 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "htscodecs/arith_dynamic.h" 45 | 46 | int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { 47 | unsigned int uncomp_size = 0; 48 | unsigned char *uncomp = arith_uncompress(in, in_size, &uncomp_size); 49 | if (uncomp) 50 | free(uncomp); 51 | 52 | return 0; 53 | } 54 | 55 | #ifdef NOFUZZ 56 | #include 57 | #include 58 | #include 59 | 60 | #define BS 1024*1024 61 | static unsigned char *load(char *fn, uint64_t *lenp) { 62 | unsigned char *data = NULL; 63 | uint64_t dsize = 0; 64 | uint64_t dcurr = 0; 65 | signed int len; 66 | int fd = open(fn, O_RDONLY); 67 | 68 | do { 69 | if (dsize - dcurr < BS) { 70 | dsize = dsize ? dsize * 2 : BS; 71 | data = realloc(data, dsize); 72 | } 73 | 74 | len = read(fd, data + dcurr, BS); 75 | if (len > 0) 76 | dcurr += len; 77 | } while (len > 0); 78 | 79 | if (len == -1) { 80 | perror("read"); 81 | } 82 | 83 | close(fd); 84 | *lenp = dcurr; 85 | return data; 86 | } 87 | 88 | int main(int argc, char **argv) { 89 | uint64_t in_size; 90 | unsigned char *in = load(argv[1], &in_size); 91 | 92 | LLVMFuzzerTestOneInput(in, in_size); 93 | 94 | free(in); 95 | return 0; 96 | } 97 | #endif 98 | -------------------------------------------------------------------------------- /tests/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Run this from the build subdirectory. 
# Usage: benchmark.sh filename
#
# Runs the rans4x8 and rans4x16pr test programs in timing mode (-t) on
# the given file, $ntrials times each (default 5), and prints one summary
# line per configuration.  TEST_DIR overrides where the programs are
# looked up (default ./tests).

if [ $# -lt 1 ]
then
    echo "Usage: benchmark.sh filename" >&2
    exit 1
fi

file=$1
test_dir=${TEST_DIR:-./tests}
r4x8=$test_dir/rans4x8
r4x16=$test_dir/rans4x16pr
ntrials=${ntrials:-5}

# Over all output lines containing "bytes": track the min and max of
# field 1 and of field 4, and remember field 10 from the last such line.
# Prints: min1 max1 min4 max4 field10.
# NOTE(review): fields 1/4/10 are presumably encode speed, decode speed
# and compressed size - confirm against the test programs' -t output.
awkscript='BEGIN {e1=99999;e2=0;d1=99999;d2=0} /bytes/ {if (e1 > $1) {e1 = $1} if (e2 < $1) {e2 = $1} if (d1 > $4) {d1 = $4} if (d2 < $4) {d2 = $4};s=$10} END {print e1,e2,d1,d2,s}'

# run_trials PROGRAM [OPTION...]
# Runs PROGRAM OPTION... on $file $ntrials times and prints the awk
# summary described above.
run_trials() {
    for i in `seq 1 $ntrials`
    do
        "$@" "$file" 2>&1
    done | awk "$awkscript"
}

echo "Program Opts Size Encode Decode"
echo "-----------------------------------------------"

# Order-0
set -- $(run_trials "$r4x8" -t -o0)
printf "r4x8 -o0 %10d %6.1f %6.1f\n" $5 $2 $4

set -- $(run_trials "$r4x16" -t -o0)
printf "r4x16 -o0 %10d %6.1f %6.1f\n" $5 $2 $4

for c in 0x0000 0x0101 0x0202 0x0404
do
    set -- $(run_trials "$r4x16" -t -o4 -c$c)
    printf "r32x16 -o4 -c %-4s %10d %6.1f %6.1f\n" $c $5 $2 $4
done

echo

# Order-1
set -- $(run_trials "$r4x8" -t -o1)
printf "r4x8 -o1 %10d %6.1f %6.1f\n" $5 $2 $4

set -- $(run_trials "$r4x16" -t -o1)
printf "r4x16 -o1 %10d %6.1f %6.1f\n" $5 $2 $4

for c in 0x0000 0x0101 0x0202 0x0404
do
    set -- $(run_trials "$r4x16" -t -o5 -c$c)
    printf "r32x16 -o5 -c %-4s %10d %6.1f %6.1f\n" $c $5 $2 $4
done
-------------------------------------------------------------------------------- /tests/dat/arith/q4.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.1 -------------------------------------------------------------------------------- /tests/dat/arith/q4.128: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.128 -------------------------------------------------------------------------------- /tests/dat/arith/q4.129: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.129 -------------------------------------------------------------------------------- /tests/dat/arith/q4.192: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.192 -------------------------------------------------------------------------------- /tests/dat/arith/q4.193: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.193 -------------------------------------------------------------------------------- /tests/dat/arith/q4.64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.64 -------------------------------------------------------------------------------- /tests/dat/arith/q4.65: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.65 -------------------------------------------------------------------------------- /tests/dat/arith/q4.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.8 -------------------------------------------------------------------------------- /tests/dat/arith/q4.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q4.9 -------------------------------------------------------------------------------- /tests/dat/arith/q40+dir.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q40+dir.0 -------------------------------------------------------------------------------- /tests/dat/arith/q40+dir.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q40+dir.1 -------------------------------------------------------------------------------- /tests/dat/arith/q40+dir.64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q40+dir.64 -------------------------------------------------------------------------------- /tests/dat/arith/q40+dir.65: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q40+dir.65 -------------------------------------------------------------------------------- /tests/dat/arith/q40+dir.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q40+dir.8 -------------------------------------------------------------------------------- /tests/dat/arith/q40+dir.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q40+dir.9 -------------------------------------------------------------------------------- /tests/dat/arith/q8.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.0 -------------------------------------------------------------------------------- /tests/dat/arith/q8.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.1 -------------------------------------------------------------------------------- /tests/dat/arith/q8.128: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.128 -------------------------------------------------------------------------------- /tests/dat/arith/q8.129: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.129 
-------------------------------------------------------------------------------- /tests/dat/arith/q8.192: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.192 -------------------------------------------------------------------------------- /tests/dat/arith/q8.193: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.193 -------------------------------------------------------------------------------- /tests/dat/arith/q8.64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.64 -------------------------------------------------------------------------------- /tests/dat/arith/q8.65: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/q8.65 -------------------------------------------------------------------------------- /tests/dat/arith/qvar.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/qvar.0 -------------------------------------------------------------------------------- /tests/dat/arith/qvar.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/qvar.1 -------------------------------------------------------------------------------- /tests/dat/arith/qvar.64: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/qvar.64 -------------------------------------------------------------------------------- /tests/dat/arith/qvar.65: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/qvar.65 -------------------------------------------------------------------------------- /tests/dat/arith/u32.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/u32.1 -------------------------------------------------------------------------------- /tests/dat/arith/u32.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/u32.4 -------------------------------------------------------------------------------- /tests/dat/arith/u32.65: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/u32.65 -------------------------------------------------------------------------------- /tests/dat/arith/u32.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/arith/u32.9 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q4.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q4.0 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q4.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q4.1 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q4.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q4.2 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q4.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q4.3 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q40+dir.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q40+dir.0 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q40+dir.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q40+dir.1 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q40+dir.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q40+dir.2 
-------------------------------------------------------------------------------- /tests/dat/fqzcomp/q40+dir.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q40+dir.3 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q8.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q8.0 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q8.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q8.1 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q8.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q8.2 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/q8.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/q8.3 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/qvar.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/qvar.0 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/qvar.1: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/qvar.1 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/qvar.2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/qvar.2 -------------------------------------------------------------------------------- /tests/dat/fqzcomp/qvar.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/fqzcomp/qvar.3 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.0 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.1 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.128: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.128 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.129: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.129 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.192: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.192 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.193: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.193 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.4 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.5 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.64 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.65: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.65 
-------------------------------------------------------------------------------- /tests/dat/r4x16/q4.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.8 -------------------------------------------------------------------------------- /tests/dat/r4x16/q4.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q4.9 -------------------------------------------------------------------------------- /tests/dat/r4x16/q40+dir.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q40+dir.0 -------------------------------------------------------------------------------- /tests/dat/r4x16/q40+dir.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q40+dir.1 -------------------------------------------------------------------------------- /tests/dat/r4x16/q40+dir.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q40+dir.4 -------------------------------------------------------------------------------- /tests/dat/r4x16/q40+dir.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q40+dir.5 -------------------------------------------------------------------------------- /tests/dat/r4x16/q40+dir.8: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q40+dir.8 -------------------------------------------------------------------------------- /tests/dat/r4x16/q40+dir.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q40+dir.9 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.0 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.1 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.128: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.128 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.129: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.129 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.192: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.192 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.193: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.193 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.4 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.5 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.64 -------------------------------------------------------------------------------- /tests/dat/r4x16/q8.65: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/q8.65 -------------------------------------------------------------------------------- /tests/dat/r4x16/qvar.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/qvar.0 
-------------------------------------------------------------------------------- /tests/dat/r4x16/qvar.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/qvar.1 -------------------------------------------------------------------------------- /tests/dat/r4x16/qvar.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/qvar.4 -------------------------------------------------------------------------------- /tests/dat/r4x16/qvar.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x16/qvar.5 -------------------------------------------------------------------------------- /tests/dat/r4x8/q4.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/q4.0 -------------------------------------------------------------------------------- /tests/dat/r4x8/q4.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/q4.1 -------------------------------------------------------------------------------- /tests/dat/r4x8/q40+dir.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/q40+dir.0 -------------------------------------------------------------------------------- /tests/dat/r4x8/q40+dir.1: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/q40+dir.1 -------------------------------------------------------------------------------- /tests/dat/r4x8/q8.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/q8.0 -------------------------------------------------------------------------------- /tests/dat/r4x8/q8.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/q8.1 -------------------------------------------------------------------------------- /tests/dat/r4x8/qvar.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/qvar.0 -------------------------------------------------------------------------------- /tests/dat/r4x8/qvar.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/r4x8/qvar.1 -------------------------------------------------------------------------------- /tests/dat/u32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/dat/u32 -------------------------------------------------------------------------------- /tests/entropy.test: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # One copy tests the small buffer histogram variant 4 | ./entropy $srcdir/dat/q4 5 | 6 | # Four copies tests 
the large buffer histogram variant 7 | cat $srcdir/dat/q4 $srcdir/dat/q4 $srcdir/dat/q4 $srcdir/dat/q4 | ./entropy 8 | -------------------------------------------------------------------------------- /tests/entropy_fuzz.c: -------------------------------------------------------------------------------- 1 | /* Fuzz testing target. */ 2 | /* 3 | * Copyright (c) 2022 Genome Research Ltd. 4 | * Author(s): Rob Davies 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 18 | * Institute nor the names of its contributors may be used to endorse 19 | * or promote products derived from this software without specific 20 | * prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 23 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 26 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | 35 | #include 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #include "htscodecs/arith_dynamic.h" 46 | #include "htscodecs/rANS_static.h" 47 | #include "htscodecs/rANS_static4x16.h" 48 | #include "htscodecs/rANS_static32x16pr.h" 49 | 50 | int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { 51 | 52 | const int order_a[] = { 53 | 0,1, // No extras 54 | 0x40,041, // RANS_ORDER_RLE 55 | 0x80,0x81, // RANS_ORDER_PACK 56 | 0xc0,0xc1, // RANS_ORDER_RLE|RANS_ORDER_PACK 57 | }; 58 | 59 | #if defined(__x86_64__) 60 | const int cpu_enc_a[] = { 61 | 0, RANS_CPU_ENC_SSE4, RANS_CPU_ENC_AVX2, RANS_CPU_ENC_AVX512 62 | }; 63 | const int cpu_dec_a[] = { 64 | 0, RANS_CPU_DEC_SSE4, RANS_CPU_DEC_AVX2, RANS_CPU_DEC_AVX512 65 | }; 66 | #elif defined(__ARM_NEON) && defined(__aarch64__) 67 | const int cpu_enc_a[] = { 0, RANS_CPU_ENC_NEON }; 68 | const int cpu_dec_a[] = { 0, RANS_CPU_DEC_NEON }; 69 | #else 70 | const int cpu_enc_a[] = { 0 }; 71 | const int cpu_dec_a[] = { 0 }; 72 | #endif 73 | int i; 74 | 75 | if (in_size > 200000) 76 | return -1; 77 | 78 | // rans_compress() only supports order 0 and 1 79 | for (i = 0; i < 1; i++) { 80 | uint8_t *comp, *uncomp; 81 | uint32_t csize, usize; 82 | comp = rans_compress(in, in_size, &csize, i); 83 | if (!comp) abort(); 84 | uncomp = rans_uncompress(comp, csize, &usize); 85 | if (!uncomp) abort(); 86 | if 
(usize != in_size) abort(); 87 | if (memcmp(uncomp, in, in_size) != 0) abort(); 88 | free(comp); 89 | free(uncomp); 90 | } 91 | 92 | for (i = 0; i < sizeof(order_a) / sizeof(*order_a); i++) { 93 | int order = order_a[i]; 94 | uint8_t *comp, *uncomp, *comp0 = NULL; 95 | uint32_t csize, usize, csize0 = 0; 96 | int c; 97 | comp = rans_compress_4x16(in, in_size, &csize, order); 98 | if (!comp) abort(); 99 | uncomp = rans_uncompress_4x16(comp, csize, &usize); 100 | if (!uncomp) abort(); 101 | if (usize != in_size) abort(); 102 | if (memcmp(uncomp, in, in_size) != 0) abort(); 103 | free(comp); 104 | free(uncomp); 105 | 106 | comp = arith_compress(in, in_size, &csize, order); 107 | if (!comp) abort(); 108 | uncomp = arith_uncompress(comp, csize, &usize); 109 | if (!uncomp) abort(); 110 | if (usize != in_size) abort(); 111 | if (memcmp(uncomp, in, in_size) != 0) abort(); 112 | free(comp); 113 | free(uncomp); 114 | 115 | // Check all SIMD variants for RANS_ORDER_X32 116 | for (c = 0; c < sizeof(cpu_enc_a)/sizeof(*cpu_enc_a); c++) { 117 | rans_set_cpu(cpu_enc_a[c]); 118 | comp = rans_compress_4x16(in, in_size, &csize, 119 | order | RANS_ORDER_X32); 120 | if (!comp) abort(); 121 | if (comp0) { 122 | if (csize != csize0 || 123 | memcmp(comp0, comp, csize) != 0) { 124 | fprintf(stderr, 125 | "Compressed data mismatch order 0x%x cpu 0x%x\n", 126 | order, cpu_enc_a[c]); 127 | abort(); 128 | } 129 | free(comp); 130 | } else { 131 | comp0 = comp; 132 | csize0 = csize; 133 | } 134 | } 135 | for (c = 0; c < sizeof(cpu_dec_a)/sizeof(*cpu_dec_a); c++) { 136 | rans_set_cpu(cpu_dec_a[c]); 137 | uncomp = rans_uncompress_4x16(comp0, csize0, &usize); 138 | if (!uncomp) abort(); 139 | if (usize != in_size || 140 | memcmp(uncomp, in, in_size) != 0) { 141 | fprintf(stderr, 142 | "Uncompressed data mismatch order 0x%x cpu 0x%x\n", 143 | order, cpu_dec_a[c]); 144 | abort(); 145 | } 146 | free(uncomp); 147 | } 148 | free(comp0); 149 | } 150 | return 0; 151 | } 152 | 
-------------------------------------------------------------------------------- /tests/fqzcomp.test: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | out=test.out 3 | if test ! -d $out 4 | then 5 | mkdir $out 6 | fi 7 | 8 | for f in `ls -1 $srcdir/dat/q* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null` 9 | do 10 | comp=${f%/*/*}/dat/fqzcomp/${f##*/} 11 | cut -f 1 $f > $out/fqz 12 | for s in 0 1 2 3 13 | do 14 | printf 'Testing fqzcomp_qual -r -s %s on %s\t' $s "$f" 15 | 16 | # Round trip 17 | ./fqzcomp_qual -r -s $s $out/fqz > $out/fqz.comp 2>>$out/fqz.stderr || exit 1 18 | wc -c < $out/fqz.comp 19 | ./fqzcomp_qual -r -d $out/fqz.comp > $out/fqz.uncomp 2>>$out/fqz.stderr || exit 1 20 | cmp $out/fqz $out/fqz.uncomp || exit 1 21 | 22 | # Precompressed data 23 | ./fqzcomp_qual -r -d $comp.$s > $out/fqz.uncomp 2>>$out/fqz.stderr || exit 1 24 | cmp $out/fqz $out/fqz.uncomp || exit 1 25 | done 26 | echo 27 | done 28 | -------------------------------------------------------------------------------- /tests/fqzcomp_qual_fuzz.c: -------------------------------------------------------------------------------- 1 | /* Fuzz testing target. */ 2 | /* 3 | * Copyright (c) 2019,2020 Genome Research Ltd. 4 | * Author(s): James Bonfield 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * 3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger 18 | * Institute nor the names of its contributors may be used to endorse 19 | * or promote products derived from this software without specific 20 | * prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 23 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 26 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | #include "config.h" 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #include "htscodecs/fqzcomp_qual.h" 46 | 47 | int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { 48 | size_t uncomp_size; 49 | char *uncomp = fqz_decompress((char *)in, in_size, &uncomp_size, NULL, 0); 50 | if (uncomp) 51 | free(uncomp); 52 | 53 | return 0; 54 | } 55 | 56 | #ifdef NOFUZZ 57 | #include 58 | #include 59 | #include 60 | 61 | #define BS 1024*1024 62 | static unsigned char *load(char *fn, uint64_t *lenp) { 63 | unsigned char *data = NULL; 64 | uint64_t dsize = 0; 65 | uint64_t dcurr = 0; 66 | signed int len; 67 | 68 | int fd = open(fn, O_RDONLY); 69 | if (!fd) { 70 | perror(fn); 71 | return NULL; 72 | } 73 | 74 | do { 75 | if (dsize - dcurr < BS) { 76 | dsize = dsize ? 
dsize * 2 : BS; 77 | data = realloc(data, dsize); 78 | } 79 | 80 | len = read(fd, data + dcurr, BS); 81 | if (len > 0) 82 | dcurr += len; 83 | } while (len > 0); 84 | 85 | if (len == -1) { 86 | perror("read"); 87 | } 88 | close(fd); 89 | 90 | *lenp = dcurr; 91 | return data; 92 | } 93 | 94 | int main(int argc, char **argv) { 95 | uint64_t in_size; 96 | unsigned char *in = load(argv[1], &in_size); 97 | 98 | LLVMFuzzerTestOneInput(in, in_size); 99 | 100 | free(in); 101 | return 0; 102 | } 103 | #endif 104 | -------------------------------------------------------------------------------- /tests/fqzcomp_qual_fuzzrt.c: -------------------------------------------------------------------------------- 1 | /* Fuzz testing target. */ 2 | /* 3 | * Copyright (c) 2023 Genome Research Ltd. 4 | * Author(s): James Bonfield 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 18 | * Institute nor the names of its contributors may be used to endorse 19 | * or promote products derived from this software without specific 20 | * prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 23 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 26 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | #include "config.h" 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #include "htscodecs/fqzcomp_qual.h" 46 | 47 | #ifndef MAX_REC 48 | #define MAX_REC 5000 49 | #endif 50 | 51 | #ifndef MAX_SEQ 52 | # define MAX_SEQ 5000 53 | #endif 54 | 55 | static unsigned int slice_len[MAX_REC]; 56 | static unsigned int slice_flags[MAX_REC]; 57 | 58 | static fqz_slice fixed_slice = {0}; 59 | 60 | fqz_slice *fake_slice(size_t buf_len, int nrec) { 61 | fixed_slice.len = slice_len; 62 | fixed_slice.flags = slice_flags; 63 | fixed_slice.num_records = nrec; 64 | 65 | // 1 long record 66 | if (nrec == 1) { 67 | slice_len[0] = buf_len; 68 | slice_flags[0] = 0; // FIXME 69 | return &fixed_slice; 70 | } 71 | 72 | // N 1-byte records 73 | if (nrec == buf_len) { 74 | int i; 75 | for (i = 0; i < buf_len; i++) { 76 | slice_len[i] = 1; 77 | slice_flags[i] = 0; // FIXME 78 | } 79 | return &fixed_slice; 80 | } 81 | 82 | // Otherwise variable length records 83 | 84 | // Reproducability of randomness 85 | int seed = rand(); 86 | srand(0); 87 | 88 | int nlen = buf_len/10+1; 89 | int i, l, n = 0; 90 | for (i = 0; i < buf_len; i+=l, n++) { 91 | l = rand() % (nlen+1); 92 | l += l==0; 93 | slice_len[n] = i+l < buf_len ? 
l : buf_len-i; 94 | slice_flags[n] = 0; // FIXME 95 | } 96 | fixed_slice.num_records = n; 97 | 98 | srand(seed); // new random state 99 | 100 | return &fixed_slice; 101 | } 102 | 103 | int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { 104 | size_t c_size, u_size; 105 | 106 | int mode = 0; 107 | for (mode = 0; mode < 3; mode++) { 108 | int mval[3] = {0,1,in_size}; 109 | fqz_slice *s = fake_slice(in_size, mval[mode]); 110 | 111 | // Semi random strat, but based on a few bits of input data 112 | // for reproducability. 113 | // This lets the fuzzer explore the parameter space itself. 114 | int strat = in_size ? in[0] & 3 : 0; 115 | char *comp = fqz_compress(3, s, (char *)in, in_size, &c_size, 116 | strat, NULL); 117 | if (!comp) { 118 | fprintf(stderr, "REJECT FQZ %d to (null)n", (int)in_size); 119 | return -1; 120 | } 121 | 122 | char *uncomp = fqz_decompress(comp, c_size, &u_size, NULL, 0); 123 | if (!uncomp) 124 | abort(); 125 | 126 | if (in_size != u_size) 127 | abort(); 128 | 129 | if (memcmp(uncomp, in, in_size) != 0) 130 | abort(); 131 | 132 | free(comp); 133 | free(uncomp); 134 | } 135 | 136 | return 0; 137 | } 138 | -------------------------------------------------------------------------------- /tests/names/tok3/01.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.13: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.7: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/01.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/01.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.17: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/02.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/02.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.1: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/03.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/03.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.13: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.7: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/05.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/05.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.17: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/08.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/08.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.1: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/09.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/09.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.13: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.7: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/10.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/10.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.17: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/20.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/20.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.1: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/nv.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.13: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.17: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.7: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/nv2.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/nv2.names.9 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.1 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.11: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.11 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.13: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.13 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.15 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.17: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.17 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.19: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.19 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.3 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.5 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.7 -------------------------------------------------------------------------------- /tests/names/tok3/rr.names.9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/samtools/htscodecs/ce66e5f303862aad325e4df908636a31c877b4d8/tests/names/tok3/rr.names.9 -------------------------------------------------------------------------------- /tests/rANS_static4x16pr_fuzz.c: -------------------------------------------------------------------------------- 1 | /* Fuzz testing target. */ 2 | /* 3 | * Copyright (c) 2019,2020 Genome Research Ltd. 
4 | * Author(s): James Bonfield 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 18 | * Institute nor the names of its contributors may be used to endorse 19 | * or promote products derived from this software without specific 20 | * prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 23 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH 26 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | #include "config.h" 35 | 36 | /* 37 | For best results, configure, from a build subdir, to use the address and 38 | undefined behaviour sanitizers, and run "make fuzz". 
39 | E.g.: 40 | 41 | ../configure CFLAGS='-g -gdwarf-2 -O3 -Wall -fsanitize=address,undefined' CPPFLAGS='-DUBSAN' 42 | make fuzz 43 | 44 | Run with: 45 | export ASAN_OPTIONS=allow_addr2line=1 46 | export UBSAN_OPTION=halt_on_error=1 47 | tests/rANS_static4x16pr_fuzz corpus 48 | or 49 | tests/rANS_static4x16pr_fuzz -detect_leaks=0 corpus 50 | 51 | I generated corpus as a whole bunch of precompressed tiny inputs from 52 | tests/dat/q4 for different compression modes. 53 | 54 | For debugging purposes, we can compile a non-fuzzer non-ASAN build using 55 | -DNOFUZZ which creates a binary we can debug on any libfuzzer generated 56 | output using valgrind. (The rans4x16 command line test won't quite work as 57 | it's a slightly different input format with explicit sizes in the binary 58 | stream.) 59 | */ 60 | 61 | #include 62 | #include 63 | #include 64 | #include 65 | #include 66 | #include 67 | #include 68 | 69 | #include "htscodecs/rANS_static4x16.h" 70 | 71 | int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) { 72 | int c; 73 | unsigned int uncomp_size = 0; 74 | unsigned char *uncomp; 75 | 76 | const int cpu_dec_a[] = { 77 | 0 78 | #if defined(__x86_64__) 79 | , RANS_CPU_DEC_SSE4, RANS_CPU_DEC_AVX2, RANS_CPU_DEC_AVX512 80 | #endif 81 | #if defined(__ARM_NEON) 82 | , RANS_CPU_DEC_NEON 83 | #endif 84 | }; 85 | 86 | for (c = 0; c < sizeof(cpu_dec_a)/sizeof(*cpu_dec_a); c++) { 87 | rans_set_cpu(cpu_dec_a[c]); 88 | uncomp = rans_uncompress_4x16(in, in_size, &uncomp_size); 89 | if (uncomp) 90 | free(uncomp); 91 | } 92 | 93 | return 0; 94 | } 95 | 96 | #ifdef NOFUZZ 97 | #include 98 | #include 99 | #include 100 | 101 | #define BS 1024*1024 102 | static unsigned char *load(char *fn, uint64_t *lenp) { 103 | unsigned char *data = NULL; 104 | uint64_t dsize = 0; 105 | uint64_t dcurr = 0; 106 | signed int len; 107 | int fd = open(fn, O_RDONLY); 108 | 109 | do { 110 | if (dsize - dcurr < BS) { 111 | dsize = dsize ? 
dsize * 2 : BS; 112 | data = realloc(data, dsize); 113 | } 114 | 115 | len = read(fd, data + dcurr, BS); 116 | if (len > 0) 117 | dcurr += len; 118 | } while (len > 0); 119 | 120 | if (len == -1) { 121 | perror("read"); 122 | } 123 | 124 | close(fd); 125 | *lenp = dcurr; 126 | return data; 127 | } 128 | 129 | int main(int argc, char **argv) { 130 | uint64_t in_size; 131 | unsigned char *in = load(argv[1], &in_size); 132 | 133 | LLVMFuzzerTestOneInput(in, in_size); 134 | 135 | free(in); 136 | 137 | return 0; 138 | } 139 | #endif 140 | -------------------------------------------------------------------------------- /tests/rANS_static_fuzz.c: -------------------------------------------------------------------------------- 1 | /* Fuzz testing target. */ 2 | /* 3 | * Copyright (c) 2019,2020 Genome Research Ltd. 4 | * Author(s): James Bonfield 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger 18 | * Institute nor the names of its contributors may be used to endorse 19 | * or promote products derived from this software without specific 20 | * prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS 23 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH 26 | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | #include "config.h" 35 | 36 | /* 37 | For best results, configure, from a build subdir, to use the address and 38 | undefined behaviour sanitizers, and run "make fuzz". 39 | E.g.: 40 | 41 | ../configure CFLAGS='-g -gdwarf-2 -O3 -Wall -fsanitize=address,undefined' CPPFLAGS='-DUBSAN' 42 | make fuzz 43 | 44 | Run with: 45 | export ASAN_OPTIONS=allow_addr2line=1 46 | export UBSAN_OPTIONS=halt_on_error=1 47 | tests/rANS_static_fuzz corpus 48 | or 49 | tests/rANS_static_fuzz -detect_leaks=0 corpus 50 | 51 | I generated corpus as a whole bunch of precompressed tiny inputs from 52 | tests/dat/q4 for different compression modes. 53 | 54 | For debugging purposes, we can compile a non-fuzzer non-ASAN build using 55 | -DNOFUZZ which creates a binary we can debug on any libfuzzer generated 56 | output using valgrind. (The rans4x8 command line test won't quite work as 57 | it's a slightly different input format with explicit sizes in the binary 58 | stream.)
/*
 * libFuzzer entry point for the rANS decoder.
 *
 * Passes the fuzzer-supplied buffer to rans_uncompress() and frees any
 * buffer it returns; the decoded content itself is not inspected.
 *
 * Always returns 0.
 */
int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) {
    unsigned int uncomp_size;
    unsigned char *uncomp = rans_uncompress(in, in_size, &uncomp_size);

    free(uncomp);   /* free(NULL) is a no-op, so no guard is needed */

    return 0;
}

#ifdef NOFUZZ
/*
 * NOTE(review): the header names on these three #include lines are not
 * visible in the extracted source; the ones below are what open(),
 * O_RDONLY and ssize_t require -- confirm against the real file.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* Read chunk size; parenthesised so it is safe inside any expression. */
#define BS (1024*1024)

/*
 * Reads the whole of file "fn" into a newly allocated buffer.
 *
 * The buffer starts at BS bytes and doubles whenever fewer than BS bytes
 * of space remain, so each read() always has room for a full chunk.
 *
 * On success returns the buffer (caller frees) and stores the number of
 * bytes read in *lenp.  Returns NULL with *lenp set to 0 if the file
 * cannot be opened or memory runs out.  A read() error is reported with
 * perror() and whatever was read up to that point is returned.
 */
static unsigned char *load(char *fn, uint64_t *lenp) {
    unsigned char *data = NULL;
    uint64_t dsize = 0;
    uint64_t dcurr = 0;
    ssize_t len;
    int fd;

    *lenp = 0;

    fd = open(fn, O_RDONLY);
    if (fd < 0) {
        perror(fn);
        return NULL;
    }

    do {
        if (dsize - dcurr < BS) {
            /* Keep the old pointer until realloc is known to have worked */
            unsigned char *tmp;

            dsize = dsize ? dsize * 2 : BS;
            tmp = realloc(data, dsize);
            if (!tmp) {
                perror("realloc");
                free(data);
                close(fd);
                return NULL;
            }
            data = tmp;
        }

        len = read(fd, data + dcurr, BS);
        if (len > 0)
            dcurr += len;
    } while (len > 0);

    if (len == -1) {
        perror("read");
    }

    close(fd);
    *lenp = dcurr;
    return data;
}

/*
 * Stand-alone driver (built with -DNOFUZZ): loads the file named by
 * argv[1] and runs it once through LLVMFuzzerTestOneInput(), which
 * performs the same decode-and-free that this function used to spell
 * out by hand.
 *
 * Returns 0 after running the input, 1 on a usage or load error.
 */
int main(int argc, char **argv) {
    uint64_t in_size;
    unsigned char *in;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename\n",
                argc > 0 ? argv[0] : "rANS_static_fuzz");
        return 1;
    }

    in = load(argv[1], &in_size);
    if (!in)
        return 1;

    LLVMFuzzerTestOneInput(in, in_size);

    free(in);

    return 0;
}
#endif
#!/bin/sh
# Round-trip and precompressed-data checks for ./rans4x16pr.
#
# For every input under $srcdir/dat/q* (and the optional htscodecs-corpus
# copy) the first column is extracted with newlines removed, then:
#   - for each listed -o value that has a precompressed reference file,
#     the data is encoded, decoded and compared, and the reference file
#     is decoded and compared;
#   - for -o4 and -o5 (32-way), every pairing of default and "-c 0"
#     encoder/decoder is round-tripped, followed by the reference file
#     when one exists.
# Any failing command terminates the script with status 1.
out=test.out
[ -d $out ] || mkdir $out

plain=$out/r4x16-nl
packed=$out/r4x16.comp
unpacked=$out/r4x16.uncomp
errlog=$out/r4x16.stderr

# round_trip ENC_OPTS DEC_OPTS
# Encode $plain with ENC_OPTS, print the compressed size, decode with
# DEC_OPTS and require the result to match $plain.  The option strings
# are deliberately expanded unquoted so "-o4 -c 0" becomes three words.
round_trip() {
    ./rans4x16pr -r $1 $plain $packed 2>>$errlog || exit 1
    wc -c < $packed
    ./rans4x16pr -r -d $2 $packed $unpacked 2>>$errlog || exit 1
    cmp $plain $unpacked || exit 1
}

# check_precompressed FILE
# Decode a stored reference stream and require it to match $plain.
check_precompressed() {
    ./rans4x16pr -r -d $1 $unpacked 2>>$errlog || exit 1
    cmp $plain $unpacked || exit 1
}

for src in `ls -1 $srcdir/dat/q* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null`
do
    ref=${src%/*/*}/dat/r4x16/${src##*/}
    cut -f 1 < $src | tr -d '\012' > $plain

    for order in 0 1 64 65 128 129 192 193 68 69 132 133 196 197 8 9
    do
        # Only orders with a stored reference stream are exercised
        [ -e "$ref.$order" ] || continue
        printf 'Testing rans4x16 -r -o%s on %s\t' $order "$src"

        round_trip "-o$order" ""
        check_precompressed "$ref.$order"
    done

    # 32-way, with cross-compatibility between scalar and SIMD implementations
    for order in 4 5
    do
        printf 'Testing rans4x16 -r -o%s on %s\t' $order "$src"

        round_trip "-o$order"      ""                 # SIMD vs SIMD (auto)
        round_trip "-o$order -c 0" "-o$order -c 0"    # Scalar vs scalar
        round_trip "-o$order -c 0" "-o$order"         # Scalar vs SIMD
        round_trip "-o$order"      "-o$order -c 0"    # SIMD vs Scalar

        # Precompressed data, when present
        if [ -e "$ref.$order" ]
        then
            check_precompressed "$ref.$order"
        fi
    done
done
#!/bin/sh
# Round-trip and precompressed-data checks for ./rans4x8.
#
# For every input under $srcdir/dat/q* (and the optional htscodecs-corpus
# copy) the first column is extracted with newlines removed.  For -o0 and
# -o1 the data is encoded, decoded and compared, then the stored
# reference stream for that order is decoded and compared.
# Any failing command terminates the script with status 1.
out=test.out
[ -d $out ] || mkdir $out

plain=$out/r4x8-nl
packed=$out/r4x8.comp
unpacked=$out/r4x8.uncomp
errlog=$out/r4x8.stderr

# decode_and_compare FILE
# Decode FILE into $unpacked and require it to match $plain.
decode_and_compare() {
    ./rans4x8 -r -d $1 $unpacked 2>>$errlog || exit 1
    cmp $plain $unpacked || exit 1
}

for src in `ls -1 $srcdir/dat/q* $srcdir/htscodecs-corpus/dat/q* 2>/dev/null`
do
    ref=${src%/*/*}/dat/r4x8/${src##*/}
    cut -f 1 < $src | tr -d '\012' > $plain

    for order in 0 1
    do
        printf 'Testing rans4x8 -r -o%s on %s\t' $order "$src"

        # Round trip
        ./rans4x8 -r -o$order $plain $packed 2>>$errlog || exit 1
        wc -c < $packed
        decode_and_compare "$packed"

        # Precompressed data
        decode_and_compare "$ref.$order"
    done
done
#!/bin/sh
# Round-trip and precompressed-data checks for ./tokenise_name3.
#
# For every *.names file under $srcdir/names (and the optional
# htscodecs-corpus copy) and each listed level, the names are encoded,
# decoded with nuls mapped back to newlines, and compared to the input;
# the stored reference stream for that level is then decoded and compared
# the same way.  A failing cmp terminates the script with status 1.
out=test.out
[ -d $out ] || mkdir $out

packed=$out/tok3.comp
unpacked=$out/tok3.uncomp

# decode_and_compare STREAM NAMES
# Decode STREAM, turn nul separators into newlines and require the result
# to match NAMES.
decode_and_compare() {
    ./tokenise_name3 -d -r < $1 | tr '\000' '\012' > $unpacked
    cmp $2 $unpacked || exit 1
}

for src in `ls -1 $srcdir/names/*.names $srcdir/htscodecs-corpus/names/*.names 2>/dev/null`
do
    ref=${src%/*/*}/names/tok3/${src##*/}

    for level in 1 3 5 7 9 11 13 15 17 19
    do
        printf 'Testing tokenise_name3 -r -%s on %s\t' $level "$src"

        # Round trip
        ./tokenise_name3 -r -$level < $src > $packed
        wc -c < $packed
        decode_and_compare "$packed" "$src"

        # Precompressed data
        decode_and_compare "$ref.$level" "$src"
    done
    echo
done
/*
 * libFuzzer entry point for the name tokeniser decoder.
 *
 * Passes the fuzzer-supplied buffer to tok3_decode_names() and frees any
 * buffer it returns; the decoded content itself is not inspected.
 *
 * Always returns 0.
 */
int LLVMFuzzerTestOneInput(uint8_t *in, size_t in_size) {
    unsigned int uncomp_size;
    unsigned char *uncomp = tok3_decode_names(in, in_size, &uncomp_size);

    free(uncomp);   /* free(NULL) is a no-op, so no guard is needed */

    return 0;
}

#ifdef NOFUZZ
/*
 * NOTE(review): the header names on these three #include lines are not
 * visible in the extracted source; the ones below are what open(),
 * O_RDONLY and ssize_t require -- confirm against the real file.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* Read chunk size; parenthesised so it is safe inside any expression. */
#define BS (1024*1024)

/*
 * Reads the whole of file "fn" into a newly allocated buffer.
 *
 * The buffer starts at BS bytes and doubles whenever fewer than BS bytes
 * of space remain, so each read() always has room for a full chunk.
 *
 * On success returns the buffer (caller frees) and stores the number of
 * bytes read in *lenp.  Returns NULL with *lenp set to 0 if the file
 * cannot be opened or memory runs out.  A read() error is reported with
 * perror() and whatever was read up to that point is returned.
 */
static unsigned char *load(char *fn, uint64_t *lenp) {
    unsigned char *data = NULL;
    uint64_t dsize = 0;
    uint64_t dcurr = 0;
    ssize_t len;
    int fd;

    *lenp = 0;

    fd = open(fn, O_RDONLY);
    if (fd < 0) {
        perror(fn);
        return NULL;
    }

    do {
        if (dsize - dcurr < BS) {
            /* Keep the old pointer until realloc is known to have worked */
            unsigned char *tmp;

            dsize = dsize ? dsize * 2 : BS;
            tmp = realloc(data, dsize);
            if (!tmp) {
                perror("realloc");
                free(data);
                close(fd);
                return NULL;
            }
            data = tmp;
        }

        len = read(fd, data + dcurr, BS);
        if (len > 0)
            dcurr += len;
    } while (len > 0);

    if (len == -1) {
        perror("read");
    }

    close(fd);
    *lenp = dcurr;
    return data;
}

/*
 * Stand-alone driver (built with -DNOFUZZ): loads the file named by
 * argv[1] and passes it once to LLVMFuzzerTestOneInput().
 *
 * Returns 0 after running the input, 1 on a usage or load error.
 */
int main(int argc, char **argv) {
    uint64_t in_size;
    unsigned char *in;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename\n",
                argc > 0 ? argv[0] : "tokenise_name3_fuzz");
        return 1;
    }

    in = load(argv[1], &in_size);
    if (!in)
        return 1;

    LLVMFuzzerTestOneInput(in, in_size);

    free(in);
    return 0;
}
#endif
/*
 * libFuzzer entry point for round-trip testing of the name tokeniser.
 *
 * The input is copied with every newline replaced by a nul, and a
 * trailing nul is appended when the last byte compares greater than
 * '\n'.  The copy is then encoded with tok3_encode_names() and decoded
 * with tok3_decode_names() for each combination of arith in {0, 1} and
 * level in {1, 9}.  The process aborts if decoding fails or the decoded
 * bytes differ from the copy.
 *
 * Returns 0 when every combination round-trips, or -1 when the input is
 * larger than 8192 bytes or the encoder rejects it.
 */
int LLVMFuzzerTestOneInput(const uint8_t *in, size_t in_sz) {
    int level, arith;
    // One byte more than the largest accepted input, so that appending
    // the terminating nul to an 8192 byte input stays inside the array.
    char in2[8192 + 1];

    // 4096 is default max size for libfuzzer anyway
    if (in_sz > 8192)
        return -1;

    // Turn newlines to nuls so we can do round-trip testing
    // on multi-name data.
    size_t i;
    for (i = 0; i < in_sz; i++)
        in2[i] = in[i] == '\n' ? 0 : in[i];
    // NOTE(review): with a signed char this comparison is false for bytes
    // 0x01-0x09 and 0x80-0xff, so such inputs get no terminator; left
    // unchanged as the intent cannot be confirmed from here.
    if (in_sz && in2[in_sz-1] > '\n')
        in2[in_sz++] = 0;

    for (arith = 0; arith < 2; arith++) {
        for (level = 1; level <= 9; level += 8) { // 1 & 9 only
            int clen;
            uint8_t *cdat = tok3_encode_names((char *)in2, in_sz, level,
                                              arith, &clen, NULL);
            if (!cdat)
                // skip this input from corpus as it's unparseable
                return -1;

            uint32_t ulen;
            uint8_t *udat = tok3_decode_names(cdat, clen, &ulen);
            if (!udat || ulen != in_sz)
                abort();

            if (memcmp(in2, udat, ulen) != 0)
                abort();

            free(cdat);
            free(udat);
        }
    }

    return 0;
}