├── .cirrus.yml ├── .dockerignore ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE.md ├── mmseqs2_logo.png └── workflows │ ├── Dockerfile.GPU-buster-cross-sbsa │ ├── Dockerfile.GPU-manylinux2014 │ ├── docker.yml │ ├── mac-arm64.yml │ └── test-gpu.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE.md ├── README.md ├── azure-pipelines.yml ├── cmake ├── AppendTargetProperty.cmake ├── CheckSSEFeatures.cmake ├── FindASan.cmake ├── FindAtomic.cmake ├── FindMSan.cmake ├── FindTSan.cmake ├── FindUBSan.cmake ├── GetGitRevisionDescription.cmake ├── GetGitRevisionDescription.cmake.in ├── MMseqsResourceCompiler.cmake ├── MMseqsSetupDerivedTarget.cmake ├── MMseqsSetupTest.cmake ├── checkshell.sh └── xxdi.pl ├── data ├── CMakeLists.txt ├── PAM10.out ├── PAM100.out ├── PAM110.out ├── PAM120.out ├── PAM130.out ├── PAM140.out ├── PAM150.out ├── PAM160.out ├── PAM170.out ├── PAM180.out ├── PAM190.out ├── PAM20.out ├── PAM30.out ├── PAM40.out ├── PAM50.out ├── PAM60.out ├── PAM70.out ├── PAM80.out ├── PAM90.out ├── VTML10.out ├── VTML120.out ├── VTML160.out ├── VTML20.out ├── VTML40.out ├── VTML80.out ├── blosum100.out ├── blosum30.out ├── blosum35.out ├── blosum40.out ├── blosum45.out ├── blosum50.out ├── blosum55.out ├── blosum60.out ├── blosum62.out ├── blosum65.out ├── blosum70.out ├── blosum75.out ├── blosum80.out ├── blosum85.out ├── blosum90.out ├── blosum95.out ├── nucleotide.out ├── resources │ ├── CMakeLists.txt │ ├── CovSeqidQscPercMinDiag.lib │ ├── CovSeqidQscPercMinDiagTargetCov.lib │ ├── ExpOpt3_8_polished.cs32.lib │ ├── K4000.crf │ ├── Library255_may17.lib │ ├── cs219.lib │ ├── krona_prelude.html │ ├── libPolished_8.lib │ ├── libPure_blosum62_255.lib │ ├── libPure_blosum62_32.lib │ └── result_viz_prelude.html.zst └── workflow │ ├── CMakeLists.txt │ ├── blastn.sh │ ├── blastp.sh │ ├── blastpgp.sh │ ├── cascaded_clustering.sh │ ├── clustering.sh │ ├── createindex.sh │ ├── createtaxdb.sh │ ├── databases.sh │ ├── easycluster.sh │ ├── 
easyrbh.sh │ ├── easysearch.sh │ ├── easytaxonomy.sh │ ├── enrich.sh │ ├── iterativepp.sh │ ├── linclust.sh │ ├── linsearch.sh │ ├── map.sh │ ├── multihitdb.sh │ ├── multihitsearch.sh │ ├── nucleotide_clustering.sh │ ├── pickconsensusrep.sh │ ├── rbh.sh │ ├── searchslicedtargetprofile.sh │ ├── searchtargetprofile.sh │ ├── taxonomy.sh │ ├── taxpercontig.sh │ ├── translated_search.sh │ ├── tsv2exprofiledb.sh │ └── update_clustering.sh ├── examples ├── DB.fasta └── QUERY.fasta ├── lib ├── alp │ ├── CMakeLists.txt │ ├── LICENSE │ ├── njn_approx.hpp │ ├── njn_doubletype.hpp │ ├── njn_dynprogprob.cpp │ ├── njn_dynprogprob.hpp │ ├── njn_dynprogproblim.cpp │ ├── njn_dynprogproblim.hpp │ ├── njn_dynprogprobproto.cpp │ ├── njn_dynprogprobproto.hpp │ ├── njn_function.hpp │ ├── njn_integer.hpp │ ├── njn_ioutil.cpp │ ├── njn_ioutil.hpp │ ├── njn_localmaxstat.cpp │ ├── njn_localmaxstat.hpp │ ├── njn_localmaxstatmatrix.cpp │ ├── njn_localmaxstatmatrix.hpp │ ├── njn_localmaxstatutil.cpp │ ├── njn_localmaxstatutil.hpp │ ├── njn_matrix.hpp │ ├── njn_memutil.hpp │ ├── njn_random.cpp │ ├── njn_random.hpp │ ├── njn_root.hpp │ ├── njn_uniform.hpp │ ├── njn_vector.hpp │ ├── readme.txt │ ├── sls_alignment_evaluer.cpp │ ├── sls_alignment_evaluer.hpp │ ├── sls_alp.cpp │ ├── sls_alp.hpp │ ├── sls_alp_data.cpp │ ├── sls_alp_data.hpp │ ├── sls_alp_regression.cpp │ ├── sls_alp_regression.hpp │ ├── sls_alp_sim.cpp │ ├── sls_alp_sim.hpp │ ├── sls_basic.cpp │ ├── sls_basic.hpp │ ├── sls_normal_distr_array.hpp │ ├── sls_pvalues.cpp │ └── sls_pvalues.hpp ├── base64 │ ├── LICENSE │ ├── README.md │ └── base64.h ├── fast_float │ ├── VERSION │ └── fast_float.h ├── fmt │ ├── LICENSE │ ├── README.md │ ├── VERSION │ └── fmt │ │ ├── args.h │ │ ├── base.h │ │ ├── chrono.h │ │ ├── color.h │ │ ├── compile.h │ │ ├── core.h │ │ ├── format-inl.h │ │ ├── format.h │ │ ├── os.h │ │ ├── ostream.h │ │ ├── printf.h │ │ ├── ranges.h │ │ ├── std.h │ │ └── xchar.h ├── ips4o │ ├── LICENSE │ ├── README.md │ ├── ips4o.hpp │ 
└── ips4o │ │ ├── base_case.hpp │ │ ├── block_permutation.hpp │ │ ├── bucket_pointers.hpp │ │ ├── buffers.hpp │ │ ├── classifier.hpp │ │ ├── cleanup_margins.hpp │ │ ├── config.hpp │ │ ├── empty_block_movement.hpp │ │ ├── ips4o.hpp │ │ ├── ips4o_fwd.hpp │ │ ├── local_classification.hpp │ │ ├── memory.hpp │ │ ├── parallel.hpp │ │ ├── partitioning.hpp │ │ ├── sampling.hpp │ │ ├── sequential.hpp │ │ ├── synchronization.hpp │ │ ├── thread_pool.hpp │ │ └── utils.hpp ├── ksw2 │ ├── CMakeLists.txt │ ├── LICENSE.txt │ ├── README.md │ ├── kseq.h │ ├── ksw2.h │ └── ksw2_extz2_sse.cpp ├── libmarv │ ├── .gitignore │ ├── LICENSE │ ├── Makefile │ ├── Readme.md │ ├── allqueries.fasta │ ├── src │ │ ├── CMakeLists.txt │ │ ├── base64.h │ │ ├── benchmarking.cuh │ │ ├── blosum.cu │ │ ├── blosum.hpp │ │ ├── config.hpp │ │ ├── convert.cuh │ │ ├── cudasw4.cuh │ │ ├── dbbatching.cuh │ │ ├── dbdata.cpp │ │ ├── dbdata.hpp │ │ ├── gapless_kernel_config.cuh │ │ ├── gpudatabaseallocation.cuh │ │ ├── hpc_helpers │ │ │ ├── all_helpers.cuh │ │ │ ├── coop_group_helpers.cuh │ │ │ ├── cuda_helpers.cuh │ │ │ ├── cuda_raiiwrappers.cuh │ │ │ ├── custom_thrust_allocators.cuh │ │ │ ├── hashers.cuh │ │ │ ├── hpc_helpers.h │ │ │ ├── io_helpers.h │ │ │ ├── nvtx_markers.cuh │ │ │ ├── packed_types.cuh │ │ │ ├── peer_access.cuh │ │ │ ├── simple_allocation.cuh │ │ │ ├── timers.cuh │ │ │ ├── type_helpers.h │ │ │ └── utility_kernels.cuh │ │ ├── kernelhelpers.cuh │ │ ├── kernels.cuh │ │ ├── kseqpp │ │ │ ├── filereader.hpp │ │ │ ├── gziphelpers.hpp │ │ │ └── kseqpp.hpp │ │ ├── length_partitions.hpp │ │ ├── main.cu │ │ ├── makedb.cpp │ │ ├── mapped_file.hpp │ │ ├── marv.cu │ │ ├── marv.h │ │ ├── mathops.cuh │ │ ├── mmapbuffer.hpp │ │ ├── options.cpp │ │ ├── options.hpp │ │ ├── pssm.cuh │ │ ├── pssmkernels_gapless.cuh │ │ ├── pssmkernels_gapless_instantiation_dpx.cu │ │ ├── pssmkernels_gapless_instantiation_dpx_kernelparamzero.cu │ │ ├── pssmkernels_gapless_instantiation_half2.cu │ │ ├── 
pssmkernels_gapless_instantiation_half2_kernelparamzero.cu │ │ ├── pssmkernels_smithwaterman.cuh │ │ ├── pssmkernels_smithwaterman_instantiation_dpx.cu │ │ ├── pssmkernels_smithwaterman_instantiation_float.cu │ │ ├── sequence_io.cpp │ │ ├── sequence_io.h │ │ ├── smithwaterman_kernel_config.cuh │ │ ├── target_subject_ids.cuh │ │ ├── tileconfigsearch.cu │ │ ├── types.hpp │ │ └── util.cuh │ └── tuningconfigs │ │ ├── A100 │ │ ├── gapless.txt │ │ └── swendpos.txt │ │ ├── GraceHopper │ │ ├── gapless.txt │ │ └── swendpos.txt │ │ ├── H100 │ │ ├── gapless.txt │ │ └── swendpos.txt │ │ ├── L4 │ │ ├── gapless.txt │ │ └── swendpos.txt │ │ ├── L40S │ │ ├── gapless.txt │ │ └── swendpos.txt │ │ ├── RTX4090 │ │ ├── gapless.txt │ │ └── swendpos.txt │ │ └── T4 │ │ ├── gapless.txt │ │ └── swendpos.txt ├── microtar │ ├── CMakeLists.txt │ ├── LICENSE │ ├── README.md │ ├── microtar.c │ └── microtar.h ├── nedmalloc │ ├── CMakeLists.txt │ ├── License.txt │ ├── Readme.txt │ ├── malloc.c.h │ ├── nedmalloc.c │ └── nedmalloc.h ├── simd │ └── simd.h ├── simde │ └── simde │ │ ├── COPYING │ │ ├── README.md │ │ ├── arm │ │ ├── neon.h │ │ ├── neon │ │ │ ├── aba.h │ │ │ ├── abd.h │ │ │ ├── abdl.h │ │ │ ├── abs.h │ │ │ ├── add.h │ │ │ ├── addhn.h │ │ │ ├── addl.h │ │ │ ├── addl_high.h │ │ │ ├── addlv.h │ │ │ ├── addv.h │ │ │ ├── addw.h │ │ │ ├── addw_high.h │ │ │ ├── and.h │ │ │ ├── bcax.h │ │ │ ├── bic.h │ │ │ ├── bsl.h │ │ │ ├── cage.h │ │ │ ├── cagt.h │ │ │ ├── ceq.h │ │ │ ├── ceqz.h │ │ │ ├── cge.h │ │ │ ├── cgez.h │ │ │ ├── cgt.h │ │ │ ├── cgtz.h │ │ │ ├── cle.h │ │ │ ├── clez.h │ │ │ ├── cls.h │ │ │ ├── clt.h │ │ │ ├── cltz.h │ │ │ ├── clz.h │ │ │ ├── cmla.h │ │ │ ├── cmla_rot180.h │ │ │ ├── cmla_rot270.h │ │ │ ├── cmla_rot90.h │ │ │ ├── cnt.h │ │ │ ├── combine.h │ │ │ ├── create.h │ │ │ ├── cvt.h │ │ │ ├── dot.h │ │ │ ├── dot_lane.h │ │ │ ├── dup_lane.h │ │ │ ├── dup_n.h │ │ │ ├── eor.h │ │ │ ├── ext.h │ │ │ ├── fma.h │ │ │ ├── fma_lane.h │ │ │ ├── fma_n.h │ │ │ ├── get_high.h │ │ │ ├── 
get_lane.h │ │ │ ├── get_low.h │ │ │ ├── hadd.h │ │ │ ├── hsub.h │ │ │ ├── ld1.h │ │ │ ├── ld1_dup.h │ │ │ ├── ld1_lane.h │ │ │ ├── ld2.h │ │ │ ├── ld3.h │ │ │ ├── ld4.h │ │ │ ├── ld4_lane.h │ │ │ ├── max.h │ │ │ ├── maxnm.h │ │ │ ├── maxv.h │ │ │ ├── min.h │ │ │ ├── minnm.h │ │ │ ├── minv.h │ │ │ ├── mla.h │ │ │ ├── mla_n.h │ │ │ ├── mlal.h │ │ │ ├── mlal_high.h │ │ │ ├── mlal_high_n.h │ │ │ ├── mlal_lane.h │ │ │ ├── mlal_n.h │ │ │ ├── mls.h │ │ │ ├── mls_n.h │ │ │ ├── mlsl.h │ │ │ ├── mlsl_high.h │ │ │ ├── mlsl_high_n.h │ │ │ ├── mlsl_lane.h │ │ │ ├── mlsl_n.h │ │ │ ├── movl.h │ │ │ ├── movl_high.h │ │ │ ├── movn.h │ │ │ ├── movn_high.h │ │ │ ├── mul.h │ │ │ ├── mul_lane.h │ │ │ ├── mul_n.h │ │ │ ├── mull.h │ │ │ ├── mull_high.h │ │ │ ├── mull_lane.h │ │ │ ├── mull_n.h │ │ │ ├── mvn.h │ │ │ ├── neg.h │ │ │ ├── orn.h │ │ │ ├── orr.h │ │ │ ├── padal.h │ │ │ ├── padd.h │ │ │ ├── paddl.h │ │ │ ├── pmax.h │ │ │ ├── pmin.h │ │ │ ├── qabs.h │ │ │ ├── qadd.h │ │ │ ├── qdmulh.h │ │ │ ├── qdmulh_lane.h │ │ │ ├── qdmulh_n.h │ │ │ ├── qdmull.h │ │ │ ├── qmovn.h │ │ │ ├── qmovn_high.h │ │ │ ├── qmovun.h │ │ │ ├── qneg.h │ │ │ ├── qrdmulh.h │ │ │ ├── qrdmulh_lane.h │ │ │ ├── qrdmulh_n.h │ │ │ ├── qrshrn_n.h │ │ │ ├── qrshrun_n.h │ │ │ ├── qshl.h │ │ │ ├── qshlu_n.h │ │ │ ├── qshrn_n.h │ │ │ ├── qshrun_n.h │ │ │ ├── qsub.h │ │ │ ├── qtbl.h │ │ │ ├── qtbx.h │ │ │ ├── rbit.h │ │ │ ├── recpe.h │ │ │ ├── recps.h │ │ │ ├── reinterpret.h │ │ │ ├── rev16.h │ │ │ ├── rev32.h │ │ │ ├── rev64.h │ │ │ ├── rhadd.h │ │ │ ├── rnd.h │ │ │ ├── rndi.h │ │ │ ├── rndm.h │ │ │ ├── rndn.h │ │ │ ├── rndp.h │ │ │ ├── rshl.h │ │ │ ├── rshr_n.h │ │ │ ├── rshrn_n.h │ │ │ ├── rsqrte.h │ │ │ ├── rsqrts.h │ │ │ ├── rsra_n.h │ │ │ ├── set_lane.h │ │ │ ├── shl.h │ │ │ ├── shl_n.h │ │ │ ├── shll_n.h │ │ │ ├── shr_n.h │ │ │ ├── shrn_n.h │ │ │ ├── sqadd.h │ │ │ ├── sra_n.h │ │ │ ├── sri_n.h │ │ │ ├── st1.h │ │ │ ├── st1_lane.h │ │ │ ├── st2.h │ │ │ ├── st2_lane.h │ │ │ ├── st3.h │ │ │ ├── st3_lane.h │ │ │ ├── 
st4.h │ │ │ ├── st4_lane.h │ │ │ ├── sub.h │ │ │ ├── subhn.h │ │ │ ├── subl.h │ │ │ ├── subl_high.h │ │ │ ├── subw.h │ │ │ ├── subw_high.h │ │ │ ├── tbl.h │ │ │ ├── tbx.h │ │ │ ├── trn.h │ │ │ ├── trn1.h │ │ │ ├── trn2.h │ │ │ ├── tst.h │ │ │ ├── types.h │ │ │ ├── uqadd.h │ │ │ ├── uzp.h │ │ │ ├── uzp1.h │ │ │ ├── uzp2.h │ │ │ ├── xar.h │ │ │ ├── zip.h │ │ │ ├── zip1.h │ │ │ └── zip2.h │ │ ├── sve.h │ │ └── sve │ │ │ ├── add.h │ │ │ ├── and.h │ │ │ ├── cmplt.h │ │ │ ├── cnt.h │ │ │ ├── dup.h │ │ │ ├── ld1.h │ │ │ ├── ptest.h │ │ │ ├── ptrue.h │ │ │ ├── qadd.h │ │ │ ├── reinterpret.h │ │ │ ├── sel.h │ │ │ ├── st1.h │ │ │ ├── sub.h │ │ │ ├── types.h │ │ │ └── whilelt.h │ │ ├── check.h │ │ ├── debug-trap.h │ │ ├── hedley.h │ │ ├── mips │ │ ├── msa.h │ │ └── msa │ │ │ ├── add_a.h │ │ │ ├── adds.h │ │ │ ├── adds_a.h │ │ │ ├── addv.h │ │ │ ├── addvi.h │ │ │ ├── and.h │ │ │ ├── andi.h │ │ │ ├── ld.h │ │ │ ├── madd.h │ │ │ ├── st.h │ │ │ ├── subv.h │ │ │ └── types.h │ │ ├── simde-align.h │ │ ├── simde-arch.h │ │ ├── simde-common.h │ │ ├── simde-complex.h │ │ ├── simde-constify.h │ │ ├── simde-detect-clang.h │ │ ├── simde-diagnostic.h │ │ ├── simde-f16.h │ │ ├── simde-features.h │ │ ├── simde-math.h │ │ ├── wasm │ │ ├── relaxed-simd.h │ │ └── simd128.h │ │ └── x86 │ │ ├── avx.h │ │ ├── avx2.h │ │ ├── avx512.h │ │ ├── avx512 │ │ ├── 2intersect.h │ │ ├── 4dpwssd.h │ │ ├── 4dpwssds.h │ │ ├── abs.h │ │ ├── add.h │ │ ├── adds.h │ │ ├── and.h │ │ ├── andnot.h │ │ ├── avg.h │ │ ├── bitshuffle.h │ │ ├── blend.h │ │ ├── broadcast.h │ │ ├── cast.h │ │ ├── cmp.h │ │ ├── cmpeq.h │ │ ├── cmpge.h │ │ ├── cmpgt.h │ │ ├── cmple.h │ │ ├── cmplt.h │ │ ├── cmpneq.h │ │ ├── compress.h │ │ ├── conflict.h │ │ ├── copysign.h │ │ ├── cvt.h │ │ ├── cvts.h │ │ ├── cvtt.h │ │ ├── dbsad.h │ │ ├── div.h │ │ ├── dpbf16.h │ │ ├── dpbusd.h │ │ ├── dpbusds.h │ │ ├── dpwssd.h │ │ ├── dpwssds.h │ │ ├── expand.h │ │ ├── extract.h │ │ ├── fixupimm.h │ │ ├── fixupimm_round.h │ │ ├── flushsubnormal.h │ │ ├── 
fmadd.h │ │ ├── fmsub.h │ │ ├── fnmadd.h │ │ ├── fnmsub.h │ │ ├── insert.h │ │ ├── kshift.h │ │ ├── load.h │ │ ├── loadu.h │ │ ├── lzcnt.h │ │ ├── madd.h │ │ ├── maddubs.h │ │ ├── max.h │ │ ├── min.h │ │ ├── mov.h │ │ ├── mov_mask.h │ │ ├── movm.h │ │ ├── mul.h │ │ ├── mulhi.h │ │ ├── mulhrs.h │ │ ├── mullo.h │ │ ├── multishift.h │ │ ├── negate.h │ │ ├── or.h │ │ ├── packs.h │ │ ├── packus.h │ │ ├── permutex2var.h │ │ ├── permutexvar.h │ │ ├── popcnt.h │ │ ├── range.h │ │ ├── range_round.h │ │ ├── rol.h │ │ ├── rolv.h │ │ ├── ror.h │ │ ├── rorv.h │ │ ├── round.h │ │ ├── roundscale.h │ │ ├── roundscale_round.h │ │ ├── sad.h │ │ ├── scalef.h │ │ ├── set.h │ │ ├── set1.h │ │ ├── set4.h │ │ ├── setone.h │ │ ├── setr.h │ │ ├── setr4.h │ │ ├── setzero.h │ │ ├── shldv.h │ │ ├── shuffle.h │ │ ├── sll.h │ │ ├── slli.h │ │ ├── sllv.h │ │ ├── sqrt.h │ │ ├── sra.h │ │ ├── srai.h │ │ ├── srav.h │ │ ├── srl.h │ │ ├── srli.h │ │ ├── srlv.h │ │ ├── store.h │ │ ├── storeu.h │ │ ├── sub.h │ │ ├── subs.h │ │ ├── ternarylogic.h │ │ ├── test.h │ │ ├── testn.h │ │ ├── types.h │ │ ├── unpackhi.h │ │ ├── unpacklo.h │ │ ├── xor.h │ │ └── xorsign.h │ │ ├── clmul.h │ │ ├── f16c.h │ │ ├── fma.h │ │ ├── gfni.h │ │ ├── mmx.h │ │ ├── sse.h │ │ ├── sse2.h │ │ ├── sse3.h │ │ ├── sse4.1.h │ │ ├── sse4.2.h │ │ ├── ssse3.h │ │ ├── svml.h │ │ └── xop.h ├── tantan │ ├── CMakeLists.txt │ ├── mcf_simd.h │ ├── tantan.cpp │ └── tantan.h ├── tinyexpr │ ├── .travis.yml │ ├── CMakeLists.txt │ ├── CONTRIBUTING │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── benchmark.c │ ├── doc │ │ ├── e1.dot │ │ ├── e1.png │ │ ├── e2.dot │ │ └── e2.png │ ├── example.c │ ├── example2.c │ ├── example3.c │ ├── minctest.h │ ├── test.c │ ├── tinyexpr.c │ └── tinyexpr.h ├── xxhash │ ├── LICENSE │ ├── xxhash.c │ └── xxhash.h └── zstd │ ├── .buckconfig │ ├── .buckversion │ ├── .circleci │ └── config.yml │ ├── .gitattributes │ ├── .gitignore │ ├── .travis.yml │ ├── CHANGELOG │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── 
COPYING │ ├── LICENSE │ ├── Makefile │ ├── README.md │ ├── TESTING.md │ ├── appveyor.yml │ ├── build │ ├── .gitignore │ ├── README.md │ ├── VS2008 │ │ ├── fullbench │ │ │ └── fullbench.vcproj │ │ ├── fuzzer │ │ │ └── fuzzer.vcproj │ │ ├── zstd.sln │ │ ├── zstd │ │ │ └── zstd.vcproj │ │ └── zstdlib │ │ │ └── zstdlib.vcproj │ ├── VS2010 │ │ ├── CompileAsCpp.props │ │ ├── datagen │ │ │ └── datagen.vcxproj │ │ ├── fullbench-dll │ │ │ └── fullbench-dll.vcxproj │ │ ├── fullbench │ │ │ └── fullbench.vcxproj │ │ ├── fuzzer │ │ │ └── fuzzer.vcxproj │ │ ├── libzstd-dll │ │ │ ├── libzstd-dll.rc │ │ │ └── libzstd-dll.vcxproj │ │ ├── libzstd │ │ │ └── libzstd.vcxproj │ │ ├── zstd.sln │ │ └── zstd │ │ │ ├── zstd.rc │ │ │ └── zstd.vcxproj │ ├── VS_scripts │ │ ├── README.md │ │ ├── build.VS2010.cmd │ │ ├── build.VS2012.cmd │ │ ├── build.VS2013.cmd │ │ ├── build.VS2015.cmd │ │ ├── build.VS2017.cmd │ │ ├── build.VS2017Community.cmd │ │ ├── build.VS2017Enterprise.cmd │ │ ├── build.VS2017Professional.cmd │ │ └── build.generic.cmd │ ├── cmake │ │ ├── .gitignore │ │ ├── CMakeLists.txt │ │ ├── CMakeModules │ │ │ ├── AddZstdCompilationFlags.cmake │ │ │ └── GetZstdLibraryVersion.cmake │ │ ├── contrib │ │ │ ├── CMakeLists.txt │ │ │ ├── gen_html │ │ │ │ └── CMakeLists.txt │ │ │ └── pzstd │ │ │ │ └── CMakeLists.txt │ │ ├── lib │ │ │ ├── .gitignore │ │ │ ├── CMakeLists.txt │ │ │ ├── cmake_uninstall.cmake.in │ │ │ └── pkgconfig.cmake │ │ ├── programs │ │ │ ├── .gitignore │ │ │ └── CMakeLists.txt │ │ └── tests │ │ │ ├── .gitignore │ │ │ └── CMakeLists.txt │ └── meson │ │ ├── GetZstdLibraryVersion.py │ │ ├── InstallSymlink.py │ │ ├── README.md │ │ ├── contrib │ │ ├── gen_html │ │ │ └── meson.build │ │ ├── meson.build │ │ └── pzstd │ │ │ └── meson.build │ │ ├── lib │ │ └── meson.build │ │ ├── meson.build │ │ ├── meson_options.txt │ │ ├── programs │ │ └── meson.build │ │ └── tests │ │ ├── meson.build │ │ └── valgrindTest.py │ ├── contrib │ ├── VS2005 │ │ ├── README.md │ │ ├── fullbench │ │ │ └── 
fullbench.vcproj │ │ ├── fuzzer │ │ │ └── fuzzer.vcproj │ │ ├── zstd.sln │ │ ├── zstd │ │ │ └── zstd.vcproj │ │ └── zstdlib │ │ │ └── zstdlib.vcproj │ ├── adaptive-compression │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── adapt.c │ │ ├── datagencli.c │ │ ├── test-correctness.sh │ │ └── test-performance.sh │ ├── cleanTabs │ ├── docker │ │ ├── Dockerfile │ │ └── README.md │ ├── experimental_dict_builders │ │ ├── benchmarkDictBuilder │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── benchmark.c │ │ │ ├── dictBuilder.h │ │ │ └── test.sh │ │ ├── fastCover │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── fastCover.c │ │ │ ├── fastCover.h │ │ │ ├── main.c │ │ │ └── test.sh │ │ └── randomDictBuilder │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── io.c │ │ │ ├── io.h │ │ │ ├── main.c │ │ │ ├── random.c │ │ │ ├── random.h │ │ │ └── test.sh │ ├── gen_html │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── gen-zstd-manual.sh │ │ └── gen_html.cpp │ ├── largeNbDicts │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ └── largeNbDicts.c │ ├── linux-kernel │ │ ├── .gitignore │ │ ├── 0000-cover-letter.patch │ │ ├── 0001-lib-Add-xxhash-module.patch │ │ ├── 0002-lib-Add-zstd-modules.patch │ │ ├── 0003-btrfs-Add-zstd-support.patch │ │ ├── 0004-squashfs-Add-zstd-support.patch │ │ ├── 0005-crypto-Add-zstd-support.patch │ │ ├── 0006-squashfs-tools-Add-zstd-support.patch │ │ ├── COPYING │ │ ├── README.md │ │ ├── btrfs-benchmark.sh │ │ ├── btrfs-extract-benchmark.sh │ │ ├── fs │ │ │ ├── btrfs │ │ │ │ └── zstd.c │ │ │ └── squashfs │ │ │ │ └── zstd_wrapper.c │ │ ├── include │ │ │ └── linux │ │ │ │ ├── xxhash.h │ │ │ │ └── zstd.h │ │ ├── kernelize.sh │ │ ├── lib │ │ │ ├── Kconfig.diff │ │ │ ├── Makefile.diff │ │ │ ├── xxhash.c │ │ │ └── zstd │ │ │ │ ├── .clang-format │ │ │ │ ├── Makefile │ │ │ │ ├── bitstream.h │ │ │ │ ├── compress.c │ │ │ │ ├── decompress.c │ │ │ │ ├── entropy_common.c │ │ │ │ ├── error_private.h │ │ │ │ ├── fse.h │ │ │ │ ├── fse_compress.c │ 
│ │ │ ├── fse_decompress.c │ │ │ │ ├── huf.h │ │ │ │ ├── huf_compress.c │ │ │ │ ├── huf_decompress.c │ │ │ │ ├── mem.h │ │ │ │ ├── zstd_common.c │ │ │ │ ├── zstd_internal.h │ │ │ │ └── zstd_opt.h │ │ ├── squashfs-benchmark.sh │ │ ├── test │ │ │ ├── .gitignore │ │ │ ├── DecompressCrash.c │ │ │ ├── Makefile │ │ │ ├── RoundTripCrash.c │ │ │ ├── UserlandTest.cpp │ │ │ ├── XXHashUserlandTest.cpp │ │ │ └── include │ │ │ │ ├── asm │ │ │ │ └── unaligned.h │ │ │ │ └── linux │ │ │ │ ├── compiler.h │ │ │ │ ├── errno.h │ │ │ │ ├── kernel.h │ │ │ │ ├── math64.h │ │ │ │ ├── module.h │ │ │ │ ├── string.h │ │ │ │ └── types.h │ │ ├── xxhash_test.c │ │ ├── zstd_compress_test.c │ │ └── zstd_decompress_test.c │ ├── premake │ │ ├── premake4.lua │ │ └── zstd.lua │ ├── pzstd │ │ ├── .gitignore │ │ ├── BUCK │ │ ├── ErrorHolder.h │ │ ├── Logging.h │ │ ├── Makefile │ │ ├── Options.cpp │ │ ├── Options.h │ │ ├── Pzstd.cpp │ │ ├── Pzstd.h │ │ ├── README.md │ │ ├── SkippableFrame.cpp │ │ ├── SkippableFrame.h │ │ ├── images │ │ │ ├── Cspeed.png │ │ │ └── Dspeed.png │ │ ├── main.cpp │ │ ├── test │ │ │ ├── BUCK │ │ │ ├── OptionsTest.cpp │ │ │ ├── PzstdTest.cpp │ │ │ ├── RoundTrip.h │ │ │ └── RoundTripTest.cpp │ │ └── utils │ │ │ ├── BUCK │ │ │ ├── Buffer.h │ │ │ ├── FileSystem.h │ │ │ ├── Likely.h │ │ │ ├── Range.h │ │ │ ├── ResourcePool.h │ │ │ ├── ScopeGuard.h │ │ │ ├── ThreadPool.h │ │ │ ├── WorkQueue.h │ │ │ └── test │ │ │ ├── BUCK │ │ │ ├── BufferTest.cpp │ │ │ ├── RangeTest.cpp │ │ │ ├── ResourcePoolTest.cpp │ │ │ ├── ScopeGuardTest.cpp │ │ │ ├── ThreadPoolTest.cpp │ │ │ └── WorkQueueTest.cpp │ ├── seekable_format │ │ ├── examples │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── parallel_compression.c │ │ │ ├── parallel_processing.c │ │ │ ├── seekable_compression.c │ │ │ └── seekable_decompression.c │ │ ├── zstd_seekable.h │ │ ├── zstd_seekable_compression_format.md │ │ ├── zstdseek_compress.c │ │ └── zstdseek_decompress.c │ └── snap │ │ └── snapcraft.yaml │ ├── doc │ ├── README.md │ ├── 
educational_decoder │ │ ├── Makefile │ │ ├── README.md │ │ ├── harness.c │ │ ├── zstd_decompress.c │ │ └── zstd_decompress.h │ ├── images │ │ ├── CSpeed2.png │ │ ├── DCspeed5.png │ │ ├── DSpeed3.png │ │ ├── cdict_v136.png │ │ ├── dict-cr.png │ │ ├── dict-cs.png │ │ ├── dict-ds.png │ │ ├── zstd_cdict_v1_3_5.png │ │ └── zstd_logo86.png │ ├── zstd_compression_format.md │ └── zstd_manual.html │ ├── examples │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── dictionary_compression.c │ ├── dictionary_decompression.c │ ├── multiple_streaming_compression.c │ ├── simple_compression.c │ ├── simple_decompression.c │ ├── streaming_compression.c │ ├── streaming_decompression.c │ └── streaming_memory_usage.c │ ├── lib │ ├── .gitignore │ ├── BUCK │ ├── Makefile │ ├── README.md │ ├── common │ │ ├── bitstream.h │ │ ├── compiler.h │ │ ├── cpu.h │ │ ├── debug.c │ │ ├── debug.h │ │ ├── entropy_common.c │ │ ├── error_private.c │ │ ├── error_private.h │ │ ├── fse.h │ │ ├── fse_decompress.c │ │ ├── huf.h │ │ ├── mem.h │ │ ├── pool.c │ │ ├── pool.h │ │ ├── threading.c │ │ ├── threading.h │ │ ├── xxhash.c │ │ ├── xxhash.h │ │ ├── zstd_common.c │ │ ├── zstd_errors.h │ │ └── zstd_internal.h │ ├── compress │ │ ├── fse_compress.c │ │ ├── hist.c │ │ ├── hist.h │ │ ├── huf_compress.c │ │ ├── zstd_compress.c │ │ ├── zstd_compress_internal.h │ │ ├── zstd_double_fast.c │ │ ├── zstd_double_fast.h │ │ ├── zstd_fast.c │ │ ├── zstd_fast.h │ │ ├── zstd_lazy.c │ │ ├── zstd_lazy.h │ │ ├── zstd_ldm.c │ │ ├── zstd_ldm.h │ │ ├── zstd_opt.c │ │ ├── zstd_opt.h │ │ ├── zstdmt_compress.c │ │ └── zstdmt_compress.h │ ├── decompress │ │ ├── huf_decompress.c │ │ ├── zstd_ddict.c │ │ ├── zstd_ddict.h │ │ ├── zstd_decompress.c │ │ ├── zstd_decompress_block.c │ │ ├── zstd_decompress_block.h │ │ └── zstd_decompress_internal.h │ ├── deprecated │ │ ├── zbuff.h │ │ ├── zbuff_common.c │ │ ├── zbuff_compress.c │ │ └── zbuff_decompress.c │ ├── dictBuilder │ │ ├── cover.c │ │ ├── cover.h │ │ ├── divsufsort.c │ │ ├── 
divsufsort.h │ │ ├── fastcover.c │ │ ├── zdict.c │ │ └── zdict.h │ ├── dll │ │ ├── example │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── build_package.bat │ │ │ ├── fullbench-dll.sln │ │ │ └── fullbench-dll.vcxproj │ │ └── libzstd.def │ ├── legacy │ │ ├── zstd_legacy.h │ │ ├── zstd_v01.c │ │ ├── zstd_v01.h │ │ ├── zstd_v02.c │ │ ├── zstd_v02.h │ │ ├── zstd_v03.c │ │ ├── zstd_v03.h │ │ ├── zstd_v04.c │ │ ├── zstd_v04.h │ │ ├── zstd_v05.c │ │ ├── zstd_v05.h │ │ ├── zstd_v06.c │ │ ├── zstd_v06.h │ │ ├── zstd_v07.c │ │ └── zstd_v07.h │ ├── libzstd.pc.in │ └── zstd.h │ ├── programs │ ├── .gitignore │ ├── BUCK │ ├── Makefile │ ├── README.md │ ├── benchfn.c │ ├── benchfn.h │ ├── benchzstd.c │ ├── benchzstd.h │ ├── datagen.c │ ├── datagen.h │ ├── dibio.c │ ├── dibio.h │ ├── fileio.c │ ├── fileio.h │ ├── platform.h │ ├── util.c │ ├── util.h │ ├── windres │ │ ├── generate_res.bat │ │ ├── verrsrc.h │ │ ├── zstd.rc │ │ ├── zstd32.res │ │ └── zstd64.res │ ├── zstd.1 │ ├── zstd.1.md │ ├── zstdcli.c │ ├── zstdgrep │ ├── zstdgrep.1 │ ├── zstdgrep.1.md │ ├── zstdless │ ├── zstdless.1 │ └── zstdless.1.md │ ├── tests │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── checkTag.c │ ├── datagencli.c │ ├── decodecorpus.c │ ├── files │ │ └── huffman-compressed-larger │ ├── fullbench.c │ ├── fuzz │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── block_decompress.c │ │ ├── block_round_trip.c │ │ ├── default.options │ │ ├── fuzz.h │ │ ├── fuzz.py │ │ ├── fuzz_helpers.h │ │ ├── regression_driver.c │ │ ├── simple_decompress.c │ │ ├── simple_round_trip.c │ │ ├── stream_decompress.c │ │ ├── stream_round_trip.c │ │ ├── zstd_helpers.c │ │ └── zstd_helpers.h │ ├── fuzzer.c │ ├── gzip │ │ ├── Makefile │ │ ├── gzip-env.sh │ │ ├── helin-segv.sh │ │ ├── help-version.sh │ │ ├── hufts-segv.gz │ │ ├── hufts.sh │ │ ├── init.cfg │ │ ├── init.sh │ │ ├── keep.sh │ │ ├── list.sh │ │ ├── memcpy-abuse.sh │ │ ├── mixed.sh │ │ ├── null-suffix-clobber.sh │ │ ├── stdin.sh │ │ ├── test-driver.sh │ │ 
├── trailing-nul.sh │ │ ├── unpack-invalid.sh │ │ ├── z-suffix.sh │ │ ├── zdiff.sh │ │ ├── zgrep-context.sh │ │ ├── zgrep-f.sh │ │ ├── zgrep-signal.sh │ │ └── znew-k.sh │ ├── invalidDictionaries.c │ ├── legacy.c │ ├── libzstd_partial_builds.sh │ ├── longmatch.c │ ├── paramgrill.c │ ├── playTests.sh │ ├── poolTests.c │ ├── rateLimiter.py │ ├── regression │ │ ├── Makefile │ │ ├── config.c │ │ ├── config.h │ │ ├── data.c │ │ ├── data.h │ │ ├── levels.h │ │ ├── method.c │ │ ├── method.h │ │ ├── result.c │ │ ├── result.h │ │ ├── results.csv │ │ └── test.c │ ├── roundTripCrash.c │ ├── seqgen.c │ ├── seqgen.h │ ├── symbols.c │ ├── test-zstd-speed.py │ ├── test-zstd-versions.py │ ├── zbufftest.c │ └── zstreamtest.c │ └── zlibWrapper │ ├── .gitignore │ ├── BUCK │ ├── Makefile │ ├── README.md │ ├── examples │ ├── example.c │ ├── example_original.c │ ├── fitblk.c │ ├── fitblk_original.c │ ├── minigzip.c │ └── zwrapbench.c │ ├── gzclose.c │ ├── gzcompatibility.h │ ├── gzguts.h │ ├── gzlib.c │ ├── gzread.c │ ├── gzwrite.c │ ├── zstd_zlibwrapper.c │ └── zstd_zlibwrapper.h ├── src ├── CMakeLists.txt ├── CommandDeclarations.h ├── MMseqsBase.cpp ├── alignment │ ├── Alignment.cpp │ ├── Alignment.h │ ├── BandedNucleotideAligner.cpp │ ├── BandedNucleotideAligner.h │ ├── CMakeLists.txt │ ├── CompressedA3M.cpp │ ├── CompressedA3M.h │ ├── DistanceCalculator.h │ ├── EvalueComputation.h │ ├── Main.cpp │ ├── Matcher.cpp │ ├── Matcher.h │ ├── MsaFilter.cpp │ ├── MsaFilter.h │ ├── MultipleAlignment.cpp │ ├── MultipleAlignment.h │ ├── PSSMCalculator.cpp │ ├── PSSMCalculator.h │ ├── StripedSmithWaterman.cpp │ ├── StripedSmithWaterman.h │ └── rescorediagonal.cpp ├── clustering │ ├── AlignmentSymmetry.cpp │ ├── AlignmentSymmetry.h │ ├── CMakeLists.txt │ ├── Clustering.cpp │ ├── Clustering.h │ ├── ClusteringAlgorithms.cpp │ ├── ClusteringAlgorithms.h │ └── Main.cpp ├── commons │ ├── A3MReader.cpp │ ├── A3MReader.h │ ├── AminoAcidLookupTables.h │ ├── Application.cpp │ ├── BacktraceTranslator.h │ 
├── BaseMatrix.cpp │ ├── BaseMatrix.h │ ├── ByteParser.h │ ├── CMakeLists.txt │ ├── CSProfile.cpp │ ├── CSProfile.h │ ├── Command.cpp │ ├── Command.h │ ├── CommandCaller.cpp │ ├── CommandCaller.h │ ├── Concat.h │ ├── DBConcat.cpp │ ├── DBConcat.h │ ├── DBReader.cpp │ ├── DBReader.h │ ├── DBWriter.cpp │ ├── DBWriter.h │ ├── Debug.cpp │ ├── Debug.h │ ├── Domain.h │ ├── DownloadDatabase.h │ ├── ExpressionParser.cpp │ ├── ExpressionParser.h │ ├── FastSort.h │ ├── FileUtil.cpp │ ├── FileUtil.h │ ├── GpuUtil.cpp │ ├── GpuUtil.h │ ├── GzReader.h │ ├── HeaderSummarizer.cpp │ ├── HeaderSummarizer.h │ ├── IndexReader.h │ ├── IntervalArray.h │ ├── KSeqBufferReader.h │ ├── KSeqWrapper.cpp │ ├── KSeqWrapper.h │ ├── LibraryReader.cpp │ ├── LibraryReader.h │ ├── MMseqsMPI.cpp │ ├── MMseqsMPI.h │ ├── Masker.cpp │ ├── Masker.h │ ├── MathUtil.h │ ├── MemoryMapped.cpp │ ├── MemoryMapped.h │ ├── MemoryTracker.cpp │ ├── MemoryTracker.h │ ├── MultiParam.cpp │ ├── MultiParam.h │ ├── NucleotideMatrix.cpp │ ├── NucleotideMatrix.h │ ├── Orf.cpp │ ├── Orf.h │ ├── Parameters.cpp │ ├── Parameters.h │ ├── PatternCompiler.h │ ├── ProfileStates.cpp │ ├── ProfileStates.h │ ├── ScoreMatrix.h │ ├── Sequence.cpp │ ├── Sequence.h │ ├── SequenceWeights.cpp │ ├── SequenceWeights.h │ ├── StringBlock.h │ ├── SubstitutionMatrix.cpp │ ├── SubstitutionMatrix.h │ ├── SubstitutionMatrixProfileStates.h │ ├── Timer.h │ ├── TranslateNucl.h │ ├── UniprotKB.cpp │ ├── UniprotKB.h │ ├── Util.cpp │ ├── Util.h │ └── itoa.h ├── linclust │ ├── CMakeLists.txt │ ├── KmerIndex.h │ ├── LinsearchIndexReader.cpp │ ├── LinsearchIndexReader.h │ ├── MarkovKmerScore.h │ ├── kmerindexdb.cpp │ ├── kmermatcher.cpp │ ├── kmermatcher.h │ ├── kmersearch.cpp │ └── kmersearch.h ├── mmseqs.cpp ├── multihit │ ├── Aggregation.cpp │ ├── Aggregation.h │ ├── CMakeLists.txt │ ├── MultiHitDb.cpp │ ├── MultiHitSearch.cpp │ ├── besthitperset.cpp │ ├── combinepvalperset.cpp │ └── resultsbyset.cpp ├── prefiltering │ ├── CMakeLists.txt │ ├── 
CacheFriendlyOperations.cpp │ ├── CacheFriendlyOperations.h │ ├── ExtendedSubstitutionMatrix.cpp │ ├── ExtendedSubstitutionMatrix.h │ ├── IndexBuilder.cpp │ ├── IndexBuilder.h │ ├── IndexTable.h │ ├── Indexer.cpp │ ├── Indexer.h │ ├── KmerGenerator.cpp │ ├── KmerGenerator.h │ ├── Main.cpp │ ├── Prefiltering.cpp │ ├── Prefiltering.h │ ├── PrefilteringIndexReader.cpp │ ├── PrefilteringIndexReader.h │ ├── QueryMatcher.cpp │ ├── QueryMatcher.h │ ├── QueryMatcherTaxonomyHook.h │ ├── ReducedMatrix.cpp │ ├── ReducedMatrix.h │ ├── SequenceLookup.cpp │ ├── SequenceLookup.h │ ├── UngappedAlignment.cpp │ ├── UngappedAlignment.h │ └── ungappedprefilter.cpp ├── taxonomy │ ├── CMakeLists.txt │ ├── MappingReader.h │ ├── NcbiTaxonomy.cpp │ ├── NcbiTaxonomy.h │ ├── TaxonomyExpression.h │ ├── addtaxonomy.cpp │ ├── aggregatetax.cpp │ ├── createbintaxmapping.cpp │ ├── createbintaxonomy.cpp │ ├── createtaxdb.cpp │ ├── filtertaxdb.cpp │ ├── filtertaxseqdb.cpp │ ├── lca.cpp │ └── taxonomyreport.cpp ├── test │ ├── CMakeLists.txt │ ├── TestAlignment.cpp │ ├── TestAlignmentPerformance.cpp │ ├── TestAlignmentTraceback.cpp │ ├── TestAlp.cpp │ ├── TestBacktraceTranslator.cpp │ ├── TestBestAlphabet.cpp │ ├── TestCompositionBias.cpp │ ├── TestCounting.cpp │ ├── TestDBReader.cpp │ ├── TestDBReaderIndexSerialization.cpp │ ├── TestDBReaderZstd.cpp │ ├── TestDiagonalScoring.cpp │ ├── TestDiagonalScoringPerformance.cpp │ ├── TestExtendedSubstitutionMatrix.cpp │ ├── TestKmerGenerator.cpp │ ├── TestKmerGeneratorPerf.cpp │ ├── TestKmerGeneratorProfile.cpp │ ├── TestKmerNucl.cpp │ ├── TestKmerScore.cpp │ ├── TestKsw2.cpp │ ├── TestKwayMerge.cpp │ ├── TestMerge.cpp │ ├── TestMultipleAlignment.cpp │ ├── TestOrf.cpp │ ├── TestPSSM.cpp │ ├── TestPSSMPrune.cpp │ ├── TestProfileAlignment.cpp │ ├── TestReduceMatrix.cpp │ ├── TestScoreMatrixSerialization.cpp │ ├── TestSequenceIndex.cpp │ ├── TestTanTan.cpp │ ├── TestTaxExpr.cpp │ ├── TestTaxonomy.cpp │ ├── TestTinyExpr.cpp │ ├── TestTranslate.cpp │ ├── 
TestUngappedCpuPerf.cpp │ ├── TestUtil.cpp │ ├── dataGap │ ├── dataGap.index │ ├── dataLinear │ └── dataLinear.index ├── util │ ├── CMakeLists.txt │ ├── alignall.cpp │ ├── alignbykmer.cpp │ ├── appenddbtoindex.cpp │ ├── apply.cpp │ ├── clusthash.cpp │ ├── compress.cpp │ ├── convert2fasta.cpp │ ├── convertalignments.cpp │ ├── convertca3m.cpp │ ├── convertkb.cpp │ ├── convertmsa.cpp │ ├── convertprofiledb.cpp │ ├── countkmer.cpp │ ├── cpmvrmlndb.cpp │ ├── createclusterdb.cpp │ ├── createdb.cpp │ ├── createseqfiledb.cpp │ ├── createsubdb.cpp │ ├── createtsv.cpp │ ├── db2tar.cpp │ ├── dbtype.cpp │ ├── diffseqdbs.cpp │ ├── diskspaceavail.cpp │ ├── expandaln.cpp │ ├── extractalignedregion.cpp │ ├── extractdomains.cpp │ ├── extractframes.cpp │ ├── extractorfs.cpp │ ├── filtera3m.cpp │ ├── filterdb.cpp │ ├── gff2db.cpp │ ├── gpuserver.cpp │ ├── indexdb.cpp │ ├── makepaddedseqdb.cpp │ ├── maskbygff.cpp │ ├── masksequence.cpp │ ├── mergeclusters.cpp │ ├── mergedbs.cpp │ ├── mergeresultsbyset.cpp │ ├── msa2profile.cpp │ ├── msa2result.cpp │ ├── nrtotaxmapping.cpp │ ├── offsetalignment.cpp │ ├── orftocontig.cpp │ ├── pairaln.cpp │ ├── prefixid.cpp │ ├── profile2neff.cpp │ ├── profile2pssm.cpp │ ├── profile2seq.cpp │ ├── proteinaln2nucl.cpp │ ├── recoverlongestorf.cpp │ ├── renamedbkeys.cpp │ ├── result2dnamsa.cpp │ ├── result2flat.cpp │ ├── result2msa.cpp │ ├── result2profile.cpp │ ├── result2rbh.cpp │ ├── result2repseq.cpp │ ├── result2stats.cpp │ ├── result2stats.h │ ├── reverseseq.cpp │ ├── sequence2profile.cpp │ ├── setextendeddbtype.cpp │ ├── sortresult.cpp │ ├── splitdb.cpp │ ├── splitsequence.cpp │ ├── subtractdbs.cpp │ ├── summarizealis.cpp │ ├── summarizeheaders.cpp │ ├── summarizeresult.cpp │ ├── summarizetabs.cpp │ ├── swapresults.cpp │ ├── tar2db.cpp │ ├── touchdb.cpp │ ├── transitivealign.cpp │ ├── translateaa.cpp │ ├── translatenucs.cpp │ ├── tsv2db.cpp │ ├── tsv2exprofiledb.cpp │ ├── unpackdb.cpp │ ├── versionstring.cpp │ └── view.cpp ├── version │ ├── 
CMakeLists.txt │ └── Version.cpp └── workflow │ ├── CMakeLists.txt │ ├── Cluster.cpp │ ├── ClusterUpdate.cpp │ ├── CreateIndex.cpp │ ├── Databases.cpp │ ├── EasyCluster.cpp │ ├── EasyLinclust.cpp │ ├── EasyRbh.cpp │ ├── EasySearch.cpp │ ├── EasyTaxonomy.cpp │ ├── Enrich.cpp │ ├── Linclust.cpp │ ├── Linsearch.cpp │ ├── Map.cpp │ ├── PickConsensusRep.cpp │ ├── Rbh.cpp │ ├── Search.cpp │ └── Taxonomy.cpp └── util ├── .gitattributes ├── CMakeLists.txt ├── bash-completion.sh ├── build_osx.sh ├── build_windows.sh ├── format_substitution_matrix.R ├── make-release.sh ├── mmseqs_wrapper.bat ├── mmseqs_wrapper.sh ├── update_libmarv.sh ├── update_simde.sh └── update_zstd.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | bin/ 3 | cmake-build-*/ 4 | .github/workflows 5 | Dockerfile -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | data/resources/krona_prelude.html linguist-vendored 2 | lib/* linguist-vendored 3 | lib/simd linguist-vendored=false 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Expected Behavior 2 | 3 | ## Current Behavior 4 | 5 | ## Steps to Reproduce (for bugs) 6 | Please make sure to execute the reproduction steps with newly recreated and empty tmp folders. 7 | 8 | ## MMseqs Output (for bugs) 9 | Please make sure to also post the complete output of MMseqs. You can use gist.github.com for large output. 10 | 11 | ## Context 12 | Providing context helps us come up with a solution and improve our documentation for the future. 13 | 14 | ## Your Environment 15 | Include as many relevant details about the environment you experienced the bug in. 
16 | * Git commit used (The string after "MMseqs Version:" when you execute MMseqs without any parameters): 17 | * Which MMseqs version was used (Statically-compiled, self-compiled, Homebrew, etc.): 18 | * For self-compiled and Homebrew: Compiler and CMake versions used and their invocation: 19 | * Server specifications (especially CPU support for AVX2/SSE and amount of system memory): 20 | * Operating system and version: 21 | -------------------------------------------------------------------------------- /.github/mmseqs2_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/.github/mmseqs2_logo.png -------------------------------------------------------------------------------- /.github/workflows/mac-arm64.yml: -------------------------------------------------------------------------------- 1 | name: Build macOS ARM64 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build: 10 | runs-on: macos-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | submodules: true 15 | 16 | - name: Dependencies 17 | run: | 18 | brew install -f --overwrite libomp 19 | 20 | - name: Build 21 | run: | 22 | mkdir -p build 23 | cd build 24 | LIBOMP=$(brew --prefix libomp) 25 | cmake \ 26 | -DCMAKE_BUILD_TYPE=Release \ 27 | -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP}/include" \ 28 | -DOpenMP_C_LIB_NAMES=omp \ 29 | -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP}/include" \ 30 | -DOpenMP_CXX_LIB_NAMES=omp \ 31 | -DOpenMP_omp_LIBRARY=${LIBOMP}/lib/libomp.a \ 32 | ..
33 | make -j$(sysctl -n hw.ncpu) 34 | 35 | - name: Test 36 | run: ./util/regression/run_regression.sh ./build/src/mmseqs SCRATCH -------------------------------------------------------------------------------- /.github/workflows/test-gpu.yml: -------------------------------------------------------------------------------- 1 | name: Test GPU 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | test_gpu: 8 | runs-on: [ self-hosted, slurm, gpu ] 9 | steps: 10 | - uses: actions/checkout@v3 11 | with: 12 | submodules: true 13 | - name: Test GPU 14 | run: | 15 | source /home/cirunner/miniforge3/etc/profile.d/conda.sh 16 | conda activate nvcc 17 | mkdir build && cd build 18 | cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="native" .. 19 | make -j$(nproc --all) 20 | - name: Regression 21 | run: | 22 | ./util/regression/run_regression.sh ./build/src/mmseqs SCRATCH 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Compiled Dynamic libraries 8 | *.so.* 9 | *.dylib 10 | *.dll 11 | 12 | # Compiled Static libraries 13 | *.lai 14 | *.la 15 | *.a 16 | *.lib 17 | 18 | # Executables 19 | *.exe 20 | *.out 21 | *.app 22 | # debug 23 | *.dSYM 24 | .DS_Store 25 | *.swp 26 | # programs 27 | 28 | *.depend 29 | *.swp 30 | src/workflow/time_test 31 | 32 | build/ 33 | build-*/ 34 | .idea/ 35 | cmake-build-*/ 36 | BenchmarkingDatas/ 37 | examples/ 38 | TestPhages/ 39 | temp/ 40 | BacteriaBench/ 41 | EliTestFiles/ 42 | NCBI_Bacteria_Orfs/ 43 | build.old/ 44 | .vscode/ 45 | vsc.code-workspace 46 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "util/regression"] 2 | path = util/regression 3 | url = 
https://github.com/soedinglab/MMseqs2-Regression.git 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © 2024 The MMseqs2 Development Team 5 | 6 | Permission is hereby granted, free of charge, to any person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 
26 | 27 | -------------------------------------------------------------------------------- /cmake/GetGitRevisionDescription.cmake.in: -------------------------------------------------------------------------------- 1 | # 2 | # Internal file for GetGitRevisionDescription.cmake 3 | # 4 | # Requires CMake 2.6 or newer (uses the 'function' command) 5 | # 6 | # Original Author: 7 | # 2009-2010 Ryan Pavlik 8 | # http://academic.cleardefinition.com 9 | # Iowa State University HCI Graduate Program/VRAC 10 | # 11 | # Copyright Iowa State University 2009-2010. 12 | # Distributed under the Boost Software License, Version 1.0. 13 | # (See accompanying file LICENSE_1_0.txt or copy at 14 | # http://www.boost.org/LICENSE_1_0.txt) 15 | 16 | set(HEAD_HASH) 17 | 18 | file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) 19 | 20 | string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) 21 | if(HEAD_CONTENTS MATCHES "ref") 22 | # named branch 23 | string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") 24 | if(EXISTS "@GIT_DIR@/${HEAD_REF}") 25 | configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) 26 | else() 27 | configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY) 28 | file(READ "@GIT_DATA@/packed-refs" PACKED_REFS) 29 | if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}") 30 | set(HEAD_HASH "${CMAKE_MATCH_1}") 31 | endif() 32 | endif() 33 | else() 34 | # detached HEAD 35 | configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) 36 | endif() 37 | 38 | if(NOT HEAD_HASH) 39 | file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) 40 | string(STRIP "${HEAD_HASH}" HEAD_HASH) 41 | endif() 42 | -------------------------------------------------------------------------------- /cmake/MMseqsSetupDerivedTarget.cmake: -------------------------------------------------------------------------------- 1 | include(AppendTargetProperty) 2 | 3 | function (mmseqs_setup_derived_target TARGET) 4 | set(SOURCE "${ARGN}") 5 | if(NOT SOURCE) 6 | set(SOURCE "mmseqs-framework") 
7 | endif() 8 | get_target_property(COMPILE_TMP ${SOURCE} COMPILE_FLAGS) 9 | get_target_property(LINK_TMP ${SOURCE} LINK_FLAGS) 10 | get_target_property(DEF_TMP ${SOURCE} COMPILE_DEFINITIONS) 11 | get_target_property(INCL_TMP ${SOURCE} INCLUDE_DIRECTORIES) 12 | 13 | target_link_libraries(${TARGET} ${SOURCE}) 14 | append_target_property(${TARGET} COMPILE_FLAGS ${COMPILE_TMP}) 15 | append_target_property(${TARGET} LINK_FLAGS ${LINK_TMP}) 16 | set_property(TARGET ${TARGET} APPEND PROPERTY COMPILE_DEFINITIONS ${DEF_TMP}) 17 | set_property(TARGET ${TARGET} APPEND PROPERTY INCLUDE_DIRECTORIES ${INCL_TMP}) 18 | endfunction() 19 | 20 | function (restore_exceptions TARGET) 21 | get_target_property(COMPILE_TMP ${TARGET} COMPILE_FLAGS) 22 | get_target_property(LINK_TMP ${TARGET} LINK_FLAGS) 23 | 24 | if(COMPILE_TMP MATCHES "-fno-exceptions") 25 | string(REPLACE "-fno-exceptions" "" COMPILE_TMP "${COMPILE_TMP}") 26 | endif() 27 | 28 | if(LINK_TMP MATCHES "-fno-exceptions") 29 | string(REPLACE "-fno-exceptions" "" LINK_TMP "${LINK_TMP}") 30 | endif() 31 | 32 | set_property(TARGET ${TARGET} PROPERTY COMPILE_FLAGS ${COMPILE_TMP}) 33 | set_property(TARGET ${TARGET} PROPERTY LINK_FLAGS ${LINK_TMP}) 34 | endfunction() 35 | -------------------------------------------------------------------------------- /cmake/MMseqsSetupTest.cmake: -------------------------------------------------------------------------------- 1 | function(mmseqs_setup_test NAME) 2 | include(MMseqsSetupDerivedTarget) 3 | string(TOLOWER ${NAME} BASE_NAME) 4 | string(REGEX REPLACE "\\.[^.]*$" "" BASE_NAME ${BASE_NAME}) 5 | string(REGEX REPLACE "^test" "test_" BASE_NAME ${BASE_NAME}) 6 | add_executable(${BASE_NAME} ${NAME}) 7 | 8 | mmseqs_setup_derived_target(${BASE_NAME}) 9 | target_link_libraries(${BASE_NAME} version) 10 | endfunction() 11 | -------------------------------------------------------------------------------- /cmake/checkshell.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | [ "$#" -ne 2 ] && echo "Please provide " && exit 1; 3 | 4 | SHELLCHECK="$1" 5 | if [ ! -x "$SHELLCHECK" ]; then 6 | exit 0 7 | fi 8 | 9 | INPUT="$2" 10 | INPUT_EXT="${INPUT##*.}" 11 | 12 | if [ "${INPUT_EXT}" = "sh" ]; then 13 | ${SHELLCHECK} "$2" 14 | else 15 | exit 0 16 | fi 17 | 18 | 19 | -------------------------------------------------------------------------------- /cmake/xxdi.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | my $file = shift; 7 | open my $input, '<', $file or die "Can't open file for read: $file $!"; 8 | my $text = do { local $/; <$input> }; 9 | close $input; 10 | 11 | my @hex_values = map { "0x$_" } unpack("(H2)*", $text); 12 | my $hex_data = join(",", map { ($_ % 16 == 0 ? "\n\t" : "") . $hex_values[$_] } 0 .. $#hex_values); 13 | my $len_data = length($text); 14 | 15 | my $varname = $file; 16 | $varname =~ s/[\/.]/_/g; 17 | print "unsigned char $varname\[\] = { $hex_data \n};\n"; 18 | print "unsigned int ${varname}_len = $len_data;\n"; 19 | -------------------------------------------------------------------------------- /data/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(MMseqsResourceCompiler) 2 | 3 | 4 | add_subdirectory(resources) 5 | add_subdirectory(workflow) 6 | set(GENERATED_MATRICES 7 | VTML80.out 8 | VTML40.out 9 | nucleotide.out 10 | blosum62.out 11 | PAM30.out 12 | ) 13 | 14 | set(GENERATED_OUTPUT_HEADERS "") 15 | FOREACH(INPUT_FILE ${GENERATED_MATRICES} ${GENERATED_RESOURCES} ${GENERATED_WORKFLOWS}) 16 | compile_resource(${INPUT_FILE} OUTPUT_FILE) 17 | list(APPEND GENERATED_OUTPUT_HEADERS "${OUTPUT_FILE}") 18 | ENDFOREACH() 19 | 20 | add_custom_target(generated ALL DEPENDS ${GENERATED_OUTPUT_HEADERS}) 21 | 
-------------------------------------------------------------------------------- /data/PAM150.out: -------------------------------------------------------------------------------- 1 | # PAM150 2 | A C D E F G H I K L M N P Q R S T V W Y X 3 | A 3 -2 0 0 -4 1 -2 -1 -2 -2 -1 0 1 -1 -2 1 1 0 -6 -3 -1 4 | C -2 9 -6 -6 -5 -4 -3 -2 -6 -6 -5 -4 -3 -6 -4 0 -3 -2 -7 0 -3 5 | D 0 -6 4 3 -6 0 0 -3 -1 -5 -3 2 -2 1 -2 0 -1 -3 -7 -4 -1 6 | E 0 -6 3 4 -6 -1 0 -2 -1 -4 -2 1 -1 2 -2 -1 -1 -2 -7 -4 -1 7 | F -4 -5 -6 -6 7 -5 -2 0 -6 1 -1 -4 -5 -5 -4 -3 -3 -2 -1 5 -3 8 | G 1 -4 0 -1 -5 4 -3 -3 -2 -4 -3 0 -1 -2 -3 1 -1 -2 -7 -5 -1 9 | H -2 -3 0 0 -2 -3 6 -3 -1 -2 -3 2 -1 3 1 -1 -2 -3 -3 0 -1 10 | I -1 -2 -3 -2 0 -3 -3 5 -2 1 2 -2 -3 -3 -2 -2 0 3 -5 -2 -1 11 | K -2 -6 -1 -1 -6 -2 -1 -2 4 -3 0 1 -2 0 3 -1 0 -3 -4 -4 -1 12 | L -2 -6 -5 -4 1 -4 -2 1 -3 5 3 -3 -3 -2 -3 -3 -2 1 -2 -2 -2 13 | M -1 -5 -3 -2 -1 -3 -3 2 0 3 7 -2 -3 -1 -1 -2 -1 1 -5 -3 -1 14 | N 0 -4 2 1 -4 0 2 -2 1 -3 -2 3 -1 0 -1 1 0 -2 -4 -2 -1 15 | P 1 -3 -2 -1 -5 -1 -1 -3 -2 -3 -3 -1 6 0 -1 1 0 -2 -6 -5 -1 16 | Q -1 -6 1 2 -5 -2 3 -3 0 -2 -1 0 0 5 1 -1 -1 -2 -5 -4 -1 17 | R -2 -4 -2 -2 -4 -3 1 -2 3 -3 -1 -1 -1 1 6 -1 -2 -3 1 -4 -1 18 | S 1 0 0 -1 -3 1 -1 -2 -1 -3 -2 1 1 -1 -1 2 1 -1 -2 -3 0 19 | T 1 -3 -1 -1 -3 -1 -2 0 0 -2 -1 0 0 -1 -2 1 4 0 -5 -3 -1 20 | V 0 -2 -3 -2 -2 -2 -3 3 -3 1 1 -2 -2 -2 -3 -1 0 4 -6 -3 -1 21 | W -6 -7 -7 -7 -1 -7 -3 -5 -4 -2 -5 -4 -6 -5 1 -2 -5 -6 12 -1 -4 22 | Y -3 0 -4 -4 5 -5 0 -2 -4 -2 -3 -2 -5 -4 -4 -3 -3 -3 -1 8 -3 23 | X -1 -3 -1 -1 -3 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 0 -1 -1 -4 -3 -1 24 | -------------------------------------------------------------------------------- /data/nucleotide.out: -------------------------------------------------------------------------------- 1 | # NUCL in 1/2 Bit 2 | # Background (precomputed optional): 0.2499975 0.2499975 0.2499975 0.2499975 0.00001 3 | # Lambda (precomputed optional): 0.6337314 4 | A C T G X 5 | A 2.0000 -3.0000 -3.0000 -3.0000 -3.0000 6 | C 
-3.0000 2.0000 -3.0000 -3.0000 -3.0000 7 | T -3.0000 -3.0000 2.0000 -3.0000 -3.0000 8 | G -3.0000 -3.0000 -3.0000 2.0000 -3.0000 9 | X -3.0000 -3.0000 -3.0000 -3.0000 -3.0000 -------------------------------------------------------------------------------- /data/resources/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(GENERATED_RESOURCES 2 | resources/CovSeqidQscPercMinDiag.lib 3 | resources/CovSeqidQscPercMinDiagTargetCov.lib 4 | resources/ExpOpt3_8_polished.cs32.lib 5 | resources/Library255_may17.lib 6 | resources/libPolished_8.lib 7 | resources/cs219.lib 8 | resources/krona_prelude.html 9 | resources/result_viz_prelude.html.zst 10 | resources/K4000.crf 11 | PARENT_SCOPE 12 | ) 13 | -------------------------------------------------------------------------------- /data/resources/result_viz_prelude.html.zst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/data/resources/result_viz_prelude.html.zst -------------------------------------------------------------------------------- /data/workflow/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(GENERATED_WORKFLOWS 2 | workflow/easysearch.sh 3 | workflow/easycluster.sh 4 | workflow/easytaxonomy.sh 5 | workflow/easyrbh.sh 6 | workflow/blastp.sh 7 | workflow/blastpgp.sh 8 | workflow/map.sh 9 | workflow/rbh.sh 10 | workflow/linclust.sh 11 | workflow/clustering.sh 12 | workflow/cascaded_clustering.sh 13 | workflow/update_clustering.sh 14 | workflow/searchtargetprofile.sh 15 | workflow/createindex.sh 16 | workflow/createtaxdb.sh 17 | workflow/translated_search.sh 18 | workflow/taxpercontig.sh 19 | workflow/multihitdb.sh 20 | workflow/multihitsearch.sh 21 | workflow/enrich.sh 22 | workflow/blastn.sh 23 | workflow/searchslicedtargetprofile.sh 24 | workflow/taxonomy.sh 25 | 
workflow/linsearch.sh 26 | workflow/databases.sh 27 | workflow/pickconsensusrep.sh 28 | workflow/nucleotide_clustering.sh 29 | workflow/iterativepp.sh 30 | workflow/tsv2exprofiledb.sh 31 | PARENT_SCOPE 32 | ) 33 | -------------------------------------------------------------------------------- /data/workflow/map.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | [ -z "$MMSEQS" ] && echo "Please set the environment variable \$MMSEQS to your MMSEQS binary." && exit 1; 3 | [ "$#" -ne 4 ] && echo "Please provide " && exit 1; 4 | [ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1; 5 | [ ! -f "$2.dbtype" ] && echo "$2.dbtype not found!" && exit 1; 6 | [ -f "$3.dbtype" ] && echo "$3.dbtype exists already!" && exit 1; 7 | [ ! -d "$4" ] && echo "tmp directory $4 not found!" && mkdir -p "$4"; 8 | 9 | # shellcheck disable=SC2086 10 | "$MMSEQS" search "$1" "$2" "$3" "$4" ${SEARCH_PAR} 11 | -------------------------------------------------------------------------------- /lib/alp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(alp OBJECT 2 | sls_basic.hpp 3 | njn_dynprogprob.cpp 4 | njn_random.cpp 5 | sls_pvalues.hpp 6 | njn_random.hpp 7 | njn_integer.hpp 8 | njn_doubletype.hpp 9 | njn_dynprogprob.hpp 10 | njn_dynprogproblim.hpp 11 | sls_alp_sim.hpp 12 | njn_dynprogprobproto.cpp 13 | njn_localmaxstatutil.hpp 14 | njn_localmaxstat.cpp 15 | njn_localmaxstat.hpp 16 | njn_matrix.hpp 17 | njn_localmaxstatmatrix.cpp 18 | sls_alp_regression.hpp 19 | sls_alp_sim.cpp 20 | sls_pvalues.cpp 21 | njn_root.hpp 22 | njn_dynprogproblim.cpp 23 | sls_alignment_evaluer.cpp 24 | njn_localmaxstatmatrix.hpp 25 | sls_alp_regression.cpp 26 | njn_dynprogprobproto.hpp 27 | sls_alp.cpp 28 | njn_approx.hpp 29 | njn_memutil.hpp 30 | njn_localmaxstatutil.cpp 31 | njn_function.hpp 32 | njn_ioutil.hpp 33 | sls_basic.cpp 34 | njn_uniform.hpp 35 | sls_alignment_evaluer.hpp 36 | 
njn_vector.hpp 37 | sls_alp_data.cpp 38 | sls_alp_data.hpp 39 | njn_ioutil.cpp 40 | sls_alp.hpp 41 | sls_normal_distr_array.hpp 42 | ) 43 | set_target_properties(alp PROPERTIES COMPILE_FLAGS "${MMSEQS_CXX_FLAGS} -w" LINK_FLAGS "${MMSEQS_CXX_FLAGS} -w") 44 | -------------------------------------------------------------------------------- /lib/alp/LICENSE: -------------------------------------------------------------------------------- 1 | PUBLIC DOMAIN NOTICE 2 | National Center for Biotechnology Information 3 | 4 | This software/database is a "United States Government Work" under the 5 | terms of the United States Copyright Act. It was written as part of 6 | the author's offical duties as a United States Government employee and 7 | thus cannot be copyrighted. This software/database is freely available 8 | to the public for use. The National Library of Medicine and the U.S. 9 | Government have not placed any restriction on its use or reproduction. 10 | 11 | Although all reasonable efforts have been taken to ensure the accuracy 12 | and reliability of the software and data, the NLM and the U.S. 13 | Government do not and cannot warrant the performance or results that 14 | may be obtained by using this software or data. The NLM and the U.S. 15 | Government disclaim all warranties, express or implied, including 16 | warranties of performance, merchantability or fitness for any particular 17 | purpose. 18 | 19 | Please cite the author in any work or product based on this material. 20 | 21 | -------------------------------------------------------------------------------- /lib/alp/readme.txt: -------------------------------------------------------------------------------- 1 | This directory contains C++ library files related to calculation of the Gumbel parameters for pairwise sequence alignment. 2 | 3 | Usage with "make". 4 | 5 | One way to use this library is with the "make" command. The following assumes you have "make" and a C++ compiler suitably installed. 
If you use the command line to enter the "cpp" directory and type "make", it should create a library file called "libalp.a". How to use the library is shown in the example directory. If you enter this directory and type "make", it should compile the test program: this will work only if it can find the header and library files. In "example/Makefile", the -I flag to the C preprocessor adds a directory to search for headers ("sls_alignment_evaluer.hpp"), the -L flag to the linker adds a directory to search for libraries, and -lalp specifies linking to "libalp". 6 | 7 | 8 | Please see the URL 9 | http://www.ncbi.nlm.nih.gov/CBBresearch/Spouge/html_ncbi/html/index/software.html#6 10 | for further information. 11 | -------------------------------------------------------------------------------- /lib/base64/LICENSE: -------------------------------------------------------------------------------- 1 | https://github.com/superwills/NibbleAndAHalf 2 | base64.h -- Fast base64 encoding and decoding. 3 | version 1.0.0, April 17, 2013 143a 4 | 5 | Copyright (C) 2013 William Sherif 6 | 7 | This software is provided 'as-is', without any express or implied 8 | warranty. In no event will the authors be held liable for any damages 9 | arising from the use of this software. 10 | 11 | Permission is granted to anyone to use this software for any purpose, 12 | including commercial applications, and to alter it and redistribute it 13 | freely, subject to the following restrictions: 14 | 15 | 1. The origin of this software must not be misrepresented; you must not 16 | claim that you wrote the original software. If you use this software 17 | in a product, an acknowledgment in the product documentation would be 18 | appreciated but is not required. 19 | 2. Altered source versions must be plainly marked as such, and must not be 20 | misrepresented as being the original software. 21 | 3. This notice may not be removed or altered from any source distribution. 
22 | 23 | William Sherif 24 | will.sherif@gmail.com 25 | 26 | YWxsIHlvdXIgYmFzZSBhcmUgYmVsb25nIHRvIHVz 27 | -------------------------------------------------------------------------------- /lib/base64/README.md: -------------------------------------------------------------------------------- 1 | NibbleAndAHalf 2 | ============== 3 | 4 | "Nibble And A Half" is an ANSI C library that provides fast base64 encoding and decoding, all in a single header file. 5 | 6 | Wed Apr 17 6:13p 7 | - All test related functions moved to testbase64.h. To use, only need #include "base64.h": 8 | https://github.com/superwills/NibbleAndAHalf/blob/master/NibbleAndAHalf/base64.h 9 | -------------------------------------------------------------------------------- /lib/fast_float/VERSION: -------------------------------------------------------------------------------- 1 | 6.1.5 -------------------------------------------------------------------------------- /lib/fmt/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 - present, Victor Zverovich and {fmt} contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | --- Optional exception to the license --- 23 | 24 | As an exception, if, as a result of your compiling your source code, portions 25 | of this Software are embedded into a machine-executable object form of such 26 | source code, you may redistribute such embedded portions in such object form 27 | without including the above copyright and permission notices. 28 | -------------------------------------------------------------------------------- /lib/fmt/VERSION: -------------------------------------------------------------------------------- 1 | 11.0.2 -------------------------------------------------------------------------------- /lib/fmt/fmt/core.h: -------------------------------------------------------------------------------- 1 | // This file is only provided for compatibility and may be removed in future 2 | // versions. Use fmt/base.h if you don't need fmt::format and fmt/format.h 3 | // otherwise. 4 | 5 | #include "format.h" 6 | -------------------------------------------------------------------------------- /lib/ksw2/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(ksw2 OBJECT 2 | ksw2.h 3 | ksw2_extz2_sse.cpp 4 | ) 5 | set_target_properties(ksw2 PROPERTIES COMPILE_FLAGS "${MMSEQS_CXX_FLAGS}" LINK_FLAGS "${MMSEQS_CXX_FLAGS}") 6 | -------------------------------------------------------------------------------- /lib/ksw2/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2018- Dana-Farber Cancer Institute 4 | 2017-2018 Broad Institute, Inc. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 
25 | -------------------------------------------------------------------------------- /lib/libmarv/.gitignore: -------------------------------------------------------------------------------- 1 | align 2 | makedb 3 | modifydb 4 | tileconfigsearch 5 | build/ -------------------------------------------------------------------------------- /lib/libmarv/src/blosum.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BLOSUM_HPP 2 | #define BLOSUM_HPP 3 | 4 | #include "types.hpp" 5 | #include "util.cuh" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace cudasw4{ 12 | 13 | #ifdef __CUDACC__ 14 | 15 | extern __constant__ std::int8_t deviceBlosum[25*25]; 16 | extern __constant__ int deviceBlosumDim; 17 | extern __constant__ int deviceBlosumDimSquared; 18 | 19 | #endif 20 | 21 | extern std::int8_t hostBlosum[25*25]; 22 | extern int hostBlosumDim; 23 | extern int hostBlosumDimSquared; 24 | 25 | //set host and device global blosum variables 26 | void setProgramWideBlosum(BlosumType blosumType, const std::vector& deviceIds); 27 | 28 | } //namespace cudasw4 29 | 30 | #endif -------------------------------------------------------------------------------- /lib/libmarv/src/config.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CONFIG_HPP 2 | #define CONFIG_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace cudasw4{ 8 | 9 | //MODIFY AT OWN RISK 10 | 11 | //data type to enumerate all sequences in the database 12 | using ReferenceIdT = std::int32_t; 13 | 14 | //data type for the length of both query sequences and database sequences 15 | using SequenceLengthT = std::int32_t; 16 | 17 | static_assert(std::is_same_v, "unexpected reference type"); 18 | static_assert(std::is_same_v, "unexpected sequence length type"); 19 | 20 | struct MaxSequencesInDB{ 21 | static constexpr ReferenceIdT value(){ 22 | return std::numeric_limits::max() - 1; 23 | } 24 | }; 25 | 26 | struct
MaxSequenceLength{ 27 | static constexpr SequenceLengthT value(){ 28 | return std::numeric_limits::max() - 128 - 4; 29 | } 30 | }; 31 | 32 | struct MaxNumberOfResults{ 33 | static constexpr int value(){ 34 | return 512*1024; 35 | } 36 | }; 37 | 38 | struct alignas(8) AlignmentEndPosition{ 39 | int x; 40 | int y; 41 | 42 | #ifdef __CUDACC__ 43 | __host__ __device__ 44 | #endif 45 | int getQueryEndInclusive() const{ 46 | return x; 47 | } 48 | 49 | #ifdef __CUDACC__ 50 | __host__ __device__ 51 | #endif 52 | int getSubjectEndInclusive() const{ 53 | return y; 54 | } 55 | }; 56 | 57 | 58 | } //namespace cudasw4 59 | 60 | 61 | #endif -------------------------------------------------------------------------------- /lib/libmarv/src/hpc_helpers/all_helpers.cuh: -------------------------------------------------------------------------------- 1 | #include "cuda_helpers.cuh" 2 | #include "hpc_helpers.h" 3 | #include "io_helpers.h" 4 | #include "type_helpers.h" 5 | #include "timers.cuh" -------------------------------------------------------------------------------- /lib/libmarv/src/hpc_helpers/type_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef HELPERS_TYPE_HELPERS_H 2 | #define HELPERS_TYPE_HELPERS_H 3 | 4 | #include 5 | #include 6 | 7 | namespace helpers { 8 | 9 | template 10 | using uint_t = 11 | typename std::conditional< 12 | (Bits > 64), 13 | std::false_type, 14 | typename std::conditional< 15 | (Bits > 32), 16 | std::uint64_t, 17 | typename std::conditional< 18 | (Bits > 16), 19 | std::uint32_t, 20 | typename std::conditional< 21 | (Bits > 8), 22 | std::uint16_t, 23 | std::uint8_t>::type>::type>::type>::type; 24 | 25 | template 26 | class no_init_t 27 | { 28 | public: 29 | static_assert(std::is_fundamental::value && 30 | std::is_arithmetic::value, 31 | "wrapped type must be a fundamental, numeric type"); 32 | 33 | //do nothing 34 | constexpr no_init_t() noexcept {} 35 | 36 | //convertible from a T 37 | constexpr 
no_init_t(T value) noexcept: v_(value) {} 38 | 39 | //act as a T in all conversion contexts 40 | constexpr operator T () const noexcept { return v_; } 41 | 42 | private: 43 | T v_; 44 | }; 45 | 46 | } // namespace helpers 47 | 48 | #endif /* HELPERS_TYPE_HELPERS_H */ 49 | -------------------------------------------------------------------------------- /lib/libmarv/src/length_partitions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LENGTH_PARTITIONS_HPP 2 | #define LENGTH_PARTITIONS_HPP 3 | 4 | #include "config.hpp" 5 | 6 | #include 7 | #include 8 | 9 | namespace cudasw4{ 10 | 11 | //length k is in partition i if boundaries[i-1] < k <= boundaries[i] 12 | 13 | constexpr auto getLengthPartitionBoundaries(){ 14 | 15 | constexpr int numLengthPartitions = 36; 16 | std::array boundaries{ 17 | 48, 18 | 64, 19 | 80, 20 | 96, 21 | 112, 22 | 128, 23 | 144, 24 | 160, 25 | 176, 26 | 192, 27 | 208, 28 | 224, 29 | 240, 30 | 256, 31 | 288, 32 | 320, 33 | 352, 34 | 384, 35 | 416, 36 | 448, 37 | 480, 38 | 512, 39 | 576, 40 | 640, 41 | 704, 42 | 768, 43 | 832, 44 | 896, 45 | 960, 46 | 1024, 47 | 1088, 48 | 1152, 49 | 1216, 50 | 1280, 51 | 8000, 52 | std::numeric_limits::max()-1 53 | }; 54 | 55 | 56 | return boundaries; 57 | } 58 | 59 | 60 | } //namespace cudasw4 61 | 62 | #endif -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/A100/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 0 1 3 | 64 4 8 0 0 4 | 96 4 12 0 1 5 | 128 4 16 0 0 6 | 160 4 20 0 1 7 | 192 4 24 0 1 8 | 224 4 28 0 1 9 | 256 4 32 0 1 10 | 288 4 36 0 1 11 | 320 4 40 0 1 12 | 352 4 44 0 1 13 | 384 4 48 0 1 14 | 416 4 52 0 0 15 | 448 4 56 0 0 16 | 480 4 60 0 0 17 | 512 4 64 0 1 18 | 576 8 36 0 0 19 | 640 8 40 0 0 20 | 704 8 44 0 0 21 | 768 8 48 0 0 22 | 832 8 52 0 0 23 | 896 8 56 0 0 24 | 960 8 60 0 0 25 | 
1024 8 64 0 0 26 | 1152 16 36 0 1 27 | 1280 16 40 0 0 28 | 1408 16 44 0 0 29 | 1536 16 48 0 0 30 | 1664 16 52 0 0 31 | 1792 16 56 0 0 32 | 1920 16 60 0 0 33 | 2048 16 64 0 0 34 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/A100/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 0 999999 3 | 32 4 8 0 999999 4 | 48 4 12 0 999999 5 | 64 4 16 0 999999 6 | 80 4 20 0 999999 7 | 96 4 24 0 999999 8 | 112 4 28 0 999999 9 | 128 8 16 0 999999 10 | 144 4 36 0 999999 11 | 160 8 20 0 999999 12 | 176 4 44 0 999999 13 | 192 8 24 0 999999 14 | 224 8 28 0 999999 15 | 256 8 32 0 999999 16 | 288 8 36 0 999999 17 | 320 16 20 0 999999 18 | 352 8 44 0 999999 19 | 384 16 24 0 999999 20 | 448 16 28 0 999999 21 | 512 16 32 0 999999 22 | 576 16 36 0 999999 23 | 640 32 20 0 999999 24 | 704 16 44 0 999999 25 | 768 32 24 0 999999 26 | 896 32 28 0 999999 27 | 1024 32 32 0 999999 28 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/GraceHopper/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 1 1 3 | 64 4 8 1 1 4 | 96 4 12 1 1 5 | 128 4 16 0 0 6 | 160 4 20 1 1 7 | 192 4 24 1 1 8 | 224 4 28 1 1 9 | 256 4 32 1 1 10 | 288 4 36 1 1 11 | 320 4 40 1 1 12 | 352 4 44 1 1 13 | 384 4 48 1 1 14 | 416 4 52 1 1 15 | 448 4 56 1 1 16 | 480 4 60 1 1 17 | 512 8 32 1 1 18 | 576 8 36 1 1 19 | 640 8 40 1 1 20 | 704 8 44 1 1 21 | 768 8 48 1 1 22 | 832 8 52 1 1 23 | 896 8 56 1 1 24 | 960 8 60 1 1 25 | 1024 16 32 1 1 26 | 1152 16 36 1 1 27 | 1280 16 40 1 1 28 | 1408 16 44 1 1 29 | 1536 16 48 1 1 30 | 1664 16 52 1 1 31 | 1792 16 56 1 1 32 | 1920 16 60 1 1 33 | 2048 16 64 0 0 34 | -------------------------------------------------------------------------------- 
/lib/libmarv/tuningconfigs/GraceHopper/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 1 999999 3 | 32 4 8 1 999999 4 | 48 4 12 1 999999 5 | 64 4 16 1 999999 6 | 80 4 20 1 999999 7 | 96 4 24 1 999999 8 | 112 4 28 1 999999 9 | 128 4 32 1 999999 10 | 144 4 36 1 999999 11 | 160 4 40 1 999999 12 | 176 4 44 1 999999 13 | 192 8 24 1 999999 14 | 224 8 28 1 999999 15 | 256 8 32 1 999999 16 | 288 8 36 1 999999 17 | 320 8 40 1 999999 18 | 352 8 44 1 999999 19 | 384 16 24 1 999999 20 | 448 16 28 1 999999 21 | 512 16 32 1 999999 22 | 576 16 36 1 999999 23 | 640 16 40 1 999999 24 | 704 16 44 1 999999 25 | 768 32 24 1 999999 26 | 896 32 28 1 999999 27 | 1024 32 32 1 999999 28 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/H100/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 1 1 3 | 64 4 8 1 1 4 | 96 4 12 1 1 5 | 128 4 16 0 0 6 | 160 4 20 1 1 7 | 192 4 24 1 1 8 | 224 4 28 1 1 9 | 256 4 32 1 1 10 | 288 4 36 1 1 11 | 320 4 40 1 1 12 | 352 4 44 1 1 13 | 384 4 48 1 1 14 | 416 4 52 1 1 15 | 448 4 56 1 1 16 | 480 4 60 0 0 17 | 512 8 32 1 1 18 | 576 8 36 1 1 19 | 640 8 40 1 1 20 | 704 8 44 1 1 21 | 768 8 48 1 1 22 | 832 8 52 1 1 23 | 896 8 56 1 1 24 | 960 8 60 1 1 25 | 1024 16 32 1 1 26 | 1152 16 36 1 1 27 | 1280 16 40 1 1 28 | 1408 16 44 1 1 29 | 1536 16 48 1 1 30 | 1664 16 52 1 1 31 | 1792 16 56 1 1 32 | 1920 16 60 1 1 33 | 2048 16 64 0 0 34 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/H100/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 1 999999 3 | 32 4 8 1 999999 4 | 48 4 12 1 999999 5 | 64 4 16 1 999999 6 | 80 4 20 1 999999 7 | 96 4 24 
1 999999 8 | 112 4 28 1 999999 9 | 128 4 32 1 999999 10 | 144 4 36 1 999999 11 | 160 4 40 1 999999 12 | 176 4 44 1 999999 13 | 192 8 24 1 999999 14 | 224 8 28 1 999999 15 | 256 8 32 1 999999 16 | 288 8 36 1 999999 17 | 320 8 40 1 999999 18 | 352 8 44 1 999999 19 | 384 16 24 1 999999 20 | 448 16 28 1 999999 21 | 512 16 32 1 999999 22 | 576 16 36 1 999999 23 | 640 16 40 1 999999 24 | 704 16 44 1 999999 25 | 768 32 24 1 999999 26 | 896 32 28 1 999999 27 | 1024 32 32 1 999999 28 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/L4/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 0 1 3 | 64 4 8 0 0 4 | 96 4 12 0 0 5 | 128 4 16 0 0 6 | 160 4 20 0 0 7 | 192 4 24 0 0 8 | 224 4 28 0 0 9 | 256 4 32 0 0 10 | 288 4 36 0 0 11 | 320 4 40 0 0 12 | 352 4 44 0 0 13 | 384 4 48 0 0 14 | 416 4 52 0 0 15 | 448 4 56 0 0 16 | 480 4 60 0 0 17 | 512 4 64 0 0 18 | 576 8 36 0 0 19 | 640 8 40 0 0 20 | 704 8 44 0 0 21 | 768 8 48 0 0 22 | 832 8 52 0 0 23 | 896 8 56 0 1 24 | 960 8 60 0 0 25 | 1024 8 64 0 0 26 | 1152 16 36 0 0 27 | 1280 16 40 0 0 28 | 1408 16 44 0 0 29 | 1536 16 48 0 0 30 | 1664 16 52 0 0 31 | 1792 16 56 0 0 32 | 1920 16 60 0 0 33 | 2048 16 64 0 0 34 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/L4/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 0 999999 3 | 32 4 8 0 999999 4 | 48 4 12 0 999999 5 | 64 4 16 0 999999 6 | 80 4 20 0 999999 7 | 96 4 24 0 999999 8 | 112 4 28 0 999999 9 | 128 8 16 0 999999 10 | 144 4 36 0 999999 11 | 160 8 20 0 999999 12 | 176 4 44 0 999999 13 | 192 8 24 0 999999 14 | 224 8 28 0 999999 15 | 256 16 16 0 999999 16 | 288 8 36 0 999999 17 | 320 16 20 0 999999 18 | 352 8 44 0 999999 19 | 384 16 24 0 999999 20 | 448 16 28 0 
999999 21 | 512 32 16 0 999999 22 | 576 16 36 0 999999 23 | 640 32 20 0 999999 24 | 704 16 44 0 999999 25 | 768 32 24 0 999999 26 | 896 32 28 0 999999 27 | 1024 32 32 0 999999 28 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/L40S/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 0 1 3 | 64 4 8 0 1 4 | 96 4 12 0 1 5 | 128 4 16 0 1 6 | 160 4 20 0 0 7 | 192 4 24 0 1 8 | 224 4 28 0 0 9 | 256 4 32 0 0 10 | 288 4 36 0 0 11 | 320 4 40 0 0 12 | 352 4 44 0 0 13 | 384 4 48 0 1 14 | 416 4 52 0 0 15 | 448 4 56 0 1 16 | 480 4 60 0 1 17 | 512 4 64 0 0 18 | 576 8 36 0 1 19 | 640 8 40 0 1 20 | 704 8 44 0 1 21 | 768 8 48 0 1 22 | 832 8 52 0 1 23 | 896 8 56 0 1 24 | 960 8 60 0 0 25 | 1024 8 64 0 0 26 | 1152 16 36 0 0 27 | 1280 16 40 0 0 28 | 1408 16 44 0 1 29 | 1536 16 48 0 1 30 | 1664 16 52 0 0 31 | 1792 16 56 0 0 32 | 1920 16 60 0 1 33 | 2048 16 64 0 0 34 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/L40S/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 0 999999 3 | 32 4 8 0 999999 4 | 48 4 12 0 999999 5 | 64 4 16 0 999999 6 | 80 4 20 0 999999 7 | 96 4 24 0 999999 8 | 112 4 28 0 999999 9 | 128 4 32 0 999999 10 | 144 4 36 0 999999 11 | 160 4 40 0 999999 12 | 176 4 44 0 999999 13 | 192 8 24 0 999999 14 | 224 8 28 0 999999 15 | 256 16 16 0 999999 16 | 288 8 36 0 999999 17 | 320 16 20 0 999999 18 | 352 8 44 0 999999 19 | 384 16 24 0 999999 20 | 448 16 28 0 999999 21 | 512 32 16 0 999999 22 | 576 16 36 0 999999 23 | 640 32 20 0 999999 24 | 704 16 44 0 999999 25 | 768 32 24 0 999999 26 | 896 32 28 0 999999 27 | 1024 32 32 0 999999 28 | -------------------------------------------------------------------------------- 
/lib/libmarv/tuningconfigs/RTX4090/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 0 1 3 | 64 4 8 0 0 4 | 96 4 12 0 1 5 | 128 4 16 0 1 6 | 160 4 20 0 0 7 | 192 4 24 0 1 8 | 224 4 28 0 1 9 | 256 4 32 0 1 10 | 288 4 36 0 0 11 | 320 4 40 0 0 12 | 352 4 44 0 1 13 | 384 4 48 0 1 14 | 416 4 52 0 0 15 | 448 4 56 0 0 16 | 480 4 60 0 0 17 | 512 4 64 0 1 18 | 576 8 36 0 1 19 | 640 8 40 0 1 20 | 704 8 44 0 0 21 | 768 8 48 0 0 22 | 832 8 52 0 1 23 | 896 8 56 0 1 24 | 960 8 60 0 1 25 | 1024 8 64 0 1 26 | 1152 16 36 0 1 27 | 1280 16 40 0 0 28 | 1408 16 44 0 1 29 | 1536 16 48 0 1 30 | 1664 16 52 0 1 31 | 1792 16 56 0 1 32 | 1920 16 60 0 1 33 | 2048 16 64 0 0 34 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/RTX4090/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 0 999999 3 | 32 4 8 0 999999 4 | 48 4 12 0 999999 5 | 64 4 16 0 999999 6 | 80 4 20 0 999999 7 | 96 4 24 0 999999 8 | 112 4 28 0 999999 9 | 128 8 16 0 999999 10 | 144 4 36 0 999999 11 | 160 4 40 0 999999 12 | 176 4 44 0 999999 13 | 192 8 24 0 999999 14 | 224 8 28 0 999999 15 | 256 8 32 0 999999 16 | 288 8 36 0 999999 17 | 320 8 40 0 999999 18 | 352 8 44 0 999999 19 | 384 16 24 0 999999 20 | 448 16 28 0 999999 21 | 512 16 32 0 999999 22 | 576 16 36 0 999999 23 | 640 32 20 0 999999 24 | 704 16 44 0 999999 25 | 768 32 24 0 999999 26 | 896 32 28 0 999999 27 | 1024 32 32 0 999999 28 | 29 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/T4/gapless.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 32 4 4 0 0 3 | 64 4 8 0 0 4 | 96 4 12 0 0 5 | 128 4 16 0 0 6 | 160 4 20 0 0 7 | 192 8 12 0 0 8 | 224 4 28 0 0 
9 | 256 8 16 0 0 10 | 288 4 36 0 0 11 | 320 8 20 0 0 12 | 352 4 44 0 0 13 | 384 16 12 0 0 14 | 416 4 52 0 0 15 | 448 8 28 0 0 16 | 480 4 60 0 0 17 | 512 16 16 0 0 18 | 576 8 36 0 0 19 | 640 16 20 0 0 20 | 704 8 44 0 0 21 | 768 16 24 0 0 22 | 832 8 52 0 0 23 | 896 16 28 0 0 24 | 960 8 60 0 0 25 | 1024 16 32 0 0 26 | 1152 16 36 0 0 27 | 1280 16 40 0 0 28 | 1408 16 44 0 0 29 | 1536 16 48 0 0 30 | -------------------------------------------------------------------------------- /lib/libmarv/tuningconfigs/T4/swendpos.txt: -------------------------------------------------------------------------------- 1 | #tilesize groupsize numRegs dpx kernelApproach 2 | 16 4 4 0 999999 3 | 32 4 8 0 999999 4 | 48 4 12 0 999999 5 | 64 4 16 0 999999 6 | 80 4 20 0 999999 7 | 96 4 24 0 999999 8 | 112 4 28 0 999999 9 | 128 8 16 0 999999 10 | 144 4 36 0 999999 11 | 160 8 20 0 999999 12 | 176 4 44 0 999999 13 | 192 8 24 0 999999 14 | 224 8 28 0 999999 15 | 256 16 16 0 999999 16 | 288 8 36 0 999999 17 | 320 16 20 0 999999 18 | 352 8 44 0 999999 19 | 384 16 24 0 999999 20 | 448 16 28 0 999999 21 | 512 32 16 0 999999 22 | 576 16 36 0 999999 23 | 640 32 20 0 999999 24 | 704 16 44 0 999999 25 | 768 32 24 0 999999 26 | -------------------------------------------------------------------------------- /lib/microtar/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(microtar microtar.h microtar.c) 2 | set_target_properties(microtar PROPERTIES COMPILE_FLAGS "${MMSEQS_C_FLAGS}" LINK_FLAGS "${MMSEQS_C_FLAGS}") 3 | -------------------------------------------------------------------------------- /lib/microtar/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 rxi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without 
limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /lib/microtar/README.md: -------------------------------------------------------------------------------- 1 | # microtar 2 | A lightweight tar library written in ANSI C 3 | 4 | This library was adapted from the original microtar (https://github.com/rxi/microtar) 5 | to be read-only and support fast seeking. 6 | 7 | ## License 8 | This library is free software; you can redistribute it and/or modify it under 9 | the terms of the MIT license. See [LICENSE](LICENSE) for details. 
10 | -------------------------------------------------------------------------------- /lib/nedmalloc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(nedmalloc nedmalloc.c) 2 | set_target_properties(nedmalloc 3 | PROPERTIES 4 | COMPILE_FLAGS "-DREPLACE_SYSTEM_ALLOCATOR -DWIN32 -DNO_MALLINFO=1 ${MMSEQS_CXX_FLAGS} -w" 5 | LINK_FLAGS "-DREPLACE_SYSTEM_ALLOCATOR -DWIN32 -DNO_MALLINFO=1 ${MMSEQS_CXX_FLAGS} -w") 6 | -------------------------------------------------------------------------------- /lib/nedmalloc/License.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /lib/simde/simde/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Evan Nemerson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /lib/simde/simde/README.md: -------------------------------------------------------------------------------- 1 | # SIMDe Without Test Cases 2 | 3 | This repository contains only the core of 4 | [SIMDe](https://github.com/simd-everywhere/simde). 
5 | It is generated automatically for every commit to master, and is 6 | intended to be used as a submodule in projects which don't want to 7 | include the (rather large) test cases. 8 | 9 | All development work happens in the main repository, please do not 10 | file issues or create pull requests against this repository. 11 | -------------------------------------------------------------------------------- /lib/tantan/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(tantan tantan.cpp tantan.h mcf_simd.h) 2 | set_target_properties(tantan PROPERTIES COMPILE_FLAGS "${MMSEQS_CXX_FLAGS}" LINK_FLAGS "${MMSEQS_CXX_FLAGS}") 3 | 4 | -------------------------------------------------------------------------------- /lib/tinyexpr/.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - clang 5 | - gcc 6 | 7 | script: make 8 | -------------------------------------------------------------------------------- /lib/tinyexpr/CONTRIBUTING: -------------------------------------------------------------------------------- 1 | A core strength of TinyExpr is that it is small and simple. This makes it easy 2 | to add new features. However, if we keep adding new features, it'll no longer 3 | be small or simple. In other words, each new feature corrodes away at the core 4 | strength of TinyExpr. 5 | 6 | If you want to add a new feature, and you expect me to merge it, please discuss 7 | it with me before you go to that work. Open an issue at 8 | https://github.com/codeplea/tinyexpr and let us know what you're proposing. 9 | 10 | Bug fixes are always welcome and appreciated, of course. 
11 | -------------------------------------------------------------------------------- /lib/tinyexpr/LICENSE: -------------------------------------------------------------------------------- 1 | zlib License 2 | 3 | Copyright (C) 2015, 2016 Lewis Van Winkle 4 | 5 | This software is provided 'as-is', without any express or implied 6 | warranty. In no event will the authors be held liable for any damages 7 | arising from the use of this software. 8 | 9 | Permission is granted to anyone to use this software for any purpose, 10 | including commercial applications, and to alter it and redistribute it 11 | freely, subject to the following restrictions: 12 | 13 | 1. The origin of this software must not be misrepresented; you must not 14 | claim that you wrote the original software. If you use this software 15 | in a product, an acknowledgement in the product documentation would be 16 | appreciated but is not required. 17 | 2. Altered source versions must be plainly marked as such, and must not be 18 | misrepresented as being the original software. 19 | 3. This notice may not be removed or altered from any source distribution. 
20 | 21 | -------------------------------------------------------------------------------- /lib/tinyexpr/Makefile: -------------------------------------------------------------------------------- 1 | CCFLAGS = -ansi -Wall -Wshadow -O2 2 | LFLAGS = -lm 3 | 4 | .PHONY = all clean 5 | 6 | all: test test_pr bench example example2 example3 7 | 8 | 9 | test: test.c tinyexpr.c 10 | $(CC) $(CCFLAGS) -o $@ $^ $(LFLAGS) 11 | ./$@ 12 | 13 | test_pr: test.c tinyexpr.c 14 | $(CC) $(CCFLAGS) -DTE_POW_FROM_RIGHT -DTE_NAT_LOG -o $@ $^ $(LFLAGS) 15 | ./$@ 16 | 17 | bench: benchmark.o tinyexpr.o 18 | $(CC) $(CCFLAGS) -o $@ $^ $(LFLAGS) 19 | 20 | example: example.o tinyexpr.o 21 | $(CC) $(CCFLAGS) -o $@ $^ $(LFLAGS) 22 | 23 | example2: example2.o tinyexpr.o 24 | $(CC) $(CCFLAGS) -o $@ $^ $(LFLAGS) 25 | 26 | example3: example3.o tinyexpr.o 27 | $(CC) $(CCFLAGS) -o $@ $^ $(LFLAGS) 28 | 29 | .c.o: 30 | $(CC) -c $(CCFLAGS) $< -o $@ 31 | 32 | clean: 33 | rm -f *.o *.exe example example2 example3 bench test_pr test 34 | -------------------------------------------------------------------------------- /lib/tinyexpr/doc/e1.dot: -------------------------------------------------------------------------------- 1 | digraph G { 2 | "+" -> "sin"; 3 | "+" -> div; 4 | "sin" -> "x"; 5 | div -> "1"; 6 | div -> "4"; 7 | div [label="÷"] 8 | } 9 | -------------------------------------------------------------------------------- /lib/tinyexpr/doc/e1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/tinyexpr/doc/e1.png -------------------------------------------------------------------------------- /lib/tinyexpr/doc/e2.dot: -------------------------------------------------------------------------------- 1 | digraph G { 2 | "+" -> "sin"; 3 | "+" -> "0.25"; 4 | "sin" -> "x"; 5 | } 6 | -------------------------------------------------------------------------------- 
/lib/tinyexpr/doc/e2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/tinyexpr/doc/e2.png -------------------------------------------------------------------------------- /lib/tinyexpr/example.c: -------------------------------------------------------------------------------- 1 | #include "tinyexpr.h" 2 | #include <stdio.h> 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | const char *c = "sqrt(5^2+7^2+11^2+(8-2)^2)"; 7 | double r = te_interp(c, 0); 8 | printf("The expression:\n\t%s\nevaluates to:\n\t%f\n", c, r); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /lib/tinyexpr/example2.c: -------------------------------------------------------------------------------- 1 | #include "tinyexpr.h" 2 | #include <stdio.h> 3 | 4 | int main(int argc, char *argv[]) 5 | { 6 | if (argc < 2) { 7 | printf("Usage: example2 \"expression\"\n"); 8 | return 0; 9 | } 10 | 11 | const char *expression = argv[1]; 12 | printf("Evaluating:\n\t%s\n", expression); 13 | 14 | /* This shows an example where the variables 15 | * x and y are bound at eval-time. */ 16 | double x, y; 17 | te_variable vars[] = {{"x", &x}, {"y", &y}}; 18 | 19 | /* This will compile the expression and check for errors. */ 20 | int err; 21 | te_expr *n = te_compile(expression, vars, 2, &err); 22 | 23 | if (n) { 24 | /* The variables can be changed here, and eval can be called as many 25 | * times as you like. This is fairly efficient because the parsing has 26 | * already been done. */ 27 | x = 3; y = 4; 28 | const double r = te_eval(n); printf("Result:\n\t%f\n", r); 29 | 30 | te_free(n); 31 | } else { 32 | /* Show the user where the error is at. 
*/ 33 | printf("\t%*s^\nError near here", err-1, ""); 34 | } 35 | 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /lib/tinyexpr/example3.c: -------------------------------------------------------------------------------- 1 | #include "tinyexpr.h" 2 | #include <stdio.h> 3 | 4 | 5 | /* An example of calling a C function. */ 6 | double my_sum(double a, double b) { 7 | printf("Called C function with %f and %f.\n", a, b); 8 | return a + b; 9 | } 10 | 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | te_variable vars[] = { 15 | {"mysum", my_sum, TE_FUNCTION2} 16 | }; 17 | 18 | const char *expression = "mysum(5, 6)"; 19 | printf("Evaluating:\n\t%s\n", expression); 20 | 21 | int err; 22 | te_expr *n = te_compile(expression, vars, 1, &err); 23 | 24 | if (n) { 25 | const double r = te_eval(n); 26 | printf("Result:\n\t%f\n", r); 27 | te_free(n); 28 | } else { 29 | /* Show the user where the error is at. */ 30 | printf("\t%*s^\nError near here", err-1, ""); 31 | } 32 | 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /lib/zstd/.buckconfig: -------------------------------------------------------------------------------- 1 | [cxx] 2 | cppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4 3 | cflags = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith 4 | cxxppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4 5 | cxxflags = -std=c++11 -Wno-deprecated-declarations 6 | gtest_dep = //contrib/pzstd:gtest 7 | 8 | [httpserver] 9 | port = 0 10 | -------------------------------------------------------------------------------- /lib/zstd/.buckversion: -------------------------------------------------------------------------------- 1 | c8dec2e8da52d483f6dd7c6cd2ad694e8e6fed2b 2 | -------------------------------------------------------------------------------- 
/lib/zstd/.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior 2 | * text eol=lf 3 | 4 | # Explicitly declare source files 5 | *.c text eol=lf 6 | *.h text eol=lf 7 | 8 | # Denote files that should not be modified. 9 | *.odt binary 10 | *.png binary 11 | 12 | # Visual Studio 13 | *.sln text eol=crlf 14 | *.vcxproj* text eol=crlf 15 | *.vcproj* text eol=crlf 16 | *.suo binary 17 | *.rc text eol=crlf 18 | 19 | # Windows 20 | *.bat text eol=crlf 21 | *.cmd text eol=crlf 22 | -------------------------------------------------------------------------------- /lib/zstd/.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.dSYM 5 | 6 | # Libraries 7 | *.lib 8 | *.a 9 | 10 | # Shared objects (inc. Windows DLLs) 11 | *.dll 12 | *.so 13 | *.so.* 14 | *.dylib 15 | 16 | # Executables 17 | zstd 18 | zstdmt 19 | *.exe 20 | *.out 21 | *.app 22 | 23 | # Test artefacts 24 | tmp* 25 | dictionary* 26 | NUL 27 | 28 | # Build artefacts 29 | projects/ 30 | bin/ 31 | .buckd/ 32 | buck-out/ 33 | 34 | # Other files 35 | .directory 36 | _codelite/ 37 | _zstdbench/ 38 | .clang_complete 39 | *.idea 40 | *.swp 41 | .DS_Store 42 | googletest/ 43 | *.d 44 | -------------------------------------------------------------------------------- /lib/zstd/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 
6 | -------------------------------------------------------------------------------- /lib/zstd/build/.gitignore: -------------------------------------------------------------------------------- 1 | # Visual C++ 2 | .vs/ 3 | *Copy 4 | *.db 5 | *.opensdf 6 | *.sdf 7 | *.suo 8 | *.user 9 | *.opendb 10 | 11 | VS2008/bin/ 12 | VS2010/bin/ 13 | VS2010/zwrapbench/ 14 | VS2012/ 15 | VS2013/ 16 | VS2015/ 17 | Studio* 18 | 19 | # CMake 20 | cmake/build/ 21 | CMakeCache.txt 22 | CMakeFiles 23 | CMakeScripts 24 | Testing 25 | Makefile 26 | cmake_install.cmake 27 | install_manifest.txt 28 | compile_commands.json 29 | CTestTestfile.cmake 30 | build 31 | lib 32 | -------------------------------------------------------------------------------- /lib/zstd/build/VS2010/CompileAsCpp.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CompileAsCpp 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2010.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2010 Win32 Release v100 5 | 6 | rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2010 x64 Release v100 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2012.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2012 Win32 Release v110 5 | rem build 64-bit 6 | call "%~p0%build.generic.cmd" VS2012 x64 Release v110 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2013.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2013 Win32 Release v120 5 | 6 | 
rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2013 x64 Release v120 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2015.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2015 Win32 Release v140 5 | 6 | rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2015 x64 Release v140 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2017.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2017 Win32 Release v141 5 | 6 | rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2017 x64 Release v141 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2017Community.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2017Community Win32 Release v141 5 | 6 | rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2017Community x64 Release v141 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2017Enterprise.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2017Enterprise Win32 Release v141 5 | 6 | rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2017Enterprise x64 Release v141 -------------------------------------------------------------------------------- /lib/zstd/build/VS_scripts/build.VS2017Professional.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem build 32-bit 4 | call "%~p0%build.generic.cmd" VS2017Professional 
Win32 Release v141 5 | 6 | rem build 64-bit 7 | call "%~p0%build.generic.cmd" VS2017Professional x64 Release v141 -------------------------------------------------------------------------------- /lib/zstd/build/cmake/.gitignore: -------------------------------------------------------------------------------- 1 | # cmake working directory 2 | cmakeBuild 3 | 4 | # cmake artefacts 5 | CMakeCache.txt 6 | CMakeFiles 7 | Makefile 8 | cmake_install.cmake 9 | cmake_uninstall.cmake 10 | *.1 11 | -------------------------------------------------------------------------------- /lib/zstd/build/cmake/CMakeModules/GetZstdLibraryVersion.cmake: -------------------------------------------------------------------------------- 1 | function(GetZstdLibraryVersion _header _major _minor _release) 2 | # Read file content 3 | FILE(READ ${_header} CONTENT) 4 | 5 | string(REGEX MATCH ".*define ZSTD_VERSION_MAJOR *([0-9]+).*define ZSTD_VERSION_MINOR *([0-9]+).*define ZSTD_VERSION_RELEASE *([0-9]+)" VERSION_REGEX "${CONTENT}") 6 | SET(${_major} ${CMAKE_MATCH_1} PARENT_SCOPE) 7 | SET(${_minor} ${CMAKE_MATCH_2} PARENT_SCOPE) 8 | SET(${_release} ${CMAKE_MATCH_3} PARENT_SCOPE) 9 | endfunction() 10 | -------------------------------------------------------------------------------- /lib/zstd/build/cmake/contrib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ################################################################ 2 | # Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under both the BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | # in the COPYING file in the root directory of this source tree). 
8 | # ################################################################ 9 | 10 | PROJECT(contrib) 11 | 12 | ADD_SUBDIRECTORY(pzstd) 13 | ADD_SUBDIRECTORY(gen_html) 14 | -------------------------------------------------------------------------------- /lib/zstd/build/cmake/contrib/gen_html/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ################################################################ 2 | # Copyright (c) 2015-present, Yann Collet, Facebook, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under both the BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | # in the COPYING file in the root directory of this source tree). 8 | # ################################################################ 9 | 10 | PROJECT(gen_html) 11 | INCLUDE(GetZstdLibraryVersion) 12 | 13 | SET(CMAKE_INCLUDE_CURRENT_DIR TRUE) 14 | 15 | # Define programs directory, where sources and header files are located 16 | SET(LIBRARY_DIR ${ZSTD_SOURCE_DIR}/lib) 17 | SET(PROGRAMS_DIR ${ZSTD_SOURCE_DIR}/programs) 18 | SET(GENHTML_DIR ${ZSTD_SOURCE_DIR}/contrib/gen_html) 19 | SET(GENHTML_BINARY ${PROJECT_BINARY_DIR}/gen_html${CMAKE_EXECUTABLE_SUFFIX}) 20 | INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${GENHTML_DIR}) 21 | 22 | ADD_EXECUTABLE(gen_html ${GENHTML_DIR}/gen_html.cpp) 23 | 24 | GetZstdLibraryVersion(${LIBRARY_DIR}/zstd.h VMAJOR VMINOR VRELEASE) 25 | SET(LIBVERSION "${VMAJOR}.${VMINOR}.${VRELEASE}") 26 | ADD_CUSTOM_TARGET(zstd_manual.html ALL 27 | ${GENHTML_BINARY} "${LIBVERSION}" "${LIBRARY_DIR}/zstd.h" "${PROJECT_BINARY_DIR}/zstd_manual.html" 28 | DEPENDS gen_html COMMENT "Update zstd manual") 29 | 30 | INSTALL(FILES "${PROJECT_BINARY_DIR}/zstd_manual.html" DESTINATION "${CMAKE_INSTALL_DOCDIR}") 31 | -------------------------------------------------------------------------------- 
/lib/zstd/build/cmake/lib/.gitignore: -------------------------------------------------------------------------------- 1 | # cmake build artefact 2 | libzstd.pc 3 | -------------------------------------------------------------------------------- /lib/zstd/build/cmake/lib/cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | 2 | if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") 3 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt") 4 | endif(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") 5 | 6 | file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files) 7 | string(REGEX REPLACE "\n" ";" files "${files}") 8 | foreach(file ${files}) 9 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 10 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 11 | exec_program( 12 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 13 | OUTPUT_VARIABLE rm_out 14 | RETURN_VALUE rm_retval 15 | ) 16 | if(NOT "${rm_retval}" STREQUAL 0) 17 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 18 | endif(NOT "${rm_retval}" STREQUAL 0) 19 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 20 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 21 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 22 | endforeach(file) 23 | -------------------------------------------------------------------------------- /lib/zstd/build/cmake/lib/pkgconfig.cmake: -------------------------------------------------------------------------------- 1 | CONFIGURE_FILE("${IN}" "${OUT}" @ONLY) 2 | -------------------------------------------------------------------------------- /lib/zstd/build/cmake/programs/.gitignore: -------------------------------------------------------------------------------- 1 | # produced by make 2 | zstd 3 | zstd-frugal 4 | unzstd 5 | zstdcat 6 | 
-------------------------------------------------------------------------------- /lib/zstd/build/cmake/tests/.gitignore: -------------------------------------------------------------------------------- 1 | # produced by make 2 | datagen 3 | fullbench 4 | fuzzer 5 | paramgrill 6 | zbufftest 7 | 8 | -------------------------------------------------------------------------------- /lib/zstd/build/meson/GetZstdLibraryVersion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ############################################################################# 3 | # Copyright (c) 2018-present lzutao 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under both the BSD-style license (found in the 7 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 | # in the COPYING file in the root directory of this source tree). 9 | # ############################################################################# 10 | import re 11 | import sys 12 | 13 | 14 | def find_version(filepath): 15 | version_file_data = None 16 | with open(filepath) as fd: 17 | version_file_data = fd.read() 18 | 19 | patterns = r"""#\s*define\s+ZSTD_VERSION_MAJOR\s+([0-9]+) 20 | #\s*define\s+ZSTD_VERSION_MINOR\s+([0-9]+) 21 | #\s*define\s+ZSTD_VERSION_RELEASE\s+([0-9]+) 22 | """ 23 | regex = re.compile(patterns, re.MULTILINE) 24 | version_match = regex.search(version_file_data) 25 | if version_match: 26 | return version_match.groups() 27 | raise Exception("Unable to find version string.") 28 | 29 | 30 | def main(): 31 | import argparse 32 | parser = argparse.ArgumentParser(description='Print zstd version from lib/zstd.h') 33 | parser.add_argument('file', help='path to lib/zstd.h') 34 | args = parser.parse_args() 35 | filepath = args.file 36 | version_tup = find_version(filepath) 37 | print('.'.join(version_tup)) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | 
-------------------------------------------------------------------------------- /lib/zstd/build/meson/README.md: -------------------------------------------------------------------------------- 1 | Meson build system for zstandard 2 | ================================ 3 | 4 | Meson is a build system designed to optimize programmer productivity. 5 | It aims to do this by providing simple, out-of-the-box support for 6 | modern software development tools and practices, such as unit tests, 7 | coverage reports, Valgrind, CCache and the like. 8 | 9 | This Meson build system is provided with no guarantee and maintained 10 | by Dima Krasner. 11 | 12 | It outputs one `libzstd`, either shared or static, depending on 13 | the `default_library` option. 14 | 15 | ## How to build 16 | 17 | `cd` to this meson directory (`build/meson`) 18 | 19 | ```sh 20 | meson --buildtype=release -D with-contrib=true -D with-tests=true builddir 21 | cd builddir 22 | ninja # to build 23 | ninja install # to install 24 | ``` 25 | 26 | You might want to install it in a staging directory: 27 | 28 | ```sh 29 | DESTDIR=./staging ninja install 30 | ``` 31 | 32 | To configure build options, use: 33 | 34 | ```sh 35 | meson configure 36 | ``` 37 | 38 | See [man meson(1)](https://manpages.debian.org/testing/meson/meson.1.en.html). 39 | -------------------------------------------------------------------------------- /lib/zstd/build/meson/contrib/gen_html/meson.build: -------------------------------------------------------------------------------- 1 | # ############################################################################# 2 | # Copyright (c) 2018-present lzutao 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under both the BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | # in the COPYING file in the root directory of this source tree).
8 | # ############################################################################# 9 | 10 | zstd_rootdir = '../../../..' 11 | 12 | gen_html_includes = include_directories(join_paths(zstd_rootdir, 'programs'), 13 | join_paths(zstd_rootdir, 'lib'), 14 | join_paths(zstd_rootdir, 'lib/common'), 15 | join_paths(zstd_rootdir, 'contrib/gen_html')) 16 | 17 | gen_html = executable('gen_html', 18 | join_paths(zstd_rootdir, 'contrib/gen_html/gen_html.cpp'), 19 | include_directories: gen_html_includes, 20 | install: false) 21 | 22 | # Update zstd manual 23 | zstd_manual_html = custom_target('zstd_manual.html', 24 | output : 'zstd_manual.html', 25 | command : [gen_html, 26 | zstd_version, 27 | join_paths(meson.current_source_dir(), zstd_rootdir, 'lib/zstd.h'), 28 | '@OUTPUT@'], 29 | install : false) 30 | -------------------------------------------------------------------------------- /lib/zstd/build/meson/contrib/meson.build: -------------------------------------------------------------------------------- 1 | # ############################################################################# 2 | # Copyright (c) 2018-present Dima Krasner 3 | # lzutao 4 | # All rights reserved. 5 | # 6 | # This source code is licensed under both the BSD-style license (found in the 7 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 | # in the COPYING file in the root directory of this source tree). 9 | # ############################################################################# 10 | 11 | subdir('pzstd') 12 | subdir('gen_html') 13 | -------------------------------------------------------------------------------- /lib/zstd/build/meson/contrib/pzstd/meson.build: -------------------------------------------------------------------------------- 1 | # ############################################################################# 2 | # Copyright (c) 2018-present lzutao 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under both the BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | # in the COPYING file in the root directory of this source tree). 8 | # ############################################################################# 9 | 10 | zstd_rootdir = '../../../..' 11 | 12 | pzstd_includes = include_directories(join_paths(zstd_rootdir, 'programs'), 13 | join_paths(zstd_rootdir, 'contrib/pzstd')) 14 | pzstd_sources = [join_paths(zstd_rootdir, 'programs/util.c'), 15 | join_paths(zstd_rootdir, 'contrib/pzstd/main.cpp'), 16 | join_paths(zstd_rootdir, 'contrib/pzstd/Options.cpp'), 17 | join_paths(zstd_rootdir, 'contrib/pzstd/Pzstd.cpp'), 18 | join_paths(zstd_rootdir, 'contrib/pzstd/SkippableFrame.cpp')] 19 | pzstd = executable('pzstd', 20 | pzstd_sources, 21 | cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic' ], 22 | include_directories: pzstd_includes, 23 | dependencies: [ libzstd_dep, thread_dep ], 24 | install: true) 25 | -------------------------------------------------------------------------------- /lib/zstd/contrib/VS2005/README.md: -------------------------------------------------------------------------------- 1 | ## Project Support Notice 2 | 3 | The VS2005 Project directory has been moved to the contrib directory in order to indicate that it will no longer be supported. 
4 | -------------------------------------------------------------------------------- /lib/zstd/contrib/adaptive-compression/.gitignore: -------------------------------------------------------------------------------- 1 | # binaries generated 2 | adapt 3 | datagen 4 | -------------------------------------------------------------------------------- /lib/zstd/contrib/adaptive-compression/test-performance.sh: -------------------------------------------------------------------------------- 1 | echo "testing time -- no limits set" 2 | ./datagen -s1 -g1GB > tmp 3 | time ./adapt -otmp1.zst tmp 4 | time zstd -1 -o tmp2.zst tmp 5 | rm tmp* 6 | 7 | ./datagen -s2 -g2GB > tmp 8 | time ./adapt -otmp1.zst tmp 9 | time zstd -1 -o tmp2.zst tmp 10 | rm tmp* 11 | 12 | ./datagen -s3 -g4GB > tmp 13 | time ./adapt -otmp1.zst tmp 14 | time zstd -1 -o tmp2.zst tmp 15 | rm tmp* 16 | 17 | echo -e "\ntesting compression ratio -- no limits set" 18 | ./datagen -s4 -g1GB > tmp 19 | time ./adapt -otmp1.zst tmp 20 | time zstd -1 -o tmp2.zst tmp 21 | ls -l tmp1.zst tmp2.zst 22 | rm tmp* 23 | 24 | ./datagen -s5 -g2GB > tmp 25 | time ./adapt -otmp1.zst tmp 26 | time zstd -1 -o tmp2.zst tmp 27 | ls -l tmp1.zst tmp2.zst 28 | rm tmp* 29 | 30 | ./datagen -s6 -g4GB > tmp 31 | time ./adapt -otmp1.zst tmp 32 | time zstd -1 -o tmp2.zst tmp 33 | ls -l tmp1.zst tmp2.zst 34 | rm tmp* 35 | 36 | echo -e "\ntesting performance at various compression levels -- no limits set" 37 | ./datagen -s7 -g1GB > tmp 38 | echo "adapt" 39 | time ./adapt -i5 -f tmp -otmp1.zst 40 | echo "zstdcli" 41 | time zstd -5 tmp -o tmp2.zst 42 | ls -l tmp1.zst tmp2.zst 43 | rm tmp* 44 | 45 | ./datagen -s8 -g1GB > tmp 46 | echo "adapt" 47 | time ./adapt -i10 -f tmp -otmp1.zst 48 | echo "zstdcli" 49 | time zstd -10 tmp -o tmp2.zst 50 | ls -l tmp1.zst tmp2.zst 51 | rm tmp* 52 | 53 | ./datagen -s9 -g1GB > tmp 54 | echo "adapt" 55 | time ./adapt -i15 -f tmp -otmp1.zst 56 | echo "zstdcli" 57 | time zstd -15 tmp -o tmp2.zst 58 | ls -l tmp1.zst
tmp2.zst 59 | rm tmp* 60 | -------------------------------------------------------------------------------- /lib/zstd/contrib/cleanTabs: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | sed -i '' $'s/\t/ /g' ../lib/**/*.{h,c} ../programs/*.{h,c} ../tests/*.c ./**/*.{h,cpp} ../examples/*.c ../zlibWrapper/*.{h,c} 3 | -------------------------------------------------------------------------------- /lib/zstd/contrib/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile 2 | # First image to build the binary 3 | FROM alpine as builder 4 | 5 | RUN apk --no-cache add make gcc libc-dev 6 | COPY . /src 7 | RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install 8 | 9 | # Second minimal image to only keep the built binary 10 | FROM alpine 11 | 12 | # Copy the built files 13 | COPY --from=builder /pkg / 14 | 15 | # Copy the license as well 16 | RUN mkdir -p /usr/local/share/licenses/zstd 17 | COPY --from=builder /src/LICENSE /usr/local/share/licenses/zstd/ 18 | 19 | # Just run `zstd` if no other command is given 20 | CMD ["/usr/local/bin/zstd"] 21 | -------------------------------------------------------------------------------- /lib/zstd/contrib/docker/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Requirement 3 | 4 | The `Dockerfile` script requires a version of `docker` >= 17.05 5 | 6 | ## Installing docker 7 | 8 | The official docker install docs use a ppa with a modern version available: 9 | https://docs.docker.com/install/linux/docker-ce/ubuntu/ 10 | 11 | ## How to run 12 | 13 | `docker build -t zstd .` 14 | 15 | ## test 16 | 17 | ``` 18 | echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat 19 | foo 20 | ``` 21 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile:
-------------------------------------------------------------------------------- 1 | ARG := 2 | 3 | CC ?= gcc 4 | CFLAGS ?= -O3 5 | INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder 6 | 7 | RANDOM_FILE := ../randomDictBuilder/random.c 8 | IO_FILE := ../randomDictBuilder/io.c 9 | 10 | all: run clean 11 | 12 | .PHONY: run 13 | run: benchmark 14 | echo "Benchmarking with $(ARG)" 15 | ./benchmark $(ARG) 16 | 17 | .PHONY: test 18 | test: benchmarkTest clean 19 | 20 | .PHONY: benchmarkTest 21 | benchmarkTest: benchmark test.sh 22 | sh test.sh 23 | 24 | benchmark: benchmark.o io.o random.o libzstd.a 25 | $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark 26 | 27 | benchmark.o: benchmark.c 28 | $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c 29 | 30 | random.o: $(RANDOM_FILE) 31 | $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE) 32 | 33 | io.o: $(IO_FILE) 34 | $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) 35 | 36 | libzstd.a: 37 | $(MAKE) -C ../../../lib libzstd.a 38 | mv ../../../lib/libzstd.a . 39 | 40 | .PHONY: clean 41 | clean: 42 | rm -f *.o benchmark libzstd.a 43 | $(MAKE) -C ../../../lib clean 44 | echo "Cleaning is completed" 45 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h: -------------------------------------------------------------------------------- 1 | /* ZDICT_trainFromBuffer_legacy() : 2 | * issue : samplesBuffer need to be followed by a noisy guard band. 
3 | * work around : duplicate the buffer, and add the noise */ 4 | size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, 5 | const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, 6 | ZDICT_legacy_params_t params); 7 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh: -------------------------------------------------------------------------------- 1 | echo "Benchmark with in=../../lib/common" 2 | ./benchmark in=../../../lib/common 3 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/fastCover/Makefile: -------------------------------------------------------------------------------- 1 | ARG := 2 | 3 | CC ?= gcc 4 | CFLAGS ?= -O3 -g 5 | INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder 6 | 7 | IO_FILE := ../randomDictBuilder/io.c 8 | 9 | TEST_INPUT := ../../../lib 10 | TEST_OUTPUT := fastCoverDict 11 | 12 | all: main run clean 13 | 14 | .PHONY: test 15 | test: main testrun testshell clean 16 | 17 | .PHONY: run 18 | run: 19 | echo "Building a fastCover dictionary with given arguments" 20 | ./main $(ARG) 21 | 22 | main: main.o io.o fastCover.o libzstd.a 23 | $(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main 24 | 25 | main.o: main.c 26 | $(CC) $(CFLAGS) $(INCLUDES) -c main.c 27 | 28 | fastCover.o: fastCover.c 29 | $(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c 30 | 31 | io.o: $(IO_FILE) 32 | $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) 33 | 34 | libzstd.a: 35 | $(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a 36 | mv ../../../lib/libzstd.a . 
37 | 38 | .PHONY: testrun 39 | testrun: main 40 | echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) " 41 | ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT) 42 | zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q 43 | rm -f $(TEST_OUTPUT) 44 | 45 | .PHONY: testshell 46 | testshell: test.sh 47 | sh test.sh 48 | echo "Finish running test.sh" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -f *.o main libzstd.a 53 | $(MAKE) -C ../../../lib clean 54 | echo "Cleaning is completed" 55 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/fastCover/README.md: -------------------------------------------------------------------------------- 1 | FastCover Dictionary Builder 2 | 3 | ### Permitted Arguments: 4 | Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in=" 5 | Output Dictionary (out=dictName): if not provided, default to fastCoverDict 6 | Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0 7 | Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB 8 | Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200 9 | Size of Dmer (d=#): either 6 or 8; if not provided, default to 8 10 | Number of steps (steps=#): positive number, if not provided, default to 32 11 | Percentage of samples used for training (split=#): positive number; if not provided, default to 100 12 | 13 | 14 | ### Running Test: 15 | make test 16 | 17 | 18 | ### Usage: 19 | To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments 20 | If k or d is not provided, the optimized version of FASTCOVER is run.
21 | 22 | ### Examples: 23 | make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520" 24 | make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" 25 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/fastCover/test.sh: -------------------------------------------------------------------------------- 1 | echo "Building fastCover dictionary with in=../../lib/common f=20 out=dict1" 2 | ./main in=../../../lib/common f=20 out=dict1 3 | zstd -be3 -D dict1 -r ../../../lib/common -q 4 | echo "Building fastCover dictionary with in=../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000" 5 | ./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000 6 | zstd -be3 -D dict2 -r ../../../lib/common -q 7 | echo "Building fastCover dictionary with 2 sample sources" 8 | ./main in=../../../lib/common in=../../../lib/compress out=dict3 9 | zstd -be3 -D dict3 -r ../../../lib/common -q 10 | echo "Removing dict1 dict2 dict3" 11 | rm -f dict1 dict2 dict3 12 | 13 | echo "Testing with invalid parameters, should fail" 14 | ! ./main in=../../../lib/common r=10 15 | ! 
./main in=../../../lib/common d=10 16 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/randomDictBuilder/Makefile: -------------------------------------------------------------------------------- 1 | ARG := 2 | 3 | CC ?= gcc 4 | CFLAGS ?= -O3 5 | INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder 6 | 7 | TEST_INPUT := ../../../lib 8 | TEST_OUTPUT := randomDict 9 | 10 | all: main run clean 11 | 12 | .PHONY: test 13 | test: main testrun testshell clean 14 | 15 | .PHONY: run 16 | run: 17 | echo "Building a random dictionary with given arguments" 18 | ./main $(ARG) 19 | 20 | main: main.o io.o random.o libzstd.a 21 | $(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main 22 | 23 | main.o: main.c 24 | $(CC) $(CFLAGS) $(INCLUDES) -c main.c 25 | 26 | random.o: random.c 27 | $(CC) $(CFLAGS) $(INCLUDES) -c random.c 28 | 29 | io.o: io.c 30 | $(CC) $(CFLAGS) $(INCLUDES) -c io.c 31 | 32 | libzstd.a: 33 | $(MAKE) -C ../../../lib libzstd.a 34 | mv ../../../lib/libzstd.a . 
35 | 36 | .PHONY: testrun 37 | testrun: main 38 | echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) " 39 | ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT) 40 | zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q 41 | rm -f $(TEST_OUTPUT) 42 | 43 | .PHONY: testshell 44 | testshell: test.sh 45 | sh test.sh 46 | echo "Finish running test.sh" 47 | 48 | .PHONY: clean 49 | clean: 50 | rm -f *.o main libzstd.a 51 | $(MAKE) -C ../../../lib clean 52 | echo "Cleaning is completed" 53 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/randomDictBuilder/README.md: -------------------------------------------------------------------------------- 1 | Random Dictionary Builder 2 | 3 | ### Permitted Arguments: 4 | Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in=" 5 | Output Dictionary (out=dictName): if not provided, default to defaultDict 6 | Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0 7 | Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB 8 | Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200 9 | 10 | ### Running Test: 11 | make test 12 | 13 | 14 | ### Usage: 15 | To build a random dictionary with the provided arguments: make ARG= followed by arguments 16 | 17 | 18 | ### Examples: 19 | make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520" 20 | make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" 21 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/randomDictBuilder/random.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> /* fprintf */ 2 | #include <stdlib.h> /* malloc, free, qsort */ 3 | #include <string.h> /*
memset */ 4 | #include <time.h> /* clock */ 5 | #include "zstd_internal.h" /* includes zstd.h */ 6 | #ifndef ZDICT_STATIC_LINKING_ONLY 7 | #define ZDICT_STATIC_LINKING_ONLY 8 | #endif 9 | #include "zdict.h" 10 | 11 | 12 | 13 | typedef struct { 14 | unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */ 15 | ZDICT_params_t zParams; 16 | } ZDICT_random_params_t; 17 | 18 | 19 | /*! ZDICT_trainFromBuffer_random(): 20 | * Train a dictionary from an array of samples. 21 | * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, 22 | * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. 23 | * The resulting dictionary will be saved into `dictBuffer`. 24 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) 25 | * or an error code, which can be tested with ZDICT_isError(). 26 | */ 27 | ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity, 28 | const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, 29 | ZDICT_random_params_t parameters); 30 | -------------------------------------------------------------------------------- /lib/zstd/contrib/experimental_dict_builders/randomDictBuilder/test.sh: -------------------------------------------------------------------------------- 1 | echo "Building random dictionary with in=../../lib/common k=200 out=dict1" 2 | ./main in=../../../lib/common k=200 out=dict1 3 | zstd -be3 -D dict1 -r ../../../lib/common -q 4 | echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000" 5 | ./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000 6 | zstd -be3 -D dict2 -r ../../../lib/common -q 7 | echo "Building random dictionary with 2 sample sources" 8 | ./main in=../../../lib/common in=../../../lib/compress out=dict3 9 | zstd -be3 -D dict3 -r ../../../lib/common -q 10 | echo "Removing dict1 dict2 dict3" 11 |
rm -f dict1 dict2 dict3 12 | 13 | echo "Testing with invalid parameters, should fail" 14 | ! ./main r=10 15 | -------------------------------------------------------------------------------- /lib/zstd/contrib/gen_html/.gitignore: -------------------------------------------------------------------------------- 1 | # make artefact 2 | gen_html 3 | zstd_manual.html 4 | -------------------------------------------------------------------------------- /lib/zstd/contrib/gen_html/README.md: -------------------------------------------------------------------------------- 1 | gen_html - a program for automatic generation of zstd manual 2 | ============================================================ 3 | 4 | #### Introduction 5 | 6 | This simple C++ program generates a single-page HTML manual from `zstd.h`. 7 | 8 | The format of recognized comment blocks is following: 9 | - comments of type `/*!` mean: this is a function declaration; switch comments with declarations 10 | - comments of type `/**` and `/*-` mean: this is a comment; use a `

<H2>` header for the first line 11 | - comments of type `/*=` and `/**=` mean: use a `<H3>

` header and show also all functions until first empty line 12 | - comments of type `/*X` where `X` is different from above-mentioned are ignored 13 | 14 | Moreover: 15 | - `ZSTDLIB_API` is removed to improve readability 16 | - `typedef` are detected and included even if uncommented 17 | - comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold) 18 | 19 | 20 | #### Usage 21 | 22 | The program requires 3 parameters: 23 | ``` 24 | gen_html [zstd_version] [input_file] [output_html] 25 | ``` 26 | 27 | To compile program and generate zstd manual we have used: 28 | ``` 29 | make 30 | ./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html 31 | ``` 32 | -------------------------------------------------------------------------------- /lib/zstd/contrib/gen_html/gen-zstd-manual.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LIBVER_MAJOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h` 4 | LIBVER_MINOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h` 5 | LIBVER_PATCH_SCRIPT=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h` 6 | LIBVER_SCRIPT=$LIBVER_MAJOR_SCRIPT.$LIBVER_MINOR_SCRIPT.$LIBVER_PATCH_SCRIPT 7 | 8 | echo ZSTD_VERSION=$LIBVER_SCRIPT 9 | ./gen_html $LIBVER_SCRIPT ../../lib/zstd.h ./zstd_manual.html 10 | -------------------------------------------------------------------------------- /lib/zstd/contrib/largeNbDicts/.gitignore: -------------------------------------------------------------------------------- 1 | # build artifacts 2 | largeNbDicts 3 | -------------------------------------------------------------------------------- /lib/zstd/contrib/largeNbDicts/README.md: -------------------------------------------------------------------------------- 1 | largeNbDicts 2 | ===================== 3 | 4 | `largeNbDicts` is a 
benchmark test tool 5 | dedicated to the specific scenario of 6 | dictionary decompression using a very large number of dictionaries. 7 | When dictionaries are constantly changing, they are always "cold", 8 | suffering from increased latency due to cache misses. 9 | 10 | The tool is created in a bid to investigate performance for this scenario, 11 | and experiment mitigation techniques. 12 | 13 | Command line : 14 | ``` 15 | largeNbDicts [Options] filename(s) 16 | 17 | Options : 18 | -r : recursively load all files in subdirectories (default: off) 19 | -B# : split input into blocks of size # (default: no split) 20 | -# : use compression level # (default: 3) 21 | -D # : use # as a dictionary (default: create one) 22 | -i# : nb benchmark rounds (default: 6) 23 | --nbDicts=# : set nb of dictionaries to # (default: one per block) 24 | -h : help (this text) 25 | ``` 26 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/.gitignore: -------------------------------------------------------------------------------- 1 | !lib/zstd 2 | !lib/zstd/* 3 | *.o 4 | *.a 5 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/lib/Kconfig.diff: -------------------------------------------------------------------------------- 1 | diff --git a/lib/Kconfig b/lib/Kconfig 2 | index b6009d7..f00ddab 100644 3 | --- a/lib/Kconfig 4 | +++ b/lib/Kconfig 5 | @@ -241,6 +241,14 @@ config LZ4HC_COMPRESS 6 | config LZ4_DECOMPRESS 7 | tristate 8 | 9 | +config ZSTD_COMPRESS 10 | + select XXHASH 11 | + tristate 12 | + 13 | +config ZSTD_DECOMPRESS 14 | + select XXHASH 15 | + tristate 16 | + 17 | source "lib/xz/Kconfig" 18 | 19 | # 20 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/lib/Makefile.diff: -------------------------------------------------------------------------------- 1 | diff --git a/lib/Makefile 
b/lib/Makefile 2 | index e16f94a..0cfd529 100644 3 | --- a/lib/Makefile 4 | +++ b/lib/Makefile 5 | @@ -115,6 +115,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ 6 | obj-$(CONFIG_LZ4_COMPRESS) += lz4/ 7 | obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ 8 | obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ 9 | +obj-$(CONFIG_ZSTD_COMPRESS) += zstd/ 10 | +obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/ 11 | obj-$(CONFIG_XZ_DEC) += xz/ 12 | obj-$(CONFIG_RAID6_PQ) += raid6/ 13 | 14 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/lib/zstd/.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | IndentWidth: 8 3 | UseTab: Always 4 | BreakBeforeBraces: Linux 5 | AllowShortIfStatementsOnASingleLine: false 6 | IndentCaseLabels: false 7 | 8 | ColumnLimit: 160 9 | AlignEscapedNewlinesLeft: true 10 | ReflowComments: true 11 | AllowShortCaseLabelsOnASingleLine: true 12 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/lib/zstd/Makefile: -------------------------------------------------------------------------------- 1 | obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o 2 | obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o 3 | 4 | ccflags-y += -O3 5 | 6 | # Object files unique to zstd_compress and zstd_decompress 7 | zstd_compress-y := fse_compress.o huf_compress.o compress.o 8 | zstd_decompress-y := huf_decompress.o decompress.o 9 | 10 | # These object files are shared between the modules. 11 | # Always add them to zstd_compress. 12 | # Unless both zstd_compress and zstd_decompress are built in 13 | # then also add them to zstd_decompress. 
14 | zstd_compress-y += entropy_common.o fse_decompress.o zstd_common.o 15 | 16 | ifneq ($(CONFIG_ZSTD_COMPRESS)$(CONFIG_ZSTD_DECOMPRESS),yy) 17 | zstd_decompress-y += entropy_common.o fse_decompress.o zstd_common.o 18 | endif 19 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/squashfs-benchmark.sh: -------------------------------------------------------------------------------- 1 | # !/bin/sh 2 | set -e 3 | 4 | # Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. 5 | # The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and 6 | # 16 GB of RAM and an SSD. 7 | 8 | # $BENCHMARK_DIR is generated with the following commands, from the Ubuntu image 9 | # ubuntu-16.10-desktop-amd64.iso. 10 | # > mkdir mnt 11 | # > sudo mount -o loop ubuntu-16.10-desktop-amd64.iso mnt 12 | # > cp mnt/casper/filesystem.squashfs . 13 | # > sudo unsquashfs filesystem.squashfs 14 | 15 | # $HOME is on a ext4 filesystem 16 | BENCHMARK_DIR="$HOME/squashfs-root/" 17 | BENCHMARK_FS="$HOME/filesystem.squashfs" 18 | 19 | # Normalize the environment 20 | sudo rm -f "$BENCHMARK_FS" 2> /dev/null > /dev/null || true 21 | sudo umount /mnt/squashfs 2> /dev/null > /dev/null || true 22 | 23 | # Run the benchmark 24 | echo "Compression" 25 | echo "sudo mksquashfs $BENCHMARK_DIR $BENCHMARK_FS $@" 26 | time sudo mksquashfs "$BENCHMARK_DIR" "$BENCHMARK_FS" "$@" 2> /dev/null > /dev/null 27 | 28 | echo "Approximate compression ratio" 29 | printf "%d / %d\n" \ 30 | $(sudo du -sx --block-size=1 "$BENCHMARK_DIR" | cut -f1) \ 31 | $(sudo du -sx --block-size=1 "$BENCHMARK_FS" | cut -f1); 32 | 33 | # Mount the filesystem 34 | sudo mount -t squashfs "$BENCHMARK_FS" /mnt/squashfs 35 | 36 | echo "Decompression" 37 | time sudo tar -c /mnt/squashfs 2> /dev/null | wc -c > /dev/null 38 | 39 | sudo umount /mnt/squashfs 40 | --------------------------------------------------------------------------------
/lib/zstd/contrib/linux-kernel/test/.gitignore: -------------------------------------------------------------------------------- 1 | *Test 2 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/compiler.h: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_COMIPLER_H_ 2 | #define LINUX_COMIPLER_H_ 3 | 4 | #ifndef __always_inline 5 | # define __always_inline inline 6 | #endif 7 | 8 | #ifndef noinline 9 | # define noinline __attribute__((__noinline__)) 10 | #endif 11 | 12 | #endif // LINUX_COMIPLER_H_ 13 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/errno.h: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_ERRNO_H_ 2 | #define LINUX_ERRNO_H_ 3 | 4 | #define EINVAL 22 5 | 6 | #endif // LINUX_ERRNO_H_ 7 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_KERNEL_H_ 2 | #define LINUX_KERNEL_H_ 3 | 4 | #define ALIGN(x, a) ({ \ 5 | typeof(x) const __xe = (x); \ 6 | typeof(a) const __ae = (a); \ 7 | typeof(a) const __m = __ae - 1; \ 8 | typeof(x) const __r = __xe & __m; \ 9 | __xe + (__r ? 
(__ae - __r) : 0); \ 10 | }) 11 | 12 | #define PTR_ALIGN(p, a) (typeof(p))ALIGN((unsigned long long)(p), (a)) 13 | 14 | #define current Something that doesn't compile :) 15 | 16 | #endif // LINUX_KERNEL_H_ 17 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/math64.h: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_MATH64_H 2 | #define LINUX_MATH64_H 3 | 4 | #include 5 | 6 | static uint64_t div_u64(uint64_t n, uint32_t d) 7 | { 8 | return n / d; 9 | } 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/module.h: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_MODULE_H_ 2 | #define LINUX_MODULE_H_ 3 | 4 | #define EXPORT_SYMBOL(symbol) \ 5 | void* __##symbol = symbol 6 | #define MODULE_LICENSE(license) static char const *const LICENSE = license 7 | #define MODULE_DESCRIPTION(description) \ 8 | static char const *const DESCRIPTION = description 9 | 10 | #endif // LINUX_MODULE_H_ 11 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/string.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /lib/zstd/contrib/linux-kernel/test/include/linux/types.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | -------------------------------------------------------------------------------- /lib/zstd/contrib/premake/premake4.lua: -------------------------------------------------------------------------------- 1 | -- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function 2 | dofile('zstd.lua') 3 | 4 | 
solution 'example' 5 | configurations { 'Debug', 'Release' } 6 | project_zstd('../../lib/') 7 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/.gitignore: -------------------------------------------------------------------------------- 1 | # compilation result 2 | pzstd 3 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/ErrorHolder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | */ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace pzstd { 17 | 18 | // Coordinates graceful shutdown of the pzstd pipeline 19 | class ErrorHolder { 20 | std::atomic error_; 21 | std::string message_; 22 | 23 | public: 24 | ErrorHolder() : error_(false) {} 25 | 26 | bool hasError() noexcept { 27 | return error_.load(); 28 | } 29 | 30 | void setError(std::string message) noexcept { 31 | // Given multiple possibly concurrent calls, exactly one will ever succeed. 
32 | bool expected = false; 33 | if (error_.compare_exchange_strong(expected, true)) { 34 | message_ = std::move(message); 35 | } 36 | } 37 | 38 | bool check(bool predicate, std::string message) noexcept { 39 | if (!predicate) { 40 | setError(std::move(message)); 41 | } 42 | return !hasError(); 43 | } 44 | 45 | std::string getError() noexcept { 46 | error_.store(false); 47 | return std::move(message_); 48 | } 49 | 50 | ~ErrorHolder() { 51 | assert(!hasError()); 52 | } 53 | }; 54 | } 55 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/SkippableFrame.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 
8 | */ 9 | #include "SkippableFrame.h" 10 | #include "mem.h" 11 | #include "utils/Range.h" 12 | 13 | #include 14 | 15 | using namespace pzstd; 16 | 17 | SkippableFrame::SkippableFrame(std::uint32_t size) : frameSize_(size) { 18 | MEM_writeLE32(data_.data(), kSkippableFrameMagicNumber); 19 | MEM_writeLE32(data_.data() + 4, kFrameContentsSize); 20 | MEM_writeLE32(data_.data() + 8, frameSize_); 21 | } 22 | 23 | /* static */ std::size_t SkippableFrame::tryRead(ByteRange bytes) { 24 | if (bytes.size() < SkippableFrame::kSize || 25 | MEM_readLE32(bytes.begin()) != kSkippableFrameMagicNumber || 26 | MEM_readLE32(bytes.begin() + 4) != kFrameContentsSize) { 27 | return 0; 28 | } 29 | return MEM_readLE32(bytes.begin() + 8); 30 | } 31 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/images/Cspeed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/contrib/pzstd/images/Cspeed.png -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/images/Dspeed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/contrib/pzstd/images/Dspeed.png -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 
8 | */ 9 | #include "ErrorHolder.h" 10 | #include "Options.h" 11 | #include "Pzstd.h" 12 | 13 | using namespace pzstd; 14 | 15 | int main(int argc, const char** argv) { 16 | Options options; 17 | switch (options.parse(argc, argv)) { 18 | case Options::Status::Failure: 19 | return 1; 20 | case Options::Status::Message: 21 | return 0; 22 | default: 23 | break; 24 | } 25 | 26 | return pzstdMain(options); 27 | } 28 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/test/BUCK: -------------------------------------------------------------------------------- 1 | cxx_test( 2 | name='options_test', 3 | srcs=['OptionsTest.cpp'], 4 | deps=['//contrib/pzstd:options'], 5 | ) 6 | 7 | cxx_test( 8 | name='pzstd_test', 9 | srcs=['PzstdTest.cpp'], 10 | deps=[ 11 | ':round_trip', 12 | '//contrib/pzstd:libpzstd', 13 | '//contrib/pzstd/utils:scope_guard', 14 | '//programs:datagen', 15 | ], 16 | ) 17 | 18 | cxx_binary( 19 | name='round_trip_test', 20 | srcs=['RoundTripTest.cpp'], 21 | deps=[ 22 | ':round_trip', 23 | '//contrib/pzstd/utils:scope_guard', 24 | '//programs:datagen', 25 | ] 26 | ) 27 | 28 | cxx_library( 29 | name='round_trip', 30 | header_namespace='test', 31 | exported_headers=['RoundTrip.h'], 32 | deps=[ 33 | '//contrib/pzstd:libpzstd', 34 | '//contrib/pzstd:options', 35 | '//contrib/pzstd/utils:scope_guard', 36 | ] 37 | ) 38 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/utils/Likely.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 
8 | */ 9 | 10 | /** 11 | * Compiler hints to indicate the fast path of an "if" branch: whether 12 | * the if condition is likely to be true or false. 13 | * 14 | * @author Tudor Bosman (tudorb@fb.com) 15 | */ 16 | 17 | #pragma once 18 | 19 | #undef LIKELY 20 | #undef UNLIKELY 21 | 22 | #if defined(__GNUC__) && __GNUC__ >= 4 23 | #define LIKELY(x) (__builtin_expect((x), 1)) 24 | #define UNLIKELY(x) (__builtin_expect((x), 0)) 25 | #else 26 | #define LIKELY(x) (x) 27 | #define UNLIKELY(x) (x) 28 | #endif 29 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/utils/ScopeGuard.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | */ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace pzstd { 14 | 15 | /** 16 | * Dismissable scope guard. 17 | * `Function` must be callable and take no parameters. 18 | * Unless `dismiss()` is called, the callable is executed upon destruction of 19 | * `ScopeGuard`. 20 | * 21 | * Example: 22 | * 23 | * auto guard = makeScopeGuard([&] { cleanup(); }); 24 | */ 25 | template 26 | class ScopeGuard { 27 | Function function; 28 | bool dismissed; 29 | 30 | public: 31 | explicit ScopeGuard(Function&& function) 32 | : function(std::move(function)), dismissed(false) {} 33 | 34 | void dismiss() { 35 | dismissed = true; 36 | } 37 | 38 | ~ScopeGuard() noexcept { 39 | if (!dismissed) { 40 | function(); 41 | } 42 | } 43 | }; 44 | 45 | /// Creates a scope guard from `function`. 
46 | template 47 | ScopeGuard makeScopeGuard(Function&& function) { 48 | return ScopeGuard(std::forward(function)); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/utils/test/BUCK: -------------------------------------------------------------------------------- 1 | cxx_test( 2 | name='buffer_test', 3 | srcs=['BufferTest.cpp'], 4 | deps=['//contrib/pzstd/utils:buffer'], 5 | ) 6 | 7 | cxx_test( 8 | name='range_test', 9 | srcs=['RangeTest.cpp'], 10 | deps=['//contrib/pzstd/utils:range'], 11 | ) 12 | 13 | cxx_test( 14 | name='resource_pool_test', 15 | srcs=['ResourcePoolTest.cpp'], 16 | deps=['//contrib/pzstd/utils:resource_pool'], 17 | ) 18 | 19 | cxx_test( 20 | name='scope_guard_test', 21 | srcs=['ScopeGuardTest.cpp'], 22 | deps=['//contrib/pzstd/utils:scope_guard'], 23 | ) 24 | 25 | cxx_test( 26 | name='thread_pool_test', 27 | srcs=['ThreadPoolTest.cpp'], 28 | deps=['//contrib/pzstd/utils:thread_pool'], 29 | ) 30 | 31 | cxx_test( 32 | name='work_queue_test', 33 | srcs=['RangeTest.cpp'], 34 | deps=['//contrib/pzstd/utils:work_queue'], 35 | ) 36 | -------------------------------------------------------------------------------- /lib/zstd/contrib/pzstd/utils/test/ScopeGuardTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 
8 | */ 9 | #include "utils/ScopeGuard.h" 10 | 11 | #include 12 | 13 | using namespace pzstd; 14 | 15 | TEST(ScopeGuard, Dismiss) { 16 | { 17 | auto guard = makeScopeGuard([&] { EXPECT_TRUE(false); }); 18 | guard.dismiss(); 19 | } 20 | } 21 | 22 | TEST(ScopeGuard, Executes) { 23 | bool executed = false; 24 | { 25 | auto guard = makeScopeGuard([&] { executed = true; }); 26 | } 27 | EXPECT_TRUE(executed); 28 | } 29 | -------------------------------------------------------------------------------- /lib/zstd/contrib/seekable_format/examples/.gitignore: -------------------------------------------------------------------------------- 1 | seekable_compression 2 | seekable_decompression 3 | parallel_processing 4 | parallel_compression 5 | -------------------------------------------------------------------------------- /lib/zstd/contrib/snap/snapcraft.yaml: -------------------------------------------------------------------------------- 1 | name: zstd 2 | version: git 3 | summary: Zstandard - Fast real-time compression algorithm 4 | description: | 5 | Zstandard, or zstd as short version, is a fast lossless compression 6 | algorithm, targeting real-time compression scenarios at zlib-level and better 7 | compression ratios. It's backed by a very fast entropy stage, provided by 8 | Huff0 and FSE library 9 | 10 | grade: devel # must be 'stable' to release into candidate/stable channels 11 | confinement: devmode # use 'strict' once you have the right plugs and slots 12 | 13 | apps: 14 | zstd: 15 | command: usr/local/bin/zstd 16 | plugs: [home, removable-media] 17 | zstdgrep: 18 | command: usr/local/bin/zstdgrep 19 | plugs: [home, removable-media] 20 | zstdless: 21 | command: usr/local/bin/zstdless 22 | plugs: [home, removable-media] 23 | 24 | parts: 25 | zstd: 26 | source: . 
27 | plugin: make 28 | build-packages: [g++] 29 | -------------------------------------------------------------------------------- /lib/zstd/doc/README.md: -------------------------------------------------------------------------------- 1 | Zstandard Documentation 2 | ======================= 3 | 4 | This directory contains material defining the Zstandard format, 5 | as well as detailed instructions to use the `zstd` library. 6 | 7 | __`zstd_manual.html`__ : Documentation of `zstd.h` API, in html format. 8 | Click on this link: [http://zstd.net/zstd_manual.html](http://zstd.net/zstd_manual.html) 9 | to display documentation of the latest release in readable format within a browser. 10 | 11 | __`zstd_compression_format.md`__ : This document defines the Zstandard compression format. 12 | Compliant decoders must adhere to this document, 13 | and compliant encoders must generate data that follows it. 14 | 15 | Should you look for resources to develop your own port of the Zstandard algorithm, 16 | you may find the following resources useful : 17 | 18 | __`educational_decoder`__ : This directory contains an implementation of a Zstandard decoder, 19 | compliant with the Zstandard compression format. 20 | It can be used, for example, to better understand the format, 21 | or as the basis for a separate implementation of a Zstandard decoder. 22 | 23 | [__`decode_corpus`__](https://github.com/facebook/zstd/tree/dev/tests#decodecorpus---tool-to-generate-zstandard-frames-for-decoder-testing) : 24 | This tool, stored in the `/tests` directory, is able to generate random valid frames, 25 | which is useful if you wish to test your decoder and verify it fully supports the specification. 
26 | -------------------------------------------------------------------------------- /lib/zstd/doc/educational_decoder/Makefile: -------------------------------------------------------------------------------- 1 | HARNESS_FILES=*.c 2 | 3 | MULTITHREAD_LDFLAGS = -pthread 4 | DEBUGFLAGS= -g -DZSTD_DEBUG=1 5 | CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ 6 | -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) 7 | CFLAGS ?= -O3 8 | CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ 9 | -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ 10 | -Wstrict-prototypes -Wundef -Wformat-security \ 11 | -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ 12 | -Wredundant-decls 13 | CFLAGS += $(DEBUGFLAGS) 14 | CFLAGS += $(MOREFLAGS) 15 | FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS) 16 | 17 | harness: $(HARNESS_FILES) 18 | $(CC) $(FLAGS) $^ -o $@ 19 | 20 | clean: 21 | @$(RM) -f harness 22 | @$(RM) -rf harness.dSYM 23 | 24 | test: harness 25 | @zstd README.md -o tmp.zst 26 | @./harness tmp.zst tmp 27 | @diff -s tmp README.md 28 | @$(RM) -f tmp* 29 | @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md 30 | @zstd -D dictionary README.md -o tmp.zst 31 | @./harness tmp.zst tmp dictionary 32 | @diff -s tmp README.md 33 | @$(RM) -f tmp* dictionary 34 | @make clean 35 | -------------------------------------------------------------------------------- /lib/zstd/doc/educational_decoder/README.md: -------------------------------------------------------------------------------- 1 | Educational Decoder 2 | =================== 3 | 4 | `zstd_decompress.c` is a self-contained implementation in C99 of a decoder, 5 | according to the [Zstandard format specification]. 6 | While it does not implement as many features as the reference decoder, 7 | such as the streaming API or content checksums, it is written to be easy to 8 | follow and understand, to help understand how the Zstandard format works. 
9 | It's laid out to match the [format specification], 10 | so it can be used to understand how complex segments could be implemented. 11 | It also contains implementations of Huffman and FSE table decoding. 12 | 13 | [Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md 14 | [format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md 15 | 16 | `harness.c` provides a simple test harness around the decoder: 17 | 18 | harness <input-file> <output-file> [dictionary] 19 | 20 | As an additional resource to be used with this decoder, 21 | see the `decodecorpus` tool in the [tests] directory. 22 | It generates valid Zstandard frames that can be used to verify 23 | a Zstandard decoder implementation. 24 | Note that to use the tool to verify this decoder implementation, 25 | the `--content-size` flag should be set, 26 | as this decoder does not handle streaming decoding, 27 | and so it must know the decompressed size in advance. 28 | 29 | [tests]: https://github.com/facebook/zstd/blob/dev/tests/ 30 | -------------------------------------------------------------------------------- /lib/zstd/doc/images/CSpeed2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/CSpeed2.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/DCspeed5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/DCspeed5.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/DSpeed3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/DSpeed3.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/cdict_v136.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/cdict_v136.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/dict-cr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/dict-cr.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/dict-cs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/dict-cs.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/dict-ds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/dict-ds.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/zstd_cdict_v1_3_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/zstd_cdict_v1_3_5.png -------------------------------------------------------------------------------- /lib/zstd/doc/images/zstd_logo86.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/doc/images/zstd_logo86.png -------------------------------------------------------------------------------- /lib/zstd/examples/.gitignore: -------------------------------------------------------------------------------- 1 | #build 2 | simple_compression 3 | simple_decompression 4 | dictionary_compression 5 | dictionary_decompression 6 | streaming_compression 7 | streaming_decompression 8 | multiple_streaming_compression 9 | streaming_memory_usage 10 | 11 | #test artefact 12 | tmp* 13 | test* 14 | *.zst 15 | -------------------------------------------------------------------------------- /lib/zstd/lib/.gitignore: -------------------------------------------------------------------------------- 1 | # make install artefact 2 | libzstd.pc 3 | libzstd-nomt 4 | -------------------------------------------------------------------------------- /lib/zstd/lib/compress/zstd_double_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_DOUBLE_FAST_H 12 | #define ZSTD_DOUBLE_FAST_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "mem.h" /* U32 */ 19 | #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ 20 | 21 | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 23 | size_t ZSTD_compressBlock_doubleFast( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_doubleFast_dictMatchState( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_doubleFast_extDict( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | 34 | #if defined (__cplusplus) 35 | } 36 | #endif 37 | 38 | #endif /* ZSTD_DOUBLE_FAST_H */ 39 | -------------------------------------------------------------------------------- /lib/zstd/lib/compress/zstd_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_FAST_H 12 | #define ZSTD_FAST_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "mem.h" /* U32 */ 19 | #include "zstd_compress_internal.h" 20 | 21 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 23 | size_t ZSTD_compressBlock_fast( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_fast_dictMatchState( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_fast_extDict( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | #if defined (__cplusplus) 34 | } 35 | #endif 36 | 37 | #endif /* ZSTD_FAST_H */ 38 | -------------------------------------------------------------------------------- /lib/zstd/lib/decompress/zstd_ddict.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | 12 | #ifndef ZSTD_DDICT_H 13 | #define ZSTD_DDICT_H 14 | 15 | /*-******************************************************* 16 | * Dependencies 17 | *********************************************************/ 18 | #include /* size_t */ 19 | #include "zstd.h" /* ZSTD_DDict, and several public functions */ 20 | 21 | 22 | /*-******************************************************* 23 | * Interface 24 | *********************************************************/ 25 | 26 | /* note: several prototypes are already published in `zstd.h` : 27 | * ZSTD_createDDict() 28 | * ZSTD_createDDict_byReference() 29 | * ZSTD_createDDict_advanced() 30 | * ZSTD_freeDDict() 31 | * ZSTD_initStaticDDict() 32 | * ZSTD_sizeof_DDict() 33 | * ZSTD_estimateDDictSize() 34 | * ZSTD_getDictID_fromDict() 35 | */ 36 | 37 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); 38 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); 39 | 40 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); 41 | 42 | 43 | 44 | #endif /* ZSTD_DDICT_H */ 45 | -------------------------------------------------------------------------------- /lib/zstd/lib/deprecated/zbuff_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | /*-************************************* 12 | * Dependencies 13 | ***************************************/ 14 | #include "error_private.h" 15 | #include "zbuff.h" 16 | 17 | /*-**************************************** 18 | * ZBUFF Error Management (deprecated) 19 | ******************************************/ 20 | 21 | /*! ZBUFF_isError() : 22 | * tells if a return value is an error code */ 23 | unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } 24 | /*! ZBUFF_getErrorName() : 25 | * provides error code string from function result (useful for debugging) */ 26 | const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } 27 | -------------------------------------------------------------------------------- /lib/zstd/lib/dll/example/build_package.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | MKDIR bin\dll bin\static bin\example bin\include 3 | COPY tests\fullbench.c bin\example\ 4 | COPY programs\datagen.c bin\example\ 5 | COPY programs\datagen.h bin\example\ 6 | COPY programs\util.h bin\example\ 7 | COPY programs\platform.h bin\example\ 8 | COPY lib\common\mem.h bin\example\ 9 | COPY lib\common\zstd_errors.h bin\example\ 10 | COPY lib\common\zstd_internal.h bin\example\ 11 | COPY lib\common\error_private.h bin\example\ 12 | COPY lib\common\xxhash.h bin\example\ 13 | COPY lib\zstd.h bin\include\ 14 | COPY lib\libzstd.a bin\static\libzstd_static.lib 15 | COPY lib\dll\libzstd.* bin\dll\ 16 | COPY lib\dll\example\Makefile bin\example\ 17 | COPY lib\dll\example\fullbench-dll.* bin\example\ 18 | COPY lib\dll\example\README.md bin\ 19 | COPY programs\zstd.exe bin\zstd.exe 20 | -------------------------------------------------------------------------------- /lib/zstd/lib/dll/example/fullbench-dll.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 12.00 2 | # 
Visual Studio Express 2012 for Windows Desktop 3 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench-dll", "fullbench-dll.vcxproj", "{13992FD2-077E-4954-B065-A428198201A9}" 4 | EndProject 5 | Global 6 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 7 | Debug|Win32 = Debug|Win32 8 | Debug|x64 = Debug|x64 9 | Release|Win32 = Release|Win32 10 | Release|x64 = Release|x64 11 | EndGlobalSection 12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 13 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.ActiveCfg = Debug|Win32 14 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.Build.0 = Debug|Win32 15 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.ActiveCfg = Debug|x64 16 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.Build.0 = Debug|x64 17 | {13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.ActiveCfg = Release|Win32 18 | {13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32 19 | {13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64 20 | {13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64 21 | EndGlobalSection 22 | GlobalSection(SolutionProperties) = preSolution 23 | HideSolutionNode = FALSE 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /lib/zstd/lib/libzstd.pc.in: -------------------------------------------------------------------------------- 1 | # ZSTD - standard compression algorithm 2 | # Copyright (C) 2014-2016, Yann Collet, Facebook 3 | # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 4 | 5 | prefix=@PREFIX@ 6 | libdir=@LIBDIR@ 7 | includedir=@INCLUDEDIR@ 8 | 9 | Name: zstd 10 | Description: fast lossless compression algorithm library 11 | URL: http://www.zstd.net/ 12 | Version: @VERSION@ 13 | Libs: -L${libdir} -lzstd 14 | Cflags: -I${includedir} 15 | -------------------------------------------------------------------------------- 
/lib/zstd/programs/.gitignore: -------------------------------------------------------------------------------- 1 | # local binary (Makefile) 2 | zstd 3 | zstd32 4 | zstd4 5 | zstd-compress 6 | zstd-decompress 7 | zstd-frugal 8 | zstd-small 9 | zstd-nolegacy 10 | 11 | # Object files 12 | *.o 13 | *.ko 14 | default.profraw 15 | have_zlib 16 | 17 | # Executables 18 | *.exe 19 | *.out 20 | *.app 21 | 22 | # Default result files 23 | dictionary 24 | grillResults.txt 25 | _* 26 | tmp* 27 | *.zst 28 | result 29 | out 30 | 31 | # fuzzer 32 | afl 33 | 34 | # Misc files 35 | *.bat 36 | dirTest* 37 | -------------------------------------------------------------------------------- /lib/zstd/programs/BUCK: -------------------------------------------------------------------------------- 1 | cxx_binary( 2 | name='zstd', 3 | headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), 4 | srcs=glob(['*.c'], excludes=['datagen.c']), 5 | deps=[ 6 | ':datagen', 7 | ':util', 8 | '//lib:zstd', 9 | '//lib:zdict', 10 | '//lib:mem', 11 | '//lib:xxhash', 12 | ], 13 | preprocessor_flags=[ 14 | '-DZSTD_GZCOMPRESS', 15 | '-DZSTD_GZDECOMPRESS', 16 | '-DZSTD_LZMACOMPRESS', 17 | '-DZSTD_LZMADECOMPRESS', 18 | '-DZSTD_LZ4COMPRESS', 19 | '-DZSTD_LZ4DECOMPRESS', 20 | ], 21 | linker_flags=[ 22 | '-lz', 23 | '-llzma', 24 | '-llz4', 25 | ], 26 | ) 27 | 28 | cxx_library( 29 | name='datagen', 30 | visibility=['PUBLIC'], 31 | header_namespace='', 32 | exported_headers=['datagen.h'], 33 | srcs=['datagen.c'], 34 | deps=['//lib:mem'], 35 | ) 36 | 37 | 38 | cxx_library( 39 | name='util', 40 | visibility=['PUBLIC'], 41 | header_namespace='', 42 | exported_headers=['util.h', 'platform.h'], 43 | deps=['//lib:mem'], 44 | ) 45 | -------------------------------------------------------------------------------- /lib/zstd/programs/datagen.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 
3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | 12 | #ifndef DATAGEN_H 13 | #define DATAGEN_H 14 | 15 | #include <stddef.h> /* size_t */ 16 | 17 | void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed); 18 | void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); 19 | /*!RDG_genBuffer 20 | Generate 'size' bytes of compressible data into 'buffer'. 21 | Compressibility can be controlled using 'matchProba', which is a floating point value between 0 and 1. 22 | 'litProba' is optional; it affects the variability of individual bytes. If litProba==0.0, the default value will be used. 23 | The generated data pattern can be modified by using a different 'seed'. 24 | For a given triplet (matchProba, litProba, seed), the function always generates the same content. 25 | 26 | RDG_genStdout 27 | Same as RDG_genBuffer, but generates data into stdout 28 | */ 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /lib/zstd/programs/windres/generate_res.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM http://stackoverflow.com/questions/708238/how-do-i-add-an-icon-to-a-mingw-gcc-compiled-executable 3 | 4 | where /q windres.exe 5 | IF ERRORLEVEL 1 ( 6 | ECHO The windres.exe is missing. Ensure it is installed and placed in your PATH. 
7 | EXIT /B 8 | ) ELSE ( 9 | windres.exe -I ../lib -I windres -i windres/zstd.rc -O coff -F pe-x86-64 -o windres/zstd64.res 10 | windres.exe -I ../lib -I windres -i windres/zstd.rc -O coff -F pe-i386 -o windres/zstd32.res 11 | ) 12 | -------------------------------------------------------------------------------- /lib/zstd/programs/windres/verrsrc.h: -------------------------------------------------------------------------------- 1 | /* minimal set of defines required to generate zstd.res from zstd.rc */ 2 | 3 | #define VS_VERSION_INFO 1 4 | 5 | #define VS_FFI_FILEFLAGSMASK 0x0000003FL 6 | #define VOS_NT_WINDOWS32 0x00040004L 7 | #define VFT_DLL 0x00000002L 8 | #define VFT2_UNKNOWN 0x00000000L 9 | -------------------------------------------------------------------------------- /lib/zstd/programs/windres/zstd32.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/programs/windres/zstd32.res -------------------------------------------------------------------------------- /lib/zstd/programs/windres/zstd64.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/programs/windres/zstd64.res -------------------------------------------------------------------------------- /lib/zstd/programs/zstdgrep.1: -------------------------------------------------------------------------------- 1 | . 2 | .TH "ZSTDGREP" "1" "November 2018" "zstd 1.3.8" "User Commands" 3 | . 4 | .SH "NAME" 5 | \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files 6 | . 7 | .SH "SYNOPSIS" 8 | \fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.] 9 | . 
10 | .SH "DESCRIPTION" 11 | \fBzstdgrep\fR runs \fBgrep (1)\fR on files or stdin, if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\. 12 | . 13 | .P 14 | The grep\-flags and pattern arguments are passed on to \fBgrep (1)\fR\. If an \fB\-e\fR flag is found in the \fBgrep\-flags\fR, \fBzstdgrep\fR will not look for a pattern argument\. 15 | . 16 | .SH "EXIT STATUS" 17 | In case of missing arguments or missing pattern, 1 will be returned, otherwise 0\. 18 | . 19 | .SH "SEE ALSO" 20 | \fBzstd (1)\fR 21 | . 22 | .SH "AUTHORS" 23 | Thomas Klausner \fIwiz@NetBSD\.org\fR 24 | -------------------------------------------------------------------------------- /lib/zstd/programs/zstdgrep.1.md: -------------------------------------------------------------------------------- 1 | zstdgrep(1) -- print lines matching a pattern in zstandard-compressed files 2 | ============================================================================ 3 | 4 | SYNOPSIS 5 | -------- 6 | 7 | `zstdgrep` [*grep-flags*] [--] _pattern_ [_files_ ...] 8 | 9 | 10 | DESCRIPTION 11 | ----------- 12 | `zstdgrep` runs `grep (1)` on files or stdin, if no files argument is given, after decompressing them with `zstdcat (1)`. 13 | 14 | The grep-flags and pattern arguments are passed on to `grep (1)`. If an `-e` flag is found in the `grep-flags`, `zstdgrep` will not look for a pattern argument. 15 | 16 | EXIT STATUS 17 | ----------- 18 | In case of missing arguments or missing pattern, 1 will be returned, otherwise 0. 
19 | 20 | SEE ALSO 21 | -------- 22 | `zstd (1)` 23 | 24 | AUTHORS 25 | ------- 26 | Thomas Klausner <wiz@NetBSD.org> 27 | -------------------------------------------------------------------------------- /lib/zstd/programs/zstdless: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | zstdcat "$@" | less 3 | -------------------------------------------------------------------------------- /lib/zstd/programs/zstdless.1: -------------------------------------------------------------------------------- 1 | . 2 | .TH "ZSTDLESS" "1" "November 2018" "zstd 1.3.8" "User Commands" 3 | . 4 | .SH "NAME" 5 | \fBzstdless\fR \- view zstandard\-compressed files 6 | . 7 | .SH "SYNOPSIS" 8 | \fBzstdless\fR [\fIflags\fR] [\fIfile\fR \.\.\.] 9 | . 10 | .SH "DESCRIPTION" 11 | \fBzstdless\fR runs \fBless (1)\fR on files or stdin, if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\. 12 | . 13 | .SH "SEE ALSO" 14 | \fBzstd (1)\fR 15 | -------------------------------------------------------------------------------- /lib/zstd/programs/zstdless.1.md: -------------------------------------------------------------------------------- 1 | zstdless(1) -- view zstandard-compressed files 2 | ============================================================================ 3 | 4 | SYNOPSIS 5 | -------- 6 | 7 | `zstdless` [*flags*] [_file_ ...] 8 | 9 | 10 | DESCRIPTION 11 | ----------- 12 | `zstdless` runs `less (1)` on files or stdin, if no files argument is given, after decompressing them with `zstdcat (1)`. 
13 | 14 | SEE ALSO 15 | -------- 16 | `zstd (1)` 17 | -------------------------------------------------------------------------------- /lib/zstd/tests/.gitignore: -------------------------------------------------------------------------------- 1 | # local binary (Makefile) 2 | fullbench 3 | fullbench32 4 | fullbench-lib 5 | fuzzer 6 | fuzzer32 7 | fuzzer-dll 8 | zbufftest 9 | zbufftest32 10 | zbufftest-dll 11 | zstreamtest 12 | zstreamtest32 13 | zstreamtest_asan 14 | zstreamtest_tsan 15 | zstreamtest-dll 16 | datagen 17 | paramgrill 18 | paramgrill32 19 | roundTripCrash 20 | longmatch 21 | symbols 22 | legacy 23 | decodecorpus 24 | pool 25 | poolTests 26 | invalidDictionaries 27 | checkTag 28 | zcat 29 | zstdcat 30 | tm 31 | 32 | # Tmp test directory 33 | zstdtest 34 | speedTest 35 | versionsTest 36 | namespaceTest 37 | 38 | # Local script 39 | startSpeedTest 40 | speedTest.pid 41 | 42 | # Object files 43 | *.o 44 | *.ko 45 | 46 | # Executables 47 | *.exe 48 | *.out 49 | *.app 50 | 51 | # Default result files 52 | dictionary 53 | grillResults.txt 54 | _* 55 | tmp* 56 | *.zst 57 | *.gz 58 | result 59 | out 60 | *.zstd 61 | 62 | # fuzzer 63 | afl 64 | 65 | # Misc files 66 | *.bat 67 | dirTest* 68 | -------------------------------------------------------------------------------- /lib/zstd/tests/files/huffman-compressed-larger: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/tests/files/huffman-compressed-larger -------------------------------------------------------------------------------- /lib/zstd/tests/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | # test artefacts 2 | corpora 3 | block_decompress 4 | block_round_trip 5 | simple_decompress 6 | simple_round_trip 7 | stream_decompress 8 | stream_round_trip 9 | 
-------------------------------------------------------------------------------- /lib/zstd/tests/fuzz/default.options: -------------------------------------------------------------------------------- 1 | [libfuzzer] 2 | max_len = 8192 3 | -------------------------------------------------------------------------------- /lib/zstd/tests/fuzz/simple_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | */ 9 | 10 | /** 11 | * This fuzz target attempts to decompress the fuzzed data with the simple 12 | * decompression function to ensure the decompressor never crashes. 13 | */ 14 | 15 | #include <stddef.h> 16 | #include <stdlib.h> 17 | #include <stdio.h> 18 | #include "fuzz_helpers.h" 19 | #include "zstd.h" 20 | 21 | static ZSTD_DCtx *dctx = NULL; 22 | static void* rBuf = NULL; 23 | static size_t bufSize = 0; 24 | 25 | int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) 26 | { 27 | size_t neededBufSize; 28 | 29 | FUZZ_seed(&src, &size); 30 | neededBufSize = MAX(20 * size, (size_t)256 << 10); 31 | 32 | /* Allocate all buffers and contexts if not already allocated */ 33 | if (neededBufSize > bufSize) { 34 | free(rBuf); 35 | rBuf = malloc(neededBufSize); 36 | bufSize = neededBufSize; 37 | FUZZ_ASSERT(rBuf); 38 | } 39 | if (!dctx) { 40 | dctx = ZSTD_createDCtx(); 41 | FUZZ_ASSERT(dctx); 42 | } 43 | ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size); 44 | 45 | #ifndef STATEFUL_FUZZING 46 | ZSTD_freeDCtx(dctx); dctx = NULL; 47 | #endif 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /lib/zstd/tests/fuzz/zstd_helpers.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | */ 9 | 10 | /** 11 | * Helper functions for fuzzing. 12 | */ 13 | 14 | #ifndef ZSTD_HELPERS_H 15 | #define ZSTD_HELPERS_H 16 | 17 | #include "zstd.h" 18 | #include 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state); 25 | 26 | ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state); 27 | ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state); 28 | ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state); 29 | 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif /* ZSTD_HELPERS_H */ 36 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/helin-segv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Before gzip-1.4, gzip -d would segfault on some inputs. 3 | 4 | # Copyright (C) 2010-2016 Free Software Foundation, Inc. 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 
15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 21 | 22 | # This test case was provided by Aki Helin. 23 | printf '\037\235\220\0\0\0\304' > helin.gz || framework_failure_ 24 | printf '\0\0' > exp || framework_failure_ 25 | 26 | fail=0 27 | 28 | gzip -dc helin.gz > out || fail=1 29 | compare exp out || fail=1 30 | 31 | Exit $fail 32 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/hufts-segv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soedinglab/MMseqs2/42f129b6bbea1524f739cecf6296eea97bc99911/lib/zstd/tests/gzip/hufts-segv.gz -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/hufts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Exercise a bug whereby an invalid input could make gzip -d misbehave. 3 | 4 | # Copyright (C) 2009-2016 Free Software Foundation, Inc. 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 
21 | 22 | printf '\n...: invalid compressed data--format violated\n' > exp \ 23 | || framework_failure_ 24 | 25 | fail=0 26 | gzip -dc "$abs_srcdir/hufts-segv.gz" > out 2> err 27 | test $? = 1 || fail=1 28 | 29 | compare /dev/null out || fail=1 30 | 31 | sed 's/.*hufts-segv.gz: /...: /' err > k; mv k err || fail=1 32 | compare exp err || fail=1 33 | 34 | Exit $fail 35 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/init.cfg: -------------------------------------------------------------------------------- 1 | # This file is sourced by init.sh, *before* its initialization. 2 | 3 | # This goes hand in hand with the "exec 9>&2;" in Makefile.am's 4 | # TESTS_ENVIRONMENT definition. 5 | stderr_fileno_=9 6 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/list.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Exercise the --list option. 3 | 4 | # Copyright 2016 Free Software Foundation, Inc. 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 
21 | 22 | echo zoology zucchini > in || framework_failure_ 23 | cp in orig || framework_failure_ 24 | 25 | gzip -l in && fail=1 26 | gzip -9 in || fail=1 27 | gzip -l in.gz >out1 || fail=1 28 | gzip -l in.gz | cat >out2 || fail=1 29 | compare out1 out2 || fail=1 30 | 31 | Exit $fail 32 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/memcpy-abuse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Before gzip-1.4, this the use of memcpy in inflate_codes could 3 | # mistakenly operate on overlapping regions. Exercise that code. 4 | 5 | # Copyright (C) 2010-2016 Free Software Foundation, Inc. 6 | 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | 17 | # You should have received a copy of the GNU General Public License 18 | # along with this program. If not, see . 19 | # limit so don't run it by default. 20 | 21 | . "${srcdir=.}/init.sh"; path_prepend_ . 22 | 23 | # The input must be larger than 32KiB and slightly 24 | # less uniform than e.g., all zeros. 25 | printf wxy%032767d 0 | tee in | gzip > in.gz || framework_failure_ 26 | 27 | fail=0 28 | 29 | # Before the fix, this would call memcpy with overlapping regions. 
30 | gzip -dc in.gz > out || fail=1 31 | 32 | compare in out || fail=1 33 | 34 | Exit $fail 35 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/null-suffix-clobber.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Before gzip-1.5, gzip -d -S '' k.gz would delete F.gz and not create "F" 3 | 4 | # Copyright (C) 2010-2016 Free Software Foundation, Inc. 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 21 | 22 | printf anything | gzip > F.gz || framework_failure_ 23 | echo y > yes || framework_failure_ 24 | echo "gzip: invalid suffix ''" > expected-err || framework_failure_ 25 | 26 | fail=0 27 | 28 | gzip ---presume-input-tty -d -S '' F.gz < yes > out 2>err && fail=1 29 | 30 | compare /dev/null out || fail=1 31 | compare expected-err err || fail=1 32 | 33 | test -f F.gz || fail=1 34 | 35 | Exit $fail 36 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/stdin.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Ensure that gzip interprets "-" as stdin. 3 | 4 | # Copyright (C) 2009-2016 Free Software Foundation, Inc. 
5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 21 | 22 | printf a | gzip > in || framework_failure_ 23 | printf aaa > exp || framework_failure_ 24 | 25 | fail=0 26 | gzip -dc in - in < in > out 2>err || fail=1 27 | 28 | compare exp out || fail=1 29 | compare /dev/null err || fail=1 30 | 31 | Exit $fail 32 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/trailing-nul.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # gzip accepts trailing NUL bytes; don't fail if there is exactly one. 3 | # Before gzip-1.4, this would fail. 4 | 5 | # Copyright (C) 2009-2016 Free Software Foundation, Inc. 6 | 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 
16 | 17 | # You should have received a copy of the GNU General Public License 18 | # along with this program. If not, see . 19 | # limit so don't run it by default. 20 | 21 | . "${srcdir=.}/init.sh"; path_prepend_ . 22 | 23 | (echo 0 | gzip; printf '\0') > 0.gz || framework_failure_ 24 | (echo 00 | gzip; printf '\0\0') > 00.gz || framework_failure_ 25 | (echo 1 | gzip; printf '\1') > 1.gz || framework_failure_ 26 | 27 | fail=0 28 | 29 | for i in 0 00 1; do 30 | gzip -d $i.gz; ret=$? 31 | test $ret -eq $i || fail=1 32 | test $ret = 1 && continue 33 | echo $i > exp || fail=1 34 | compare exp $i || fail=1 35 | done 36 | 37 | Exit $fail 38 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/z-suffix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Check that -Sz works. 3 | 4 | # Copyright 2014-2016 Free Software Foundation, Inc. 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 21 | 22 | printf anything > F && cp F G || framework_failure_ 23 | gzip -Sz F || fail=1 24 | test ! -f F || fail=1 25 | test -f Fz || fail=1 26 | gzip -dSz F || fail=1 27 | test ! 
-f Fz || fail=1 28 | compare F G || fail=1 29 | 30 | Exit $fail 31 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/zdiff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Exercise zdiff with two compressed inputs. 3 | # Before gzip-1.4, this would fail. 4 | 5 | # Copyright (C) 2009-2016 Free Software Foundation, Inc. 6 | 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | 17 | # You should have received a copy of the GNU General Public License 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | # limit so don't run it by default. 20 | 21 | . "${srcdir=.}/init.sh"; path_prepend_ . 22 | 23 | echo a > a || framework_failure_ 24 | echo b > b || framework_failure_ 25 | gzip a b || framework_failure_ 26 | 27 | cat <<EOF > exp 28 | 1c1 29 | < a 30 | --- 31 | > b 32 | EOF 33 | 34 | fail=0 35 | zdiff a.gz b.gz > out 2>&1 36 | test $? = 1 || fail=1 37 | 38 | compare exp out || fail=1 39 | 40 | rm -f out 41 | # expect success, for equal files 42 | zdiff a.gz a.gz > out 2> err || fail=1 43 | # expect no output 44 | test -s out && fail=1 45 | # expect no stderr 46 | test -s err && fail=1 47 | 48 | Exit $fail 49 | -------------------------------------------------------------------------------- /lib/zstd/tests/gzip/znew-k.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Check that znew -K works without compress(1). 
3 | 4 | # Copyright (C) 2010-2016 Free Software Foundation, Inc. 5 | 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see . 18 | # limit so don't run it by default. 19 | 20 | . "${srcdir=.}/init.sh"; path_prepend_ . 21 | 22 | cat <<'EOF' >compress || framework_failure_ 23 | #!/bin/sh 24 | echo >&2 'compress has been invoked' 25 | exit 1 26 | EOF 27 | chmod +x compress || framework_failure_ 28 | 29 | # Note that the basename must have a length of 6 or greater. 30 | # Otherwise, "test -f $name" below would fail. 31 | name=123456.Z 32 | 33 | printf '%1012977s' ' ' | gzip -c > $name || framework_failure_ 34 | 35 | fail=0 36 | 37 | znew -K $name || fail=1 38 | test -f $name || fail=1 39 | 40 | Exit $fail 41 | -------------------------------------------------------------------------------- /lib/zstd/tests/rateLimiter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # ################################################################ 4 | # Copyright (c) 2018-present, Facebook, Inc. 5 | # All rights reserved. 6 | # 7 | # This source code is licensed under both the BSD-style license (found in the 8 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 9 | # in the COPYING file in the root directory of this source tree). 
10 | # ########################################################################## 11 | 12 | # Rate limiter, replacement for pv 13 | # this rate limiter does not "catch up" after a blocking period 14 | # Limitations: 15 | # - only accepts limit speed in MB/s 16 | 17 | import sys 18 | import time 19 | 20 | MB = 1024 * 1024 21 | rate = float(sys.argv[1]) * MB 22 | start = time.time() 23 | total_read = 0 24 | 25 | # sys.stderr.close() # remove error message, for Ctrl+C 26 | 27 | try: 28 | buf = " " 29 | while len(buf): 30 | now = time.time() 31 | to_read = max(int(rate * (now - start)), 1) 32 | max_buf_size = 1 * MB 33 | to_read = min(to_read, max_buf_size) 34 | start = now 35 | 36 | buf = sys.stdin.buffer.read(to_read) 37 | sys.stdout.buffer.write(buf) 38 | 39 | except (KeyboardInterrupt, BrokenPipeError) as e: 40 | pass 41 | -------------------------------------------------------------------------------- /lib/zstd/tests/regression/levels.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef LEVEL 12 | # error LEVEL(x) must be defined 13 | #endif 14 | #ifndef FAST_LEVEL 15 | # error FAST_LEVEL(x) must be defined 16 | #endif 17 | 18 | /** 19 | * The levels are chosen to trigger every strategy in every source size, 20 | * as well as some fast levels and the default level. 21 | * If you change the compression levels, you should probably update these. 
22 | */ 23 | 24 | FAST_LEVEL(5) 25 | 26 | FAST_LEVEL(3) 27 | 28 | FAST_LEVEL(1) 29 | LEVEL(0) 30 | LEVEL(1) 31 | 32 | LEVEL(3) 33 | LEVEL(4) 34 | LEVEL(5) 35 | LEVEL(6) 36 | LEVEL(7) 37 | 38 | LEVEL(9) 39 | 40 | LEVEL(13) 41 | 42 | LEVEL(16) 43 | 44 | LEVEL(19) 45 | -------------------------------------------------------------------------------- /lib/zstd/tests/regression/result.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #include "result.h" 12 | 13 | char const* result_get_error_string(result_t result) { 14 | switch (result_get_error(result)) { 15 | case result_error_ok: 16 | return "okay"; 17 | case result_error_skip: 18 | return "skip"; 19 | case result_error_system_error: 20 | return "system error"; 21 | case result_error_compression_error: 22 | return "compression error"; 23 | case result_error_decompression_error: 24 | return "decompression error"; 25 | case result_error_round_trip_error: 26 | return "round trip error"; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /lib/zstd/zlibWrapper/.gitignore: -------------------------------------------------------------------------------- 1 | # Default result files 2 | _* 3 | example.* 4 | example_zstd.* 5 | example_gz.* 6 | fitblk.* 7 | fitblk_zstd.* 8 | zwrapbench.* 9 | foo.gz 10 | 11 | minigzip 12 | minigzip_zstd 13 | example 14 | example_zstd 15 | fitblk 16 | fitblk_zstd 17 | zwrapbench 18 | 19 | # Misc files 20 | *.bat 21 | *.zip 22 | *.txt 23 | 24 | # Directories 25 | minizip/ 26 | 
-------------------------------------------------------------------------------- /lib/zstd/zlibWrapper/BUCK: -------------------------------------------------------------------------------- 1 | cxx_library( 2 | name='zlib_wrapper', 3 | visibility=['PUBLIC'], 4 | exported_linker_flags=['-lz'], 5 | header_namespace='', 6 | exported_headers=['zstd_zlibwrapper.h'], 7 | headers=[ 8 | 'gzcompatibility.h', 9 | 'gzguts.h', 10 | ], 11 | srcs=glob(['*.c']), 12 | deps=[ 13 | '//lib:zstd', 14 | '//lib:zstd_common', 15 | ], 16 | ) 17 | 18 | cxx_binary( 19 | name='minigzip', 20 | srcs=['examples/minigzip.c'], 21 | deps=[':zlib_wrapper'], 22 | ) 23 | -------------------------------------------------------------------------------- /lib/zstd/zlibWrapper/gzclose.c: -------------------------------------------------------------------------------- 1 | /* gzclose.c contains minimal changes required to be compiled with zlibWrapper: 2 | * - gz_statep was converted to union to work with -Wstrict-aliasing=1 */ 3 | 4 | /* gzclose.c -- zlib gzclose() function 5 | * Copyright (C) 2004, 2010 Mark Adler 6 | * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html 7 | */ 8 | 9 | #include "gzguts.h" 10 | 11 | /* gzclose() is in a separate file so that it is linked in only if it is used. 12 | That way the other gzclose functions can be used instead to avoid linking in 13 | unneeded compression or decompression routines. */ 14 | int ZEXPORT gzclose(file) 15 | gzFile file; 16 | { 17 | #ifndef NO_GZCOMPRESS 18 | gz_statep state; 19 | 20 | if (file == NULL) 21 | return Z_STREAM_ERROR; 22 | state = (gz_statep)file; 23 | 24 | return state.state->mode == GZ_READ ? 
gzclose_r(file) : gzclose_w(file); 25 | #else 26 | return gzclose_r(file); 27 | #endif 28 | } 29 | -------------------------------------------------------------------------------- /src/alignment/BandedNucleotideAligner.h: -------------------------------------------------------------------------------- 1 | // 2 | // Written by Martin Steinegger 3 | // 4 | // Wrapper for KSW2 aligner. 5 | // Local banded nucleotide aligner 6 | // 7 | #include 8 | #include 9 | #include "StripedSmithWaterman.h" 10 | 11 | #include "Util.h" 12 | #include "SubstitutionMatrix.h" 13 | #include "Debug.h" 14 | 15 | 16 | class BandedNucleotideAligner { 17 | public: 18 | 19 | 20 | BandedNucleotideAligner(BaseMatrix *subMat, size_t maxSequenceLength, int gapo, int gape, int zdrop); 21 | 22 | ~BandedNucleotideAligner(); 23 | 24 | void initQuery(Sequence *q); 25 | 26 | s_align align(Sequence * targetSeqObj, int diagonal, bool reverse, 27 | std::string & backtrace, EvalueComputation * evaluer, bool wrappedScoring=false); 28 | 29 | private: 30 | SubstitutionMatrix::FastMatrix fastMatrix; 31 | uint8_t * targetSeqRev; 32 | int targetSeqRevDataLen; 33 | uint8_t * querySeq; 34 | uint8_t * querySeqRev; 35 | int querySeqRevDataLen; 36 | uint8_t * queryRevCompSeq; 37 | char * queryRevCompCharSeq; 38 | uint8_t * queryRevCompSeqRev; 39 | Sequence * querySeqObj; 40 | int8_t * mat; 41 | NucleotideMatrix * subMat; 42 | // uint32_t * cigar; 43 | int gapo; 44 | int gape; 45 | int zdrop; 46 | }; 47 | -------------------------------------------------------------------------------- /src/alignment/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(alignment_header_files 2 | alignment/Alignment.h 3 | alignment/CompressedA3M.h 4 | alignment/EvalueComputation.h 5 | alignment/Matcher.h 6 | alignment/MsaFilter.h 7 | alignment/MultipleAlignment.h 8 | alignment/PSSMCalculator.h 9 | alignment/StripedSmithWaterman.h 10 | alignment/BandedNucleotideAligner.h 11 | 
alignment/DistanceCalculator.h 12 | PARENT_SCOPE 13 | ) 14 | 15 | set(alignment_source_files 16 | alignment/Alignment.cpp 17 | alignment/CompressedA3M.cpp 18 | alignment/Main.cpp 19 | alignment/Matcher.cpp 20 | alignment/MsaFilter.cpp 21 | alignment/MultipleAlignment.cpp 22 | alignment/PSSMCalculator.cpp 23 | alignment/StripedSmithWaterman.cpp 24 | alignment/BandedNucleotideAligner.cpp 25 | alignment/rescorediagonal.cpp 26 | PARENT_SCOPE 27 | ) 28 | -------------------------------------------------------------------------------- /src/alignment/CompressedA3M.h: -------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_COMPRESSEDA3M_H 2 | #define MMSEQS_COMPRESSEDA3M_H 3 | 4 | #include "Matcher.h" 5 | #include "DBReader.h" 6 | 7 | class DBConcat; 8 | 9 | class CompressedA3M { 10 | public: 11 | static void hitToBuffer(unsigned int targetId, const Matcher::result_t& hit, std::string& buffer); 12 | 13 | static std::string extractA3M(const char *data, size_t data_size, 14 | DBReader& sequenceReader, 15 | DBReader& headerReader, int thread_idx); 16 | 17 | static void extractMatcherResults(unsigned int &key, std::vector &results, 18 | const char *data, size_t dataSize, DBReader& sequenceReader, bool skipFirst); 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/clustering/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(clustering_header_files 2 | clustering/AlignmentSymmetry.h 3 | clustering/Clustering.h 4 | clustering/ClusteringAlgorithms.h 5 | clustering/Main.cpp 6 | PARENT_SCOPE 7 | ) 8 | 9 | set(clustering_source_files 10 | clustering/AlignmentSymmetry.cpp 11 | clustering/Clustering.cpp 12 | clustering/ClusteringAlgorithms.cpp 13 | clustering/Main.cpp 14 | PARENT_SCOPE 15 | ) 16 | -------------------------------------------------------------------------------- /src/clustering/Clustering.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CLUSTERING_H 2 | #define CLUSTERING_H 3 | 4 | #include 5 | 6 | #include "DBReader.h" 7 | #include "DBWriter.h" 8 | 9 | class Clustering { 10 | public: 11 | Clustering(const std::string &seqDB, const std::string &seqDBIndex, 12 | const std::string &alnResultsDB, const std::string &alnResultsDBIndex, 13 | const std::string &outDB, const std::string &outDBIndex, 14 | const std::string &weightFileName, 15 | unsigned int maxIteration, int similarityScoreType, int threads, int compressed); 16 | 17 | void run(int mode); 18 | 19 | 20 | ~Clustering(); 21 | 22 | private: 23 | 24 | void writeData(DBWriter *dbw, const std::pair * ret, size_t dbSize); 25 | 26 | DBReader *seqDbr; 27 | DBReader *alnDbr; 28 | 29 | //values for affinity clustering 30 | unsigned int maxIteration; 31 | int similarityScoreType; 32 | 33 | int threads; 34 | int compressed; 35 | std::string outDB; 36 | std::string outDBIndex; 37 | }; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/clustering/Main.cpp: -------------------------------------------------------------------------------- 1 | #include "Clustering.h" 2 | #include "Parameters.h" 3 | 4 | int clust(int argc, const char **argv, const Command& command) { 5 | Parameters& par = Parameters::getInstance(); 6 | par.parseParameters(argc, argv, command, true, 0, 0); 7 | 8 | Clustering clu(par.db1, par.db1Index, par.db2, par.db2Index, 9 | par.db3, par.db3Index, par.weightFile, par.maxIteration, 10 | par.similarityScoreType, par.threads, par.compressed); 11 | clu.run(par.clusteringMode); 12 | return EXIT_SUCCESS; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/commons/A3MReader.h: -------------------------------------------------------------------------------- 1 | #ifndef PHP_A3M_READER_H 2 | #define PHP_A3M_READER_H 3 | 4 | #include 5 | #include 6 | 
#include 7 | 8 | class A3mReader { 9 | public: 10 | A3mReader(std::string a3m); 11 | 12 | std::string getFasta(); 13 | 14 | private: 15 | void addSequence(const std::string& sequence); 16 | 17 | bool columnHasInsertion(size_t col); 18 | 19 | std::vector headers; 20 | std::vector> entries; 21 | size_t length; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/commons/CommandCaller.h: -------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_COMMANDCALLER_H 2 | #define MMSEQS_COMMANDCALLER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class CommandCaller { 9 | public: 10 | CommandCaller(); 11 | 12 | void addVariable(const char* key, const char* value); 13 | 14 | int callProgram(const char* program, size_t argc, const char **argv); 15 | 16 | static unsigned int getCallDepth(); 17 | 18 | // Does not return on success 19 | void execProgram(const char* program, const std::vector &argv); 20 | }; 21 | 22 | #endif //MMSEQS_COMMANDCALLER_H 23 | -------------------------------------------------------------------------------- /src/commons/DBConcat.h: -------------------------------------------------------------------------------- 1 | #ifndef DBCONCAT_H 2 | #define DBCONCAT_H 3 | 4 | #include 5 | #include 6 | 7 | class DBConcat { 8 | public: 9 | DBConcat(const std::string &dataFileNameA, const std::string &indexFileNameA, 10 | const std::string &dataFileNameB, const std::string &indexFileNameB, 11 | const std::string &dataFileNameC, const std::string &indexFileNameC, 12 | unsigned int threads, bool write = true, bool preserveKeysA = false, bool preserveKeysB = false, bool takeLargerEntry = false, size_t trimRight = 0); 13 | 14 | ~DBConcat(); 15 | 16 | unsigned int dbAKeyMap(unsigned int); 17 | unsigned int dbBKeyMap(unsigned int); 18 | 19 | private: 20 | size_t indexSizeA; 21 | size_t indexSizeB; 22 | 23 | std::pair *keysA, *keysB; 24 | 25 | bool sameDatabase; 26 | 
27 | struct compareFirstEntry { 28 | bool operator()(const std::pair &lhs, 29 | const std::pair &rhs) const { 30 | return (lhs.first < rhs.first); 31 | } 32 | }; 33 | 34 | struct compareKeyToFirstEntry { 35 | bool operator()(const unsigned int &lhs, const std::pair &rhs) const { 36 | return (lhs <= rhs.first); 37 | } 38 | }; 39 | }; 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/commons/Debug.cpp: -------------------------------------------------------------------------------- 1 | #include "Debug.h" 2 | 3 | 4 | int Debug::debugLevel = Debug::INFO; 5 | 6 | 7 | 8 | void Debug::setDebugLevel (int i) { 9 | debugLevel = i; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/commons/Domain.h: -------------------------------------------------------------------------------- 1 | #ifndef DOMAIN_H 2 | #define DOMAIN_H 3 | 4 | #include 5 | #include 6 | 7 | struct Domain { 8 | std::string query; 9 | 10 | unsigned int qStart; 11 | unsigned int qEnd; 12 | unsigned int qLength; 13 | 14 | std::string target; 15 | 16 | unsigned int tStart; 17 | unsigned int tEnd; 18 | unsigned int tLength; 19 | 20 | double eValue; 21 | 22 | Domain(const std::string &query, unsigned int qStart, unsigned int qEnd, unsigned int qLength, 23 | const std::string &target, unsigned int tStart, unsigned int tEnd, unsigned int tLength, double eValue) : 24 | query(query), qStart(qStart), qEnd(qEnd), qLength(qLength), 25 | target(target), tStart(tStart), tEnd(tEnd), tLength(tLength), eValue(eValue) { } 26 | 27 | friend bool operator<(const Domain &h1, const Domain &h2) { 28 | return h1.eValue < h2.eValue; 29 | } 30 | 31 | void writeResult(std::ostream &out) const { 32 | const char sep = '\t'; 33 | out << query << sep << target << sep << qStart << sep << qEnd << sep << qLength; 34 | out << sep << tStart << sep << tEnd << sep << tLength << sep << eValue; 35 | } 36 | }; 37 | 38 | #endif 39 | 
-------------------------------------------------------------------------------- /src/commons/DownloadDatabase.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Martin Steinegger on 10/1/21. 3 | // 4 | 5 | #ifndef MMSEQS_DOWNLOADDATABASE_H 6 | #define MMSEQS_DOWNLOADDATABASE_H 7 | #include 8 | #include 9 | 10 | 11 | struct EnvironmentEntry { 12 | const char* key; 13 | const char* value; 14 | }; 15 | 16 | struct DatabaseDownload { 17 | const char *name; 18 | const char *description; 19 | const char *citation; 20 | const char *url; 21 | bool hasTaxonomy; 22 | int dbType; 23 | const unsigned char *script; 24 | size_t scriptLength; 25 | std::vector environment; 26 | }; 27 | 28 | 29 | #endif //MMSEQS_DOWNLOADDATABASE_H 30 | -------------------------------------------------------------------------------- /src/commons/ExpressionParser.h: -------------------------------------------------------------------------------- 1 | #ifndef EXPRESSION_PARSER_H 2 | #define EXPRESSION_PARSER_H 3 | 4 | #include 5 | #include 6 | 7 | class ExpressionParser { 8 | public: 9 | ExpressionParser(const char* expression); 10 | ExpressionParser(const char* expression, const std::vector& lookup); 11 | 12 | ~ExpressionParser() { 13 | if (expr) { 14 | te_free(expr); 15 | } 16 | } 17 | 18 | bool isOk() { 19 | return err == 0; 20 | } 21 | 22 | std::vector findBindableIndices(); 23 | 24 | void bind(unsigned int index, double value) { 25 | if (index > 127) { 26 | return; 27 | } 28 | variables[index] = value; 29 | } 30 | 31 | double evaluate() { 32 | return te_eval(expr); 33 | } 34 | 35 | private: 36 | void findBound(const te_expr *n, int depth, std::vector &bound); 37 | 38 | te_expr *expr; 39 | std::vector vars; 40 | double variables[128]; 41 | int err; 42 | }; 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/commons/FastSort.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef ENABLE_IPS4O 4 | # include "simde/hedley.h" 5 | # if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(0,0,0) && !HEDLEY_GCC_VERSION_CHECK(5,1,0) && defined(__cplusplus) 6 | # define is_trivially_default_constructible has_trivial_default_constructor 7 | # endif 8 | # pragma GCC system_header 9 | # include "ips4o.hpp" 10 | # undef is_trivially_default_constructible 11 | # ifdef OPENMP 12 | # define SORT_PARALLEL ips4o::parallel::sort 13 | # else 14 | # define SORT_PARALLEL ips4o::sort 15 | # endif 16 | # define SORT_SERIAL std::sort 17 | #else 18 | # define SORT_PARALLEL std::sort 19 | # define SORT_SERIAL std::sort 20 | #endif 21 | -------------------------------------------------------------------------------- /src/commons/HeaderSummarizer.h: -------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_HEADERSUMMARIZER_H 2 | #define MMSEQS_HEADERSUMMARIZER_H 3 | 4 | #include 5 | #include 6 | 7 | class HeaderSummarizer { 8 | public: 9 | virtual std::string summarize(const std::vector& headers) = 0; 10 | virtual ~HeaderSummarizer() {}; 11 | }; 12 | 13 | class UniprotHeaderSummarizer : public HeaderSummarizer { 14 | public: 15 | std::string summarize(const std::vector& headers); 16 | ~UniprotHeaderSummarizer() {}; 17 | }; 18 | 19 | class MetaclustHeaderSummarizer : public HeaderSummarizer { 20 | public: 21 | std::string summarize(const std::vector& headers); 22 | ~MetaclustHeaderSummarizer() {}; 23 | }; 24 | 25 | 26 | #endif //MMSEQS_HEADERSUMMARIZER_H 27 | -------------------------------------------------------------------------------- /src/commons/KSeqBufferReader.h: -------------------------------------------------------------------------------- 1 | #ifndef KSEQ_BUFFER_READER_H 2 | #define KSEQ_BUFFER_READER_H 3 | 4 | #include 5 | 6 | typedef struct kseq_buffer { 7 | char* buffer; 8 | size_t position, length; 9 | 
10 | kseq_buffer () : buffer(NULL), position(0), length(0) {}; 11 | kseq_buffer (char* buffer, size_t length) : buffer(buffer), position(0), length(length) {}; 12 | } kseq_buffer_t; 13 | 14 | /* Copies up to nbyte bytes from inBuffer, starting at its current position, into outBuffer, */ 15 | /* advances the position by the number of bytes copied and returns that count (0 once nothing is left). */ 16 | inline ssize_t kseq_buffer_reader(kseq_buffer_t *inBuffer, char *outBuffer, size_t nbyte) { 17 | if (inBuffer->position >= inBuffer->length) { 18 | return 0; 19 | } 20 | 21 | /* clamp against the remaining bytes; the subtraction cannot wrap, unlike position + nbyte */ 22 | const size_t remaining = inBuffer->length - inBuffer->position; 23 | const size_t bytes = nbyte < remaining ? nbyte : remaining; 24 | if (bytes == 0) { 25 | return 0; 26 | } 27 | 28 | const char *src = inBuffer->buffer + inBuffer->position; 29 | for (size_t i = 0; i < bytes; ++i) { 30 | outBuffer[i] = src[i]; 31 | } 32 | 33 | inBuffer->position += bytes; 34 | 35 | return bytes; 36 | } 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/commons/LibraryReader.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by mad on 11/29/16. 
3 | // 4 | 5 | #ifndef MMSEQS_LIBRARYREADER_H 6 | #define MMSEQS_LIBRARYREADER_H 7 | 8 | 9 | #include 10 | #include 11 | 12 | class LibraryReader { 13 | public: 14 | bool StreamStartsWith(std::stringstream &in, const char* id); 15 | int ReadInt(const char* line,const char* label,const char* errmsg); 16 | double ReadDouble(const char* line,const char* label,const char* errmsg); 17 | std::string ReadString(const char* line,const char* label,const char* errmsg); 18 | bool ReadBool(const char* line,const char* label,const char* errmsg); 19 | const char* strscn(const char* str) ; 20 | static std::vector tokenize(const char* str, char sep); 21 | std::string getline(std::stringstream &in); 22 | }; 23 | 24 | 25 | #endif //MMSEQS_LIBRARYREADER_H 26 | -------------------------------------------------------------------------------- /src/commons/MMseqsMPI.cpp: -------------------------------------------------------------------------------- 1 | #include "MMseqsMPI.h" 2 | #include "Debug.h" 3 | #include "Parameters.h" 4 | 5 | bool MMseqsMPI::active = false; 6 | int MMseqsMPI::rank = -1; 7 | int MMseqsMPI::numProc = -1; 8 | 9 | #ifdef HAVE_MPI 10 | void MMseqsMPI::init(int argc, const char **argv) { 11 | MPI_Init(&argc, const_cast(&argv)); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &numProc); 14 | 15 | active = true; 16 | 17 | if(!isMaster()) { 18 | Parameters& par = Parameters::getInstance(); 19 | par.verbosity = Debug::ERROR; 20 | Debug::setDebugLevel(Debug::ERROR); 21 | } 22 | 23 | Debug(Debug::INFO) << "MPI Init\n"; 24 | Debug(Debug::INFO) << "Rank: " << rank << " Size: " << numProc << "\n"; 25 | } 26 | #else 27 | void MMseqsMPI::init(int, const char **) { 28 | rank = 0; 29 | } 30 | #endif 31 | -------------------------------------------------------------------------------- /src/commons/MMseqsMPI.h: -------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_MPI_H 2 | #define MMSEQS_MPI_H 3 | 4 | 
#ifdef HAVE_MPI 5 | #include 6 | #endif 7 | 8 | class MMseqsMPI { 9 | public: 10 | static const int MASTER = 0; 11 | 12 | static bool active; 13 | static int rank; 14 | static int numProc; 15 | 16 | static void init(int argc, const char **argv); 17 | static inline bool isMaster() { 18 | #ifdef HAVE_MPI 19 | return rank == MASTER; 20 | #else 21 | return true; 22 | #endif 23 | }; 24 | }; 25 | 26 | // if we are in an error case, do not call MPI_Finalize, it might still be in a Barrier 27 | #ifdef HAVE_MPI 28 | #define EXIT(exitCode) do { \ 29 | int __status = (exitCode); \ 30 | if(MMseqsMPI::active && __status == 0) { \ 31 | MPI_Finalize(); \ 32 | MMseqsMPI::active = false; \ 33 | } \ 34 | std::cerr.flush(); \ 35 | std::cout.flush(); \ 36 | exit(__status); \ 37 | } while(0) 38 | #endif 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/commons/Masker.h: -------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_MASKER_H 2 | #define MMSEQS_MASKER_H 3 | 4 | #include "Parameters.h" 5 | #include "Sequence.h" 6 | #include "SubstitutionMatrix.h" 7 | #include "tantan.h" 8 | #include "PSSMCalculator.h" 9 | #include 10 | 11 | class Masker { 12 | public: 13 | Masker(BaseMatrix &subMat); 14 | 15 | ~Masker(); 16 | 17 | int maskSequence(Sequence & seq, bool maskTantan, double maskProb, 18 | bool maskLowerCaseLetter, int maskNrepeating); 19 | 20 | void maskPssm(Sequence& centerSequence, float maskProb, PSSMCalculator::Profile& pssmRes); 21 | 22 | void applySoftmasking(unsigned char *charSequence, const unsigned char * numSequence, unsigned int seqLen); 23 | 24 | char maskLetterNum; 25 | 26 | private: 27 | int maskRepeats(unsigned char *numSequence, const unsigned int seqLen, int maskNrepeating, char maskChar); 28 | 29 | void finalizeMasking(unsigned char * numSequence, const unsigned int seqLen); 30 | 31 | BaseMatrix &subMat; 32 | ProbabilityMatrix probMatrix; 33 | 34 | unsigned char * 
charSequence; 35 | size_t maxSeqLen; 36 | }; 37 | #endif 38 | -------------------------------------------------------------------------------- /src/commons/MemoryTracker.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Martin Steinegger on 6/21/20. 3 | // 4 | 5 | #include "MemoryTracker.h" 6 | size_t MemoryTracker::totalMemorySizeInst = 0; 7 | 8 | -------------------------------------------------------------------------------- /src/commons/MemoryTracker.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Martin Steinegger on 6/21/20. 3 | // 4 | 5 | #ifndef MMSEQS_MEMORYTRACKER_H 6 | #define MMSEQS_MEMORYTRACKER_H 7 | #include "stddef.h" 8 | class MemoryTracker{ 9 | public: 10 | static size_t getSize() { return totalMemorySizeInst;}; 11 | protected: 12 | static size_t totalMemorySizeInst; 13 | static void incrementMemory(size_t memorySize) { totalMemorySizeInst+=memorySize; } 14 | static void decrementMemory(size_t memorySize) { totalMemorySizeInst-=memorySize; } 15 | }; 16 | #endif //MMSEQS_MEMORYTRACKER_H 17 | -------------------------------------------------------------------------------- /src/commons/NucleotideMatrix.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEOTIDE_MATRIX_H 2 | #define NUCLEOTIDE_MATRIX_H 3 | 4 | #include "SubstitutionMatrix.h" 5 | 6 | class NucleotideMatrix : public SubstitutionMatrix { 7 | public: 8 | NucleotideMatrix(const char *scoringMatrixFileName, float bitFactor, float scoreBias); 9 | 10 | virtual ~NucleotideMatrix(); 11 | 12 | using BaseMatrix::getBitFactor; 13 | 14 | void setupLetterMapping(); 15 | 16 | int reverseResidue(int res) { 17 | return reverseLookup[res]; 18 | } 19 | 20 | private: 21 | int *reverseLookup; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/commons/PatternCompiler.h: 
-------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_PATTERNCOMPILER_H 2 | #define MMSEQS_PATTERNCOMPILER_H 3 | #include 4 | #include "Debug.h" 5 | #include "Util.h" 6 | 7 | class PatternCompiler { 8 | public: 9 | PatternCompiler(const char* pattern) { 10 | if (regcomp(®ex, pattern, REG_EXTENDED | REG_NEWLINE) != 0 ){ 11 | Debug(Debug::ERROR) << "Error in regex " << pattern << "\n"; 12 | EXIT(EXIT_FAILURE); 13 | } 14 | } 15 | 16 | ~PatternCompiler() { 17 | regfree(®ex); 18 | } 19 | 20 | bool isMatch(const char *target) { 21 | return regexec(®ex, target, 0, NULL, 0) == 0; 22 | } 23 | 24 | bool isMatch(const char *target, size_t nmatch, regmatch_t *pmatch) { 25 | return regexec(®ex, target, nmatch, pmatch, 0) == 0; 26 | } 27 | 28 | 29 | private: 30 | regex_t regex; 31 | }; 32 | 33 | 34 | #endif //MMSEQS_PATTERNCOMPILER_H 35 | -------------------------------------------------------------------------------- /src/commons/SequenceWeights.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by annika on 09.11.22. 3 | // 4 | 5 | #ifndef MMSEQS_SEQUENCEWEIGHTS_H 6 | #define MMSEQS_SEQUENCEWEIGHTS_H 7 | 8 | class SequenceWeights{ 9 | public: 10 | struct WeightIndexEntry { 11 | unsigned int id; 12 | float weight; 13 | 14 | static bool compareByIdOnly(const WeightIndexEntry &x, const WeightIndexEntry &y) { 15 | return x.id <= y.id; 16 | } 17 | }; 18 | 19 | WeightIndexEntry *weightIndex; 20 | unsigned int indexSize; 21 | 22 | SequenceWeights(const char* dataFileName); 23 | 24 | ~SequenceWeights(); 25 | 26 | float getWeightById(unsigned int id); 27 | }; 28 | 29 | 30 | #endif //MMSEQS_SEQUENCEWEIGHTS_H 31 | -------------------------------------------------------------------------------- /src/commons/UniprotKB.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Milot on 23/05/16. 
3 | // 4 | 5 | #ifndef MMSEQS_UNIPROTKB_H 6 | #define MMSEQS_UNIPROTKB_H 7 | 8 | class UniprotKB { 9 | public: 10 | UniprotKB() : dbColumns(17), isInEntry(false), hasEntry(false) { 11 | streams = new std::ostringstream[dbColumns]; 12 | }; 13 | 14 | ~UniprotKB() { 15 | delete[] streams; 16 | } 17 | 18 | size_t getColumnCount() const { 19 | return dbColumns; 20 | } 21 | 22 | bool readLine (const char* line); 23 | std::string getColumn (size_t column); 24 | 25 | enum { 26 | COL_KB_ID = 0, 27 | COL_KB_AC, 28 | COL_KB_DT, 29 | COL_KB_DE, 30 | COL_KB_GN, 31 | COL_KB_OS, 32 | COL_KB_OG, 33 | COL_KB_OC, 34 | COL_KB_OX, 35 | COL_KB_OH, 36 | COL_KB_REF, 37 | COL_KB_CC, 38 | COL_KB_DR, 39 | COL_KB_PE, 40 | COL_KB_KW, 41 | COL_KB_FT, 42 | COL_KB_SEQ 43 | }; 44 | 45 | static const std::string columnNames[]; 46 | 47 | private: 48 | const size_t dbColumns; 49 | bool isInEntry; 50 | bool hasEntry; 51 | std::ostringstream* streams; 52 | 53 | }; 54 | 55 | 56 | 57 | #endif //MMSEQS_UNIPROTKB_H 58 | -------------------------------------------------------------------------------- /src/linclust/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(linclust_source_files 2 | linclust/kmermatcher.cpp 3 | linclust/kmerindexdb.cpp 4 | linclust/kmersearch.cpp 5 | linclust/LinsearchIndexReader.cpp 6 | PARENT_SCOPE 7 | ) 8 | -------------------------------------------------------------------------------- /src/linclust/kmersearch.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Martin Steinegger on 2019-01-04. 
3 | // 4 | 5 | #ifndef MMSEQS_KMERSEARCH_H 6 | #define MMSEQS_KMERSEARCH_H 7 | 8 | #include "kmermatcher.h" 9 | #include "KmerIndex.h" 10 | 11 | class KmerSearch{ 12 | 13 | public: 14 | template 15 | static std::pair *,size_t > searchInIndex( KmerPosition *kmers, size_t kmersSize, KmerIndex &kmerIndex, int resultDirection); 16 | 17 | template 18 | static void writeResult(DBWriter & dbw, KmerPosition *kmers, size_t kmerCount); 19 | 20 | 21 | struct ExtractKmerAndSortResult{ 22 | ExtractKmerAndSortResult(size_t kmerCount, KmerPosition * kmers, size_t adjustedKmer) 23 | : kmerCount(kmerCount), kmers(kmers), adjustedKmer(adjustedKmer) {} 24 | size_t kmerCount; 25 | KmerPosition * kmers; 26 | size_t adjustedKmer; 27 | }; 28 | static ExtractKmerAndSortResult extractKmerAndSort(size_t splitKmerCount, size_t split, size_t splits, 29 | DBReader &seqDbr, Parameters &par, BaseMatrix *subMat); 30 | }; 31 | 32 | 33 | #endif //MMSEQS_KMERSEARCH_H 34 | -------------------------------------------------------------------------------- /src/mmseqs.cpp: -------------------------------------------------------------------------------- 1 | #include "Command.h" 2 | #include "DownloadDatabase.h" 3 | #include "Prefiltering.h" 4 | #include "Parameters.h" 5 | 6 | const char* binary_name = "mmseqs"; 7 | const char* tool_name = "MMseqs2"; 8 | const char* tool_introduction = "MMseqs2 (Many against Many sequence searching) is an open-source software suite for very fast, \nparallelized protein sequence searches and clustering of huge protein sequence data sets.\n\nPlease cite: M. Steinegger and J. Soding. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. 
Nature Biotechnology, doi:10.1038/nbt.3988 (2017)."; 9 | const char* main_author = "Martin Steinegger (martin.steinegger@snu.ac.kr)"; 10 | const char* show_extended_help = "1"; 11 | const char* show_bash_info = "1"; 12 | extern const char* MMSEQS_CURRENT_INDEX_VERSION; 13 | const char* index_version_compatible = MMSEQS_CURRENT_INDEX_VERSION; 14 | bool hide_base_commands = false; 15 | void (*validatorUpdate)(void) = 0; 16 | 17 | extern std::vector baseCommands; 18 | void init() { 19 | registerCommands(&baseCommands); 20 | } 21 | void (*initCommands)(void) = init; 22 | 23 | DEFAULT_PARAMETER_SINGLETON_INIT 24 | 25 | std::vector externalDownloads = {}; 26 | std::vector externalThreshold = {}; 27 | 28 | bool hide_base_downloads = false; 29 | -------------------------------------------------------------------------------- /src/multihit/Aggregation.h: -------------------------------------------------------------------------------- 1 | #ifndef MMSEQS_AGGREGATION_H 2 | #define MMSEQS_AGGREGATION_H 3 | 4 | #include "DBReader.h" 5 | #include "DBWriter.h" 6 | 7 | #include 8 | #include 9 | 10 | class Aggregation { 11 | public: 12 | Aggregation(const std::string &targetDbName, const std::string &resultDbName, const std::string &outputDbName, 13 | unsigned int threads, unsigned int compressed); 14 | 15 | virtual ~Aggregation(); 16 | 17 | int run(); 18 | virtual void prepareInput(unsigned int querySetKey, unsigned int thread_idx) = 0; 19 | virtual std::string aggregateEntry(std::vector> &dataToAggregate, unsigned int querySetKey, unsigned int targetSetKey, unsigned int thread_idx) = 0; 20 | 21 | protected: 22 | std::string resultDbName; 23 | std::string outputDbName; 24 | DBReader *targetSetReader; 25 | unsigned int threads; 26 | unsigned int compressed; 27 | 28 | void buildMap(char *data, int thread_idx, std::map>> &dataToAggregate); 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/multihit/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set(multihit_header_files 2 | multihit/Aggregation.h 3 | PARENT_SCOPE 4 | ) 5 | 6 | 7 | set(multihit_source_files 8 | multihit/besthitperset.cpp 9 | multihit/combinepvalperset.cpp 10 | multihit/Aggregation.h 11 | multihit/Aggregation.cpp 12 | multihit/MultiHitDb.cpp 13 | multihit/MultiHitSearch.cpp 14 | PARENT_SCOPE 15 | ) 16 | -------------------------------------------------------------------------------- /src/prefiltering/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(prefiltering_header_files 2 | prefiltering/CacheFriendlyOperations.h 3 | prefiltering/ExtendedSubstitutionMatrix.h 4 | prefiltering/Indexer.h 5 | prefiltering/IndexBuilder.h 6 | prefiltering/IndexTable.h 7 | prefiltering/KmerGenerator.h 8 | prefiltering/Prefiltering.h 9 | prefiltering/PrefilteringIndexReader.h 10 | prefiltering/QueryMatcher.h 11 | prefiltering/QueryMatcherTaxonomyHook.h 12 | prefiltering/ReducedMatrix.h 13 | prefiltering/SequenceLookup.h 14 | prefiltering/UngappedAlignment.h 15 | PARENT_SCOPE 16 | ) 17 | 18 | set(prefiltering_source_files 19 | prefiltering/CacheFriendlyOperations.cpp 20 | prefiltering/ExtendedSubstitutionMatrix.cpp 21 | prefiltering/Indexer.cpp 22 | prefiltering/IndexBuilder.cpp 23 | prefiltering/KmerGenerator.cpp 24 | prefiltering/Main.cpp 25 | prefiltering/Prefiltering.cpp 26 | prefiltering/PrefilteringIndexReader.cpp 27 | prefiltering/QueryMatcher.cpp 28 | prefiltering/ReducedMatrix.cpp 29 | prefiltering/SequenceLookup.cpp 30 | prefiltering/UngappedAlignment.cpp 31 | prefiltering/ungappedprefilter.cpp 32 | PARENT_SCOPE 33 | ) 34 | -------------------------------------------------------------------------------- /src/prefiltering/ExtendedSubstitutionMatrix.h: -------------------------------------------------------------------------------- 1 | #ifndef EXTENDEDSUBSTITUIONMATRIXH 2 | #define EXTENDEDSUBSTITUIONMATRIXH 3 | 
// Stateless helper that exposes a single static routine for populating an
// IndexTable from a database range.
// NOTE(review): `DBReader *dbr` has most likely lost a template argument in
// this copy of the header - confirm against the repository.
class IndexBuilder {
public:
    // indexTable      - table to populate
    // externalLookup  - pointer to a SequenceLookup pointer (in/out; ownership is
    //                   not visible from this header - TODO confirm)
    // subMat          - substitution matrix
    // three, two      - score matrices (presumably for 3-mers and 2-mers, going by the names)
    // seq             - Sequence object supplied by the caller (presumably reused as scratch)
    // dbr             - reader for the database being indexed
    // dbFrom, dbTo    - range of database entries to process (inclusive/exclusive
    //                   bounds not visible here)
    // kmerThr         - k-mer score threshold
    // mask, maskLowerCaseMode, maskProb, maskNrepeats - masking options
    // targetSearchMode - mode selector forwarded by the caller
    static void fillDatabase(IndexTable *indexTable, SequenceLookup **externalLookup, BaseMatrix &subMat,
                             ScoreMatrix & three, ScoreMatrix & two, Sequence *seq,
                             DBReader *dbr, size_t dbFrom, size_t dbTo, int kmerThr,
                             bool mask, bool maskLowerCaseMode, float maskProb, int maskNrepeats, int targetSearchMode);
};
pow = 1; 9 | for( int i=0; ipowers[i] = pow; 11 | pow *= alphabetSize; 12 | } 13 | this->maxKmerIndex = 0; 14 | for( int i=0; imaxKmerIndex += alphabetSize*this->powers[i]; 16 | } 17 | 18 | this->lastKmerIndex = this->maxKmerIndex; 19 | 20 | workspace = new size_t[100]; 21 | } 22 | 23 | Indexer::~Indexer(){ 24 | delete[] this->powers; 25 | delete[] workspace; 26 | } 27 | 28 | 29 | 30 | 31 | void Indexer::reset(){ 32 | this->lastKmerIndex = this->maxKmerIndex; 33 | } 34 | 35 | void Indexer::printKmer(size_t kmerIdx, int kmerSize, char* num2aa){ 36 | index2int(workspace, kmerIdx, kmerSize); 37 | for (int j = 0; j < kmerSize; j++) 38 | Debug(Debug::INFO) << num2aa[workspace[j]]; 39 | } 40 | 41 | void Indexer::printKmer(const unsigned char* kmer, int kmerSize, char* num2aa){ 42 | for (int j = 0; j < kmerSize; j++) 43 | Debug(Debug::INFO) << num2aa[kmer[j]]; 44 | } -------------------------------------------------------------------------------- /src/prefiltering/SequenceLookup.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by mad on 12/14/15. 
// Holds sequence data in one contiguous character buffer (`data`) together
// with an offsets array (`offsets`) used to locate individual sequences.
// The buffer can also be supplied from outside (see initLookupByExternalData*
// and the `externalData` flag).
// NOTE(review): the return type of getSequence() has lost its template
// arguments in this copy of the header - confirm against the repository.
class SequenceLookup {
public:
    SequenceLookup(size_t dbSize, size_t entrySize);
    SequenceLookup(size_t dbSize);
    ~SequenceLookup();

    // Add a sequence of length L at an explicit position: `index` selects the
    // slot, `offset` the position inside the data buffer.
    void addSequence(unsigned char *seq, int L, size_t index, size_t offset);

    // Add a sequence to the index (presumably at the current write position
    // tracked by currentIndex/currentOffset - TODO confirm in SequenceLookup.cpp).
    void addSequence(Sequence * seq);

    // Get the sequence data stored for `id`.
    std::pair getSequence(size_t id);

    // Raw access to the data buffer.
    const char *getData();

    // Size of the data buffer (signed 64-bit, unlike the size_t member dataSize).
    int64_t getDataSize();

    size_t getSequenceCount();

    // Raw access to the offsets array.
    size_t *getOffsets();

    // Initialize the lookup from caller-provided buffers.
    // NOTE(review): going by the names, the first variant keeps pointing at the
    // external memory while the "Copy" variant copies it; verify in the .cpp.
    void initLookupByExternalData(char *seqData, size_t dataSize, size_t *seqOffsets);
    void initLookupByExternalDataCopy(char *seqData, size_t *seqOffsets);

private:
    size_t sequenceCount;

    // data contains sequence data
    char *data;
    size_t dataSize;

    size_t *offsets;

    // write position
    size_t currentIndex;
    size_t currentOffset;

    // if data are read from mmap
    bool externalData;
};
/src/taxonomy/createbintaxmapping.cpp: -------------------------------------------------------------------------------- 1 | #include "Debug.h" 2 | #include "Parameters.h" 3 | #include "FileUtil.h" 4 | #include "MappingReader.h" 5 | 6 | int createbintaxmapping(int argc, const char **argv, const Command &command) { 7 | Parameters &par = Parameters::getInstance(); 8 | par.parseParameters(argc, argv, command, true, 0, 0); 9 | MappingReader reader(par.db1, false); 10 | std::pair serialized = MappingReader::serialize(reader); 11 | FILE* handle = fopen(par.db2.c_str(), "w"); 12 | if (handle == NULL) { 13 | Debug(Debug::ERROR) << "Could not open " << par.db2 << " for writing\n"; 14 | return EXIT_FAILURE; 15 | } 16 | size_t written = fwrite(serialized.first, serialized.second * sizeof(char), 1, handle); 17 | free(serialized.first); 18 | if (written != 1) { 19 | Debug(Debug::ERROR) << "Could not write to " << par.db2 << "\n"; 20 | return EXIT_FAILURE; 21 | } 22 | if (fclose(handle) != 0) { 23 | Debug(Debug::ERROR) << "Cannot close " << par.db2 << "\n"; 24 | return EXIT_FAILURE; 25 | } 26 | return EXIT_SUCCESS; 27 | } 28 | -------------------------------------------------------------------------------- /src/taxonomy/createbintaxonomy.cpp: -------------------------------------------------------------------------------- 1 | #include "Debug.h" 2 | #include "Parameters.h" 3 | #include "FileUtil.h" 4 | #include "NcbiTaxonomy.h" 5 | 6 | int createbintaxonomy(int argc, const char **argv, const Command &command) { 7 | Parameters &par = Parameters::getInstance(); 8 | par.parseParameters(argc, argv, command, false, 0, 0); 9 | NcbiTaxonomy taxonomy(par.db1, par.db2, par.db3); 10 | std::pair serialized = NcbiTaxonomy::serialize(taxonomy); 11 | FILE* handle = fopen(par.db4.c_str(), "w"); 12 | if (handle == NULL) { 13 | Debug(Debug::ERROR) << "Could not open " << par.db4 << " for writing\n"; 14 | EXIT(EXIT_FAILURE); 15 | } 16 | fwrite(serialized.first, serialized.second, sizeof(char), 
handle); 17 | fclose(handle); 18 | free(serialized.first); 19 | EXIT(EXIT_SUCCESS); 20 | } 21 | -------------------------------------------------------------------------------- /src/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(MMseqsSetupTest) 2 | 3 | set(TESTS 4 | #TestAdjustedKmerIterator.cpp 5 | TestAlignment.cpp 6 | TestAlignmentPerformance.cpp 7 | TestAlignmentTraceback.cpp 8 | TestAlp.cpp 9 | TestBacktraceTranslator.cpp 10 | TestCompositionBias.cpp 11 | TestCounting.cpp 12 | TestDBReader.cpp 13 | TestDBReaderIndexSerialization.cpp 14 | TestDiagonalScoring.cpp 15 | TestDiagonalScoringPerformance.cpp 16 | TestKmerGenerator.cpp 17 | TestKmerNucl.cpp 18 | TestKmerScore.cpp 19 | TestKwayMerge.cpp 20 | TestMultipleAlignment.cpp 21 | TestProfileAlignment.cpp 22 | TestPSSM.cpp 23 | TestPSSMPrune.cpp 24 | TestDBReaderZstd.cpp 25 | TestReduceMatrix.cpp 26 | TestScoreMatrixSerialization.cpp 27 | TestSequenceIndex.cpp 28 | TestTanTan.cpp 29 | TestTaxonomy.cpp 30 | TestTranslate.cpp 31 | TestTinyExpr.cpp 32 | TestTaxExpr.cpp 33 | TestUtil.cpp 34 | TestKsw2.cpp 35 | TestBestAlphabet.cpp 36 | TestUngappedCpuPerf.cpp 37 | ) 38 | 39 | 40 | FOREACH (TEST ${TESTS}) 41 | mmseqs_setup_test(${TEST}) 42 | ENDFOREACH () 43 | -------------------------------------------------------------------------------- /src/test/TestCounting.cpp: -------------------------------------------------------------------------------- 1 | #include "Util.h" 2 | 3 | #include 4 | #include 5 | 6 | const char* binary_name = "test_counting"; 7 | 8 | //u_int64_t revcomp64_v2 (const u_int64_t& x, size_t sizeKmer) 9 | //{ 10 | // u_int64_t res = x; 11 | // 12 | // res = ((res>> 2 & 0x3333333333333333) | (res & 0x3333333333333333) << 2); 13 | // res = ((res>> 4 & 0x0F0F0F0F0F0F0F0F) | (res & 0x0F0F0F0F0F0F0F0F) << 4); 14 | // res = ((res>> 8 & 0x00FF00FF00FF00FF) | (res & 0x00FF00FF00FF00FF) << 8); 15 | // res = ((res>>16 & 0x0000FFFF0000FFFF) | 
// Manual round-trip check for DBReader index serialization: opens an index,
// logs its size and first entry, serializes it, rebuilds a reader from the
// serialized buffer and logs the same values again for visual comparison.
// Nothing is asserted; the two log lines have to be compared by eye.
// NOTE(review): the index path is a hard-coded developer-local file, and the
// DBReader template arguments appear to be missing in this copy.
int main (int, const char**) {
    DBReader reader("", "/Users/mirdita/tmp/db.index", 1, DBReader::USE_INDEX);
    reader.open(DBReader::NOSORT);

    // State of the original reader: sizes, then id/offset/length of entry 0.
    Debug(Debug::INFO) << reader.getSize() << " " << reader.getAminoAcidDBSize() << "\n";
    Debug(Debug::INFO) << reader.getIndex()[0].id << " " << reader.getIndex()[0].offset << " " << reader.getIndex()[0].length << "\n";

    // Round trip through the serialized representation.
    char* data = DBReader::serialize(reader);
    DBReader* newdbr = DBReader::unserialize(data, 1);
    newdbr->open(DBReader::NOSORT);

    // Same values from the rebuilt reader; they should match the lines above.
    Debug(Debug::INFO) << newdbr->getSize() << " " << newdbr->getAminoAcidDBSize() << "\n";
    Debug(Debug::INFO) << newdbr->getIndex()[0].id << " " << newdbr->getIndex()[0].offset << " " << newdbr->getIndex()[0].length << "\n";
    // The serialized buffer is released with free() only after the last read
    // from newdbr above.
    free(data);

    newdbr->close();
    delete newdbr;
    reader.close();
    return EXIT_SUCCESS;
}
-------------------------------------------------------------------------------- 1 | // 2 | // Created by mad on 10/26/15. 3 | // 4 | #include 5 | 6 | #include "SubstitutionMatrix.h" 7 | #include "Sequence.h" 8 | #include "Parameters.h" 9 | 10 | const char* binary_name = "test_kmerscore"; 11 | DEFAULT_PARAMETER_SINGLETON_INIT 12 | 13 | int main (int, const char**) { 14 | const size_t kmer_size = 6; 15 | 16 | Parameters& par = Parameters::getInstance(); 17 | par.initMatrices(); 18 | SubstitutionMatrix subMat(par.scoringMatrixFile.values.aminoacid().c_str(), 8.0, 0); 19 | std::cout << "Substitution matrix:\n"; 20 | SubstitutionMatrix::print(subMat.subMatrix, subMat.num2aa, subMat.alphabetSize); 21 | 22 | const char *ref = "GKILII"; 23 | Sequence refSeq(1000, 0, &subMat, kmer_size, false, true); 24 | refSeq.mapSequence(0, 0, ref, strlen(ref)); 25 | 26 | const char *similar = "GKVLYL"; 27 | Sequence similarSeq(1000, 0, &subMat, kmer_size, false, true); 28 | similarSeq.mapSequence(0, 1, similar, strlen(similar)); 29 | 30 | 31 | short score = 0; 32 | for(size_t i = 0; i < kmer_size; i++){ 33 | score += subMat.subMatrix[refSeq.numSequence[i]][similarSeq.numSequence[i]]; 34 | } 35 | std::cout << score << std::endl; 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /src/test/TestMerge.cpp: -------------------------------------------------------------------------------- 1 | #include "Concat.h" 2 | #include 3 | 4 | const char* binary_name = "test_merge"; 5 | 6 | int main(int argc, char **argv) { 7 | std::vector files; 8 | for(size_t i = 1; i <= argc; i++){ 9 | files.emplace_back(fopen(argv[i], "rb")); 10 | } 11 | 12 | FILE* outfile = fopen("/tmp/test_out","wb"); 13 | Concat::concatFiles(files, outfile); 14 | fclose(outfile); 15 | 16 | for(size_t i = 0; i < files.size(); i++){ 17 | fclose(files[i]); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- 
/src/test/TestScoreMatrixSerialization.cpp: -------------------------------------------------------------------------------- 1 | #include "ExtendedSubstitutionMatrix.h" 2 | #include "SubstitutionMatrix.h" 3 | #include "ScoreMatrix.h" 4 | #include "Parameters.h" 5 | #include "Debug.h" 6 | 7 | const char* binary_name = "test_scorematrixserialization"; 8 | DEFAULT_PARAMETER_SINGLETON_INIT 9 | 10 | int main (int, const char**) { 11 | Parameters& par = Parameters::getInstance(); 12 | par.initMatrices(); 13 | SubstitutionMatrix subMat(par.scoringMatrixFile.values.aminoacid().c_str(), 8.0, 0); 14 | ScoreMatrix extMattwo = ExtendedSubstitutionMatrix::calcScoreMatrix(subMat, 2); 15 | 16 | Debug(Debug::INFO) << extMattwo.elementSize << " " << extMattwo.rowSize << " " 17 | << extMattwo.score[0] << " " << extMattwo.index[0] << "\n"; 18 | 19 | char* serialized = ScoreMatrix::serialize(extMattwo); 20 | ExtendedSubstitutionMatrix::freeScoreMatrix(extMattwo); 21 | 22 | ScoreMatrix unserialized = ScoreMatrix::unserialize(serialized, subMat.alphabetSize, 2); 23 | Debug(Debug::INFO) << unserialized.elementSize << " " << unserialized.rowSize << " " 24 | << unserialized.score[0] << " " << unserialized.index[0] << "\n"; 25 | free(serialized); 26 | 27 | return EXIT_SUCCESS; 28 | } 29 | -------------------------------------------------------------------------------- /src/test/TestTaxonomy.cpp: -------------------------------------------------------------------------------- 1 | #include "NcbiTaxonomy.h" 2 | #include "Debug.h" 3 | 4 | const char* binary_name = "test_taxonomy"; 5 | 6 | int main (int, const char**) { 7 | NcbiTaxonomy t("/Users/mirdita/tmp/taxonomy/names.dmp", 8 | "/Users/mirdita/tmp/taxonomy/nodes.dmp", 9 | "/Users/mirdita/tmp/taxonomy/merged.dmp"); 10 | //"/Users/mirdita/tmp/taxonomy/delnodes.dmp"); 11 | std::vector taxa; 12 | taxa.push_back(9); 13 | taxa.push_back(7); 14 | TaxonNode const * node = t.LCA(taxa); 15 | Debug(Debug::INFO) << t.getString(node->nameIdx) << "\n"; 
16 | } 17 | -------------------------------------------------------------------------------- /src/test/TestTinyExpr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ExpressionParser.h" 5 | 6 | const char* binary_name = "test_tinyexpr"; 7 | 8 | int main (int, const char**) { 9 | ExpressionParser expression("sqrt($11^2+$2^2)"); 10 | if (expression.isOk()) { 11 | std::vector indices = expression.findBindableIndices(); 12 | assert(indices[0] == 11); 13 | assert(indices[1] == 2); 14 | expression.bind(11, 3); 15 | expression.bind(2, 4); 16 | double result = expression.evaluate(); 17 | std::cout << result << std::endl; 18 | assert(result == 5); 19 | } else { 20 | std::cerr << "Failed to parse expression" << std::endl; 21 | assert(false); 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/test/TestTranslate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "TranslateNucl.h" 5 | 6 | const char* binary_name = "test_translate"; 7 | 8 | int main (int, const char**) { 9 | TranslateNucl * translateNucl = new TranslateNucl(TranslateNucl::CANONICAL); 10 | // translateNucl->initConversionTable(); 11 | std::string nuclStr = "ATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACC"; 12 | int length = nuclStr.size(); 13 | char* aa = new char[length/3 + 1]; 14 | translateNucl->translate(aa, (char*)nuclStr.c_str(), length); 15 | aa[length/3] = '\n'; 16 | std::cout << aa << std::endl; 17 | delete translateNucl; 18 | return 0; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /src/test/TestUtil.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "Debug.h" 6 | #include "Util.h" 7 | 8 | const char* binary_name = "test_util"; 9 | 10 | int 
main (int, const char**) { 11 | assert(SSTR((unsigned int)1) == "1"); 12 | assert(SSTR((int)1) == "1"); 13 | assert(SSTR((unsigned long long int)1) == "1"); 14 | assert(SSTR((long long int)1) == "1"); 15 | assert(SSTR((unsigned int)1) == "1"); 16 | assert(SSTR((unsigned int)1) != "2"); 17 | assert(SSTR('c') == "c"); 18 | assert(SSTR(0.00314) == "3.140E-03"); 19 | assert(SSTR((double)0.00314) == "3.140E-03"); 20 | assert(SSTR("TEST") == "TEST"); 21 | 22 | // unsigned int sizes[5] = {1882, 150, 630, 929, 167}; 23 | 24 | // for (size_t i = 0; i < 5; ++i) { 25 | // size_t start = 0; 26 | // size_t end = i; 27 | // 28 | // Util::decomposeDomainByAminoAcid(3758, sizes, 5, i, 5, &start, &end); 29 | // std::cout << start << " " << end << std::endl; 30 | // } 31 | 32 | for (size_t i = 0; i < 5; ++i) { 33 | size_t start = 0; 34 | size_t end = i; 35 | 36 | Util::decomposeDomain(5, i, 5, &start, &end); 37 | std::cout << start << " " << end << std::endl; 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/test/dataGap: -------------------------------------------------------------------------------- 1 | 111 111 111 2 | 12 12 12 3 | 333 4 | 444 5 | 222 6 | 666 7 | 111 8 | -------------------------------------------------------------------------------- /src/test/dataGap.index: -------------------------------------------------------------------------------- 1 | 1 43 5 2 | 111 0 13 3 | 12 13 10 4 | 2 33 5 5 | 3 23 5 6 | 4 28 5 7 | 6 38 5 8 | -------------------------------------------------------------------------------- /src/test/dataLinear: -------------------------------------------------------------------------------- 1 | 111 2 | 222 3 | 333 4 | 444 5 | 555 6 | 666 7 | -------------------------------------------------------------------------------- /src/test/dataLinear.index: -------------------------------------------------------------------------------- 1 | 1 0 5 2 | 2 5 5 3 | 3 10 5 4 | 4 15 5 5 | 5 20 5 6 | 6 25 5 7 | 
-------------------------------------------------------------------------------- /src/util/dbtype.cpp: -------------------------------------------------------------------------------- 1 | #include "Debug.h" 2 | #include "DBReader.h" 3 | #include "Parameters.h" 4 | #include "Util.h" 5 | #include "FileUtil.h" 6 | 7 | int dbtype(int argc, const char **argv, const Command &command) { 8 | Parameters &par = Parameters::getInstance(); 9 | par.parseParameters(argc, argv, command, false, 0, 0); 10 | Debug(Debug::INFO) << Parameters::getDbTypeName(FileUtil::parseDbType(par.db1.c_str())); 11 | EXIT(EXIT_SUCCESS); 12 | } 13 | -------------------------------------------------------------------------------- /src/util/diskspaceavail.cpp: -------------------------------------------------------------------------------- 1 | #include "Command.h" 2 | #include "Debug.h" 3 | #include "Util.h" 4 | #include "FileUtil.h" 5 | #include "Parameters.h" 6 | 7 | int diskspaceavail(int, const char**, const Command&) { 8 | Parameters &par = Parameters::getInstance(); 9 | size_t diskLimit = FileUtil::getFreeSpace(FileUtil::dirName(par.db1).c_str()); 10 | Debug(Debug::INFO) << diskLimit << "\n"; // in bytes 11 | EXIT(EXIT_SUCCESS); 12 | } 13 | -------------------------------------------------------------------------------- /src/util/result2stats.h: -------------------------------------------------------------------------------- 1 | #ifndef RESULT2PROFILE_H 2 | #define RESULT2PROFILE_H 3 | 4 | #include "Parameters.h" 5 | #include "DBReader.h" 6 | #include "DBWriter.h" 7 | 8 | #include 9 | 10 | class StatsComputer { 11 | public: 12 | StatsComputer(const Parameters &par); 13 | ~StatsComputer(); 14 | 15 | int run(); 16 | private: 17 | int stat; 18 | 19 | std::string queryDb; 20 | std::string queryDbIndex; 21 | 22 | std::string targetDb; 23 | std::string targetDbIndex; 24 | 25 | const bool tsvOut; 26 | 27 | DBReader *resultReader; 28 | DBWriter *statWriter; 29 | 30 | int threads; 31 | 32 | template 33 | 
// Command entry point: reads the dbtype of par.db1, applies
// par.extendedDbtype to it through setExtendedDbtype() and writes the
// resulting dbtype file back for par.db1. Returns EXIT_SUCCESS.
// NOTE(review): `DBReader::` has most likely lost a template argument in this
// copy of the file - confirm against the repository.
int setextendeddbtype(int argc, const char **argv, const Command& command) {
    Parameters& par = Parameters::getInstance();
    par.parseParameters(argc, argv, command, true, 0, 0);
    int dbtype = FileUtil::parseDbType(par.db1.c_str());
    // check if dbtype uses isCompressed flag
    // The flag is the top bit (bit 31) of the dbtype; it is sampled before the
    // value is rewritten and handed to writeDbtypeFile separately.
    // NOTE(review): `1 << 31` shifts into the sign bit of a 32-bit int; an
    // unsigned mask (1U << 31) would avoid relying on that - consider changing.
    bool isCompressed = (dbtype & (1 << 31));
    dbtype = DBReader::setExtendedDbtype(dbtype, par.extendedDbtype);
    DBWriter::writeDbtypeFile(par.db1.c_str(), dbtype, isCompressed);
    return EXIT_SUCCESS;
}
MemoryMapped::CacheHint::SequentialScan); 18 | Util::touchMemory(reinterpret_cast(map.getData()), map.mappedSize()); 19 | 20 | return EXIT_SUCCESS; 21 | } 22 | -------------------------------------------------------------------------------- /src/util/tsv2exprofiledb.cpp: -------------------------------------------------------------------------------- 1 | #include "Parameters.h" 2 | #include "FileUtil.h" 3 | #include "CommandCaller.h" 4 | #include "Debug.h" 5 | 6 | #include 7 | 8 | #include "tsv2exprofiledb.sh.h" 9 | 10 | void setTsv2ExProfileDbDefaults(Parameters *p) { 11 | p->compressed = true; 12 | } 13 | 14 | int tsv2exprofiledb(int argc, const char **argv, const Command &command) { 15 | Parameters &par = Parameters::getInstance(); 16 | setTsv2ExProfileDbDefaults(&par); 17 | par.parseParameters(argc, argv, command, true, 0, 0); 18 | 19 | std::string program = par.db2 + ".sh"; 20 | FileUtil::writeFile(program, tsv2exprofiledb_sh, tsv2exprofiledb_sh_len); 21 | 22 | if (par.gpu) { 23 | Debug(Debug::INFO) << "Disabling compression for GPU-databases\n"; 24 | par.compressed = false; 25 | } 26 | 27 | CommandCaller cmd; 28 | cmd.addVariable("COMPRESSED", par.compressed ? "TRUE" : NULL); 29 | cmd.addVariable("GPU", par.gpu ? 
"TRUE" : NULL); 30 | cmd.addVariable("THREADS", par.createParameterString(par.onlythreads).c_str()); 31 | cmd.addVariable("VERBOSITY", par.createParameterString(par.onlyverbosity).c_str()); 32 | cmd.execProgram(FileUtil::getRealPathFromSymLink(program).c_str(), par.filenames); 33 | 34 | // Should never get here 35 | assert(false); 36 | return EXIT_FAILURE; 37 | } 38 | -------------------------------------------------------------------------------- /src/util/versionstring.cpp: -------------------------------------------------------------------------------- 1 | #include "Command.h" 2 | #include "Debug.h" 3 | #include "Util.h" 4 | 5 | extern const char* version; 6 | 7 | int versionstring(int, const char**, const Command&) { 8 | Debug(Debug::INFO) << version << "\n"; 9 | EXIT(EXIT_SUCCESS); 10 | } 11 | -------------------------------------------------------------------------------- /src/version/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(version Version.cpp) 2 | set_target_properties(version PROPERTIES COMPILE_FLAGS "${MMSEQS_CXX_FLAGS}" LINK_FLAGS "${MMSEQS_CXX_FLAGS}") 3 | 4 | if (VERSION_OVERRIDE) 5 | target_compile_definitions(version PRIVATE -DGIT_SHA1=${VERSION_OVERRIDE}) 6 | else() 7 | include(GetGitRevisionDescription) 8 | get_git_head_revision(GIT_REFSPEC GIT_SHA1) 9 | target_compile_definitions(version PRIVATE -DGIT_SHA1=${GIT_SHA1}) 10 | endif() 11 | 12 | if (${HAVE_MPI}) 13 | target_compile_definitions(version PRIVATE -DHAVE_MPI=${HAVE_MPI}) 14 | endif() 15 | -------------------------------------------------------------------------------- /src/version/Version.cpp: -------------------------------------------------------------------------------- 1 | #ifdef HAVE_MPI 2 | #define VERSION_MPI_SUFFIX "-MPI" 3 | #else 4 | #define VERSION_MPI_SUFFIX "" 5 | #endif 6 | 7 | #ifdef GIT_SHA1 8 | #define str2(s) #s 9 | #define str(s) str2(s) 10 | const char *version = str(GIT_SHA1) VERSION_MPI_SUFFIX; 11 
# Bash completion for the mmseqs command line.
#   first word   -> candidates come from `mmseqs shellcompletion`
#   later words  -> candidates come from `mmseqs shellcompletion <first word>`,
#                   combined with file names (compgen -f)
# Errors printed by mmseqs are discarded; the function always returns 0.
_mmseqs() {
	COMPREPLY=()
	local word="${COMP_WORDS[COMP_CWORD]}"

	if [[ ${COMP_CWORD} -eq 1 ]] ; then
		COMPREPLY=( $(LC_COLLATE=C compgen -W "$(mmseqs shellcompletion 2> /dev/null)" -- "${word}") )
	elif [[ ${COMP_CWORD} -gt 1 ]] ; then
		COMPREPLY=( $(LC_COLLATE=C compgen -f -W "$(mmseqs shellcompletion "${COMP_WORDS[1]}" 2> /dev/null)" -- "${word}") )
	fi

	return 0
}
# Register the function for `mmseqs`; plusdirs adds directory name completion
# on top of the generated candidates.
complete -o plusdirs -F _mmseqs mmseqs
#!/bin/sh
# Dispatches to one of three mmseqs binaries under /usr/local/bin, chosen by
# the CPU flags that /proc/cpuinfo reports.
# All command-line arguments are forwarded unchanged and the wrapper process is
# replaced by the selected binary (exec), so its exit status is what the caller sees.

# First "flags" line of /proc/cpuinfo.
FLAGS="$(grep -m 1 '^flags' /proc/cpuinfo)"
# Patterns are tried top to bottom; the final branch is the fallback.
case "${FLAGS}" in
  *avx2*)
    exec /usr/local/bin/mmseqs_avx2 "$@"
    ;;
  *sse4_1*)
    exec /usr/local/bin/mmseqs_sse41 "$@"
    ;;
  *)
    # Also taken when the grep above produced no output.
    exec /usr/local/bin/mmseqs_sse2 "$@"
    ;;
esac
--squash 3 | -------------------------------------------------------------------------------- /util/update_zstd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | git subtree pull --prefix lib/zstd https://github.com/facebook/zstd.git dev --squash 3 | --------------------------------------------------------------------------------