├── COPYRIGHT ├── LICENSE ├── Makefile ├── README ├── UPDATES ├── base ├── config.h ├── const.h ├── data_process.c ├── data_process.h ├── data_struct.h ├── data_struct_heap.c ├── data_struct_heap.h ├── data_struct_ll.c ├── data_struct_ll.h ├── general_alg.h ├── gsl │ ├── blas.h │ ├── cdf.h │ ├── errno.h │ ├── histogram.h │ ├── math.h │ ├── matrix.h │ ├── permutation.h │ ├── randist.h │ ├── rng.h │ ├── sf.h │ ├── sort.h │ ├── statistics.h │ └── vector.h ├── histogram.c ├── histogram.h ├── lib.c ├── lib.h ├── logger.c ├── logger.h ├── macros.h ├── math.c ├── math.h ├── os.h ├── random.c ├── random.h ├── supernormalize.c ├── supernormalize.h ├── threading.h └── types.h ├── cycle ├── cycle.h ├── vg.c └── vg.h ├── doc.pdf ├── external └── R.c ├── netr ├── one.c └── one.h └── pij ├── cassist ├── cassist.c ├── cassist.h ├── llr.c ├── llr.h ├── llrtopij.c ├── llrtopij.h └── llrtopv.h ├── gassist ├── gassist.c ├── gassist.h ├── llr.c ├── llr.h ├── llrtopij.c ├── llrtopij.h ├── llrtopv.c ├── llrtopv.h ├── nullhist.c └── nullhist.h ├── llrtopij.c ├── llrtopij.h ├── llrtopv.c ├── llrtopv.h ├── nulldist.c ├── nulldist.h ├── nullhist.c ├── nullhist.h ├── rank.c └── rank.h /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright 2016-2018 Lingfei Wang 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | FTYPEBITS=32 2 | GTYPEBITS=8 3 | LIB_NAME=findr 4 | LIB_NAMEFULL="Fast Inference of Networks from Directed Regulations" 5 | LIB_FNAME=lib$(LIB_NAME).so 6 | AUTHOR="Lingfei Wang" 7 | AUTHOR_EMAIL="Lingfei.Wang.github@outlook.com" 8 | URL_LIB="https://github.com/lingfeiwang/findr" 9 | URL_BIN="https://github.com/lingfeiwang/findr-bin" 10 | URL_PYTHON="https://github.com/lingfeiwang/findr-python" 11 | URL_R="https://github.com/lingfeiwang/findr-R" 12 | 
URL_DOC="https://github.com/lingfeiwang/findr/blob/master/doc.pdf" 13 | URL_LIB_REL="$(URL_LIB)/releases" 14 | URL_BIN_REL="$(URL_BIN)/releases" 15 | URL_R_REL="$(URL_R)/releases" 16 | VERSION1=1 17 | VERSION2=0 18 | VERSION3=8 19 | LICENSE=AGPL-3 20 | LICENSE_FULL="GNU Affero General Public License, Version 3" 21 | LICENSE_URL="https://www.gnu.org/licenses/agpl-3.0" 22 | ifdef INCLUDE_MAKEFILE_BEFORE 23 | #Input package info here 24 | include $(INCLUDE_MAKEFILE_BEFORE) 25 | endif 26 | include Makefile.flags 27 | ifndef LIB_FNAME 28 | LIB_FNAME=lib 29 | endif 30 | ifndef PREFIX 31 | PREFIX=/usr/local 32 | endif 33 | ifndef DIR_BUILD 34 | DIR_BUILD=. 35 | endif 36 | ifndef DIR_SRC 37 | DIR_SRC=. 38 | endif 39 | DIR_INSTALL_PREFIX=$(PREFIX) 40 | DIR_INSTALL_LIB=$(DIR_INSTALL_PREFIX)/lib 41 | DIR_INSTALL_INC0=$(DIR_INSTALL_PREFIX)/include 42 | DIR_INSTALL_INC=$(DIR_INSTALL_INC0)/$(LIB_NAME) 43 | 44 | CC=gcc 45 | CFLAGSI=$(addprefix -I ,. $(R_INCLUDE_DIR) $(PREFIX)/include /usr/local/include) 46 | F90C=gfortran 47 | F90FLAGS=-fPIC -fdefault-real-8 -ffixed-form -O3 48 | LD=gcc 49 | #INSTALL=install 50 | OPTFLAGS=-O3 -DNDEBUG=1 -DGSL_RANGE_CHECK_OFF=1 -DHAVE_INLINE=1 51 | 52 | LIB_CONFIG=base/config_auto.h 53 | LIB_C=$(wildcard $(DIR_SRC)/*/*.c) $(wildcard $(DIR_SRC)/*/*/*.c) 54 | LIB_C_B=$(basename $(LIB_C)) 55 | LIB_F90=$(wildcard $(DIR_SRC)/*/*.f90) $(wildcard $(DIR_SRC)/*/*/*.f90) 56 | LIB_F90_B=$(basename $(LIB_F90)) 57 | LIB_H=$(wildcard $(DIR_SRC)/*/*.h) $(wildcard $(DIR_SRC)/*/*/*.h) $(LIB_CONFIG) 58 | LIB_H_B=$(basename $(LIB_H)) 59 | LIB_O_C=$(addsuffix .o,$(LIB_C_B)) 60 | LIB_O_F90=$(addsuffix .o,$(LIB_F90_B)) 61 | LIB_O=$(LIB_O_C) $(LIB_O_F90) 62 | LIB_PRODUCT=$(LIB_O) 63 | LIB_DPRODUCT=$(DIR_BUILD)/$(LIB_FNAME) 64 | INC_DPRODUCT=$(LIB_CONFIG) 65 | INC_INSTALL_FILES=$(LIB_H) 66 | INC_INSTALL_DIRS=$(dir $(LIB_H)) 67 | LIB_UNINSTALL=$(addprefix $(DIR_INSTALL_LIB)/,$(notdir $(LIB_DPRODUCT))) 68 | INC_UNINSTALL=$(DIR_INSTALL_INC) 69 | PKGCONFIG=$(LIB_NAME).pc 70 
| PKGCONFIG_UNINSTALL=$(DIR_INSTALL_LIB)/pkgconfig/$(LIB_NAME).pc 71 | 72 | .PHONY: all clean distclean install-lib install-inc install uninstall 73 | 74 | all: $(LIB_DPRODUCT) $(PKGCONFIG) 75 | 76 | $(PKGCONFIG): 77 | @echo "prefix=$(DIR_INSTALL_PREFIX)" > $@ 78 | @echo "exec_prefix=$(DIR_INSTALL_PREFIX)" >> $@ 79 | @echo "libdir=$(DIR_INSTALL_LIB)" >> $@ 80 | @echo "includedir=$(DIR_INSTALL_INC0)" >> $@ 81 | @echo >> $@ 82 | @echo "Name: $(LIB_NAME)" >> $@ 83 | @echo "Description: Fast Inference of Networks from Directed Regulations" >> $@ 84 | @echo "Version: $(VERSION1).$(VERSION2).$(VERSION3)" >> $@ 85 | @echo "Libs: -L$(DIR_INSTALL_LIB) -l$(LIB_NAME) -lgsl" >> $@ 86 | @echo "Cflags: -I$(DIR_INSTALL_INC0)" >> $@ 87 | 88 | $(LIB_CONFIG): 89 | @echo "#ifndef _HEADER_LIB_CONFIG_AUTO_H_" > $@ 90 | @echo "#define _HEADER_LIB_CONFIG_AUTO_H_" >> $@ 91 | @echo "#define FTYPEBITS $(FTYPEBITS)" >> $@ 92 | @echo "#define GTYPEBITS $(GTYPEBITS)" >> $@ 93 | @echo "#define LIB_NAME $(LIB_NAME)" >> $@ 94 | @echo "#define VERSION1 $(VERSION1)" >> $@ 95 | @echo "#define VERSION2 $(VERSION2)" >> $@ 96 | @echo "#define VERSION3 $(VERSION3)" >> $@ 97 | @if [ -n "$(DIR_SRC_GSL)" ]; then \ 98 | echo "#define LIBGSL_LOCAL $(LIBGSL_LOCAL)" >> $@; \ 99 | fi 100 | @echo "#endif" >> $@ 101 | 102 | $(DIR_BUILD): 103 | mkdir -p $@ 104 | 105 | $(LIB_O_C): $(LIB_CONFIG) 106 | 107 | $(LIB_O_F90): 108 | $(F90C) -o $@ -c $(F90FLAGS) $(addsuffix .f90,$(basename $@)) 109 | 110 | $(LIB_DPRODUCT): $(LIB_PRODUCT) $(DIR_BUILD) 111 | $(LD) -o $@ $(LIB_PRODUCT) $(LDFLAGS) 112 | 113 | clean: 114 | $(RM) $(LIB_PRODUCT) 115 | 116 | distclean: clean 117 | $(RM) $(LIB_DPRODUCT) $(PKGCONFIG) $(LIB_CONFIG) Makefile.flags $(TMP_FILE) 118 | 119 | install-lib: SHELL:=/bin/bash 120 | install-lib: all 121 | umask 0022 && mkdir -p $(DIR_INSTALL_LIB) && \ 122 | cp $(LIB_DPRODUCT) $(DIR_INSTALL_LIB)/ && \ 123 | chmod 0755 $(DIR_INSTALL_LIB)/$(notdir $(LIB_DPRODUCT)) && \ 124 | ldconfig $(DIR_INSTALL_LIB) || true 125 
| 126 | install-inc: SHELL:=/bin/bash 127 | install-inc: $(LIB_CONFIG) 128 | umask 0022 && mkdir -p $(DIR_INSTALL_INC) && \ 129 | for dname in $(INC_INSTALL_DIRS); do \ 130 | mkdir -p $(DIR_INSTALL_INC)/$$dname || exit 1; \ 131 | done 132 | # Then Files 133 | umask 0022 && for fname in $(INC_INSTALL_FILES); do \ 134 | cp $$fname $(DIR_INSTALL_INC)/$$fname || exit 1; \ 135 | chmod 0644 $(DIR_INSTALL_INC)/$$fname || exit 1; \ 136 | done 137 | 138 | install-pkgconfig: $(PKGCONFIG) 139 | umask 0022 && mkdir -p $(DIR_INSTALL_LIB)/pkgconfig && \ 140 | cp $< $(DIR_INSTALL_LIB)/pkgconfig/ 141 | chmod 0644 $(DIR_INSTALL_LIB)/pkgconfig/$(notdir $<) 142 | 143 | install: install-lib install-inc install-pkgconfig 144 | 145 | uninstall: 146 | $(RM) -R $(LIB_UNINSTALL) $(INC_UNINSTALL) $(PKGCONFIG_UNINSTALL) 147 | 148 | TMP_FILE=.tmp 149 | Makefile.flags: 150 | @echo "Testing gcc" 151 | if ! $(CC) --version > /dev/null 2>&1; then echo "GCC not found. Please download the latest GCC or specify its location in CC variable in Makefile."; exit 1; fi 152 | gver="$$($(CC) --version)"; \ 153 | t1=$$(echo "$$gver" | grep -io gcc); \ 154 | if ! [ -n "$$t1" ]; then echo "Invalid GCC version. Please download the latest GCC."; exit 1; fi 155 | cflags="$(CFLAGS) $(CFLAGS_EXTRA) $(CFLAGSI) -fopenmp -ggdb -fPIC -Wall -Wextra -Wconversion -Wsign-conversion -Wundef -Wendif-labels -std=c99 -pedantic-errors $(OPTFLAGS)"; \ 156 | ldflags="$(LDFLAGS) $(LDFLAGS_EXTRA) -L $(PREFIX)/lib -L /usr/local/lib -L /usr/lib -fopenmp -lm -shared -lc"; \ 157 | echo "Testing test method"; \ 158 | if ! 
$(LD) $$ldflags -o $(TMP_FILE) > /dev/null 2>&1; then \ 159 | echo "Linking with default flags failed."; exit 1; fi; \ 160 | echo "Testing gfortran"; \ 161 | $(LD) $$ldflags -lgfortran -o $(TMP_FILE) > /dev/null 2>&1 && \ 162 | ldflags="$$ldflags -lgfortran"; \ 163 | echo "Testing local GSL" ; \ 164 | if [ -n "$(DIR_SRC_GSL)" ] ; then \ 165 | echo "Testing -Wl,--whole-archive" ; \ 166 | ldflags2="$(DIR_SRC_GSL)/.libs/libgsl.a $(DIR_SRC_GSL)/cblas/.libs/libgslcblas.a"; \ 167 | $(LD) $$ldflags "-Wl,--whole-archive $$ldflags2 -Wl,--no-whole-archive" --shared -o $(TMP_FILE) > /dev/null 2>&1 && \ 168 | ldflags2="-Wl,--whole-archive $$ldflags2 -Wl,--no-whole-archive"; \ 169 | if ! $(LD) $$ldflags $$ldflags2 --shared -o $(TMP_FILE) > /dev/null 2>&1; then \ 170 | echo "Can't link to embedded GSL with right flag." ; exit 1; fi; \ 171 | cflags="-I $(DIR_SRC_GSL) $$cflags" ; \ 172 | ldflags="$$ldflags $$ldflags2" ; \ 173 | else \ 174 | ldflags="$$ldflags -lgsl -lgslcblas"; \ 175 | if ! $(LD) $$ldflags --shared -o $(TMP_FILE) > /dev/null 2>&1; then \ 176 | echo "Link to installed GSL failed."; exit 1; fi; \ 177 | fi ; \ 178 | echo "Testing -Wl,--no-as-needed" ; \ 179 | $(LD) -Wl,--no-as-needed $$ldflags --shared -o $(TMP_FILE) > /dev/null 2>&1 && \ 180 | ldflags="-Wl,--no-as-needed $$ldflags"; \ 181 | echo "CFLAGS=$$cflags" > $@ && \ 182 | echo "LDFLAGS=$$ldflags" >> $@ && \ 183 | $(RM) $(TMP_FILE) 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | ifdef INCLUDE_MAKEFILE_AFTER 192 | include $(INCLUDE_MAKEFILE_AFTER) 193 | endif 194 | 195 | 196 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Findr (Fast Inference of Networks from Directed Regulations) is a statistical inference tool for genetics. It predicts the probability of pairwise gene regulation based on gene expression level data. 
When genotype data is available for the best eQTLs, higher prediction accuracy can be achieved. The pairwise regulation probabilities are then used to reconstruct gene regulation networks. 2 | 3 | Findr achieves much higher accuracy and speed than existing programs [1], owing to analytical and implementation advances. Findr's unprecedented speed allows for whole-transcriptome causal network reconstruction, with a tutorial in [2]. The Findr library can be downloaded from [3]. 4 | 5 | This package is the C implementation of the Findr library. It requires recent builds of GCC, GNU make, and the GNU Scientific Library (GSL). Users can interact with the Findr library through the provided binary, Python, or R interfaces, or write their own programs that call Findr. The binary, Python, and R interfaces can be downloaded from [4], [5], and [6], respectively. On Windows, we recommend building and running Findr on "Bash on Windows" [7], rather than building everything natively from scratch. 6 | 7 | More detailed documentation of Findr can be found in doc.pdf. 8 | 9 | [1] Lingfei Wang and Tom Michoel (2017) Efficient and accurate causal inference with hidden confounders from genome-transcriptome variation data. PLOS Computational Biology 13(8): e1005703. https://doi.org/10.1371/journal.pcbi.1005703 10 | [2] Lingfei Wang and Tom Michoel (2017) Whole-transcriptome causal network inference with genomic and transcriptomic data. bioRxiv 213371. 
https://doi.org/10.1101/213371 11 | [3] https://github.com/lingfeiwang/findr 12 | [4] https://github.com/lingfeiwang/findr-bin 13 | [5] https://github.com/lingfeiwang/findr-python 14 | [6] https://github.com/lingfeiwang/findr-R 15 | [7] https://msdn.microsoft.com/commandline/wsl/about 16 | -------------------------------------------------------------------------------- /UPDATES: -------------------------------------------------------------------------------- 1 | 1.0.8: 2 | Reversed compatibility warning due to lack of schedule for version 2. 3 | 1.0.7: 4 | Added compatibility warning for interface changes in future versions. 5 | 1.0.6: 6 | Corrected a bug that may produce biased output in pij_gassist, pij_gassist_trad, and pijs_gassist when nodiag is set or when memlimit is small so computation is split into chunks. 7 | Now setting histogram bounds based on the maximum of all LLRs (as opposed to the maximum of the chunk when memlimit is small) in pij_gassist, pij_gassist_trad, and pijs_gassist. This ensures the output is independent of memlimit (related to question from sritchie73@github). 8 | Added sanity checks for agreement between input data and nodiag flag for pij functions excluding _pv (suggested by sritchie73@github). 9 | Lots of internal function renaming and code restructuring. 10 | Removed some unneeded files and functions. 11 | Now support pkg-config setup. 12 | 1.0.5: 13 | Updated Makefiles to account for different make environments (reported by sritchie73@github). 14 | 1.0.1: 15 | Bug correction: 16 | Updated LDFLAGS for R interface (reported by audreyqyfu@github). 17 | 1.0.0: 18 | New functions: 19 | Included P-value computation for 4 tests in pijs_gassist_pv and correlation test in pij_rank_pv. 20 | Included using continuous anchors (instead of discrete genotypes) for causal inference in pij(s)_cassist*. 21 | Revisions: 22 | Minor adjustments on adding fluctuations for supernormalized data when the number of samples is small (<30). 
23 | Updated logging. 24 | Minor adjustments in conversion from LLR to local FDR. 25 | Minor bug corrections on defensive programming. 26 | 0.5.0: 27 | New function: reconstruction of directed acyclic graph from prior information of edge significance. For details, see library function netr_one_greedy, doc.pdf, or UPDATES in any interface. 28 | 0.4.1: 29 | Extreme situation behavior: 30 | Added error checking for few samples (<4). 31 | Added special consideration for extremely skewed LLR during local FDR estimation. 32 | Updated copyright notice. 33 | 0.4.0: 34 | Interface changes: 35 | Removed trailing _a's for all interface function names. 36 | Removed _tot functions. (See Obsoletes.) 37 | Binary interface has an extra parameter specifying memory usage limit. 38 | Python interface has an optional parameter specifying memory usage limit. 39 | Improvements: 40 | We have introduced the function pij_gassist_trad for traditional inference test. 41 | We have parallelized the remaining part of major computation and obtained even faster speed. 42 | We have modified functions to perform inplace operations as much as possible to save memory. 43 | We have introduced memory usage limit as a parameter (except R interface). Large datasets will be automatically split before calculation to keep memory usage under limit. 44 | We have included p1 calculation for nodiag=1 case. 45 | Better input validity checks for interfaces. 46 | Bug corrections. 47 | Obsoletes: 48 | In order to reduce effort in improving and maintaining the package, we decided to obsolete several non-essential functions. They include: 49 | pij(s)_*_tot functions: they will be hardly needed as more genes can be measured by advancing technologies. 50 | Windows native support: The latest version of Windows has provided support for Ubuntu Bash. All interfaces should integrate better with Bash on Windows. 
(https://msdn.microsoft.com/commandline/wsl) 51 | 52 | 0.3.0: 53 | Function pijs_gassist_*: changed function definition to provide latest test statistics. 54 | Function pij_gassist_*: added new function for recommended combination of tests. 55 | -------------------------------------------------------------------------------- /base/config.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | //This is the general configuration header 19 | 20 | #ifndef _HEADER_LIB_CONFIG_H_ 21 | #define _HEADER_LIB_CONFIG_H_ 22 | #include "config_auto.h" 23 | #ifdef LIBEXTENSION_R 24 | #include 25 | #define logprintf REprintf 26 | #define logvprintf REvprintf 27 | #ifdef NDEBUG 28 | #undef NDEBUG 29 | #endif 30 | #ifdef GSL_RANGE_CHECK_OFF 31 | #undef GSL_RANGE_CHECK_OFF 32 | #endif 33 | #ifdef HAVE_INLINE 34 | #undef HAVE_INLINE 35 | #endif 36 | #define NDEBUG 1 37 | #define GSL_RANGE_CHECK_OFF 1 38 | #define HAVE_INLINE 1 39 | #else 40 | #define logprintf(...) fprintf(stderr,__VA_ARGS__) 41 | #define logvprintf(...) 
vfprintf(stderr,__VA_ARGS__) 42 | #endif 43 | #endif 44 | -------------------------------------------------------------------------------- /base/const.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the constants defined. 19 | */ 20 | 21 | #ifndef _HEADER_LIB_CONST_H_ 22 | #define _HEADER_LIB_CONST_H_ 23 | 24 | //Maximum number of values for genotypes 25 | #define CONST_NV_MAX 128 26 | //Minimum number of values for genotypes 27 | #define CONST_NV_MIN 2 28 | #endif 29 | -------------------------------------------------------------------------------- /base/data_struct.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This lib contains definitions of data structures 19 | */ 20 | 21 | #ifndef _HEADER_LIB_DATA_STRUCT_H_ 22 | #define _HEADER_LIB_DATA_STRUCT_H_ 23 | #include "config.h" 24 | #include "data_struct_ll.h" 25 | #include "data_struct_heap.h" 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /base/data_struct_heap.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #include "config.h" 19 | #include 20 | #include "logger.h" 21 | #include "macros.h" 22 | #include "data_struct_heap.h" 23 | 24 | int data_heap_init(struct data_heap* h,size_t nmax) 25 | { 26 | h->nmax=nmax; 27 | h->n=0; 28 | MALLOCSIZE(h->d,nmax); 29 | return !h->d; 30 | } 31 | 32 | void data_heap_free(struct data_heap* h) 33 | { 34 | h->nmax=0; 35 | if(h->d) 36 | free(h->d); 37 | h->d=0; 38 | } 39 | 40 | void data_heap_empty(struct data_heap* h) 41 | { 42 | h->n=0; 43 | } 44 | 45 | int data_heap_push(struct data_heap* h, HTYPE d) 46 | { 47 | size_t c,p; 48 | 49 | if(h->nmax==h->n) 50 | { 51 | LOG(10,"Heap push failed: heap full.") 52 | return 1; 53 | } 54 | h->d[h->n]=d; 55 | c=h->n++; 56 | while(c) 57 | { 58 | p=(c-1)/2; 59 | if(d>h->d[p]) 60 | return 0; 61 | h->d[c]=h->d[p]; 62 | h->d[p]=d; 63 | c=p; 64 | } 65 | return 0; 66 | } 67 | 68 | HTYPE data_heap_pop(struct data_heap* h) 69 | { 70 | size_t p,c2,cm,pn; 71 | HTYPE v,ret; 72 | 73 | assert(h->n); 74 | ret=h->d[0]; 75 | h->d[0]=h->d[--(h->n)]; 76 | if(!h->n) 77 | return ret; 78 | p=0; 79 | pn=h->n/2; 80 | while(p+1d[c2]d[cm]) 85 | cm=c2; 86 | if(h->d[p]d[cm]) 87 | return ret; 88 | v=h->d[p]; 89 | h->d[p]=h->d[cm]; 90 | h->d[cm]=v; 91 | p=cm; 92 | } 93 | if(p+1==pn) 94 | { 95 | cm=2*p+1; 96 | c2=cm+1; 97 | if((c2n)&&(h->d[c2]d[cm])) 98 | cm=c2; 99 | if(h->d[p]d[cm]) 100 | return ret; 101 | v=h->d[p]; 102 | h->d[p]=h->d[cm]; 103 | h->d[cm]=v; 104 | } 105 | return ret; 106 | } 107 | 108 | int data_heapdec_push(struct data_heapdec* h, HTYPE d) 109 | { 110 | size_t c,p; 111 | 112 | if(h->nmax==h->n) 113 | { 114 | LOG(10,"Heap push failed: heap full.") 115 | return 1; 116 | } 117 | h->d[h->n]=d; 118 | c=h->n++; 119 | while(c) 120 | { 121 | p=(c-1)/2; 122 | if(dd[p]) 123 | return 0; 124 | h->d[c]=h->d[p]; 125 | h->d[p]=d; 126 | c=p; 127 | } 128 | return 0; 129 | } 130 | 131 | HTYPE data_heapdec_pop(struct data_heapdec* h) 132 | { 133 | size_t p,c2,cm,pn; 134 | HTYPE v,ret; 135 | 136 | assert(h->n); 137 
| ret=h->d[0]; 138 | h->d[0]=h->d[--(h->n)]; 139 | if(!h->n) 140 | return ret; 141 | p=0; 142 | pn=h->n/2; 143 | while(p+1d[c2]>h->d[cm]) 148 | cm=c2; 149 | if(h->d[p]>h->d[cm]) 150 | return ret; 151 | v=h->d[p]; 152 | h->d[p]=h->d[cm]; 153 | h->d[cm]=v; 154 | p=cm; 155 | } 156 | if(p+1==pn) 157 | { 158 | cm=2*p+1; 159 | c2=cm+1; 160 | if((c2n)&&(h->d[c2]>h->d[cm])) 161 | cm=c2; 162 | if(h->d[p]>h->d[cm]) 163 | return ret; 164 | v=h->d[p]; 165 | h->d[p]=h->d[cm]; 166 | h->d[cm]=v; 167 | } 168 | return ret; 169 | } 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /base/data_struct_heap.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | /* This lib contains definition of heap data structure 19 | */ 20 | 21 | #ifndef _HEADER_LIB_DATA_STRUCT_HEAP_H_ 22 | #define _HEADER_LIB_DATA_STRUCT_HEAP_H_ 23 | #include "config.h" 24 | #include 25 | #include 26 | 27 | #ifdef __cplusplus 28 | extern "C" 29 | { 30 | #endif 31 | 32 | 33 | #define HTYPE size_t 34 | 35 | //Incremental heap 36 | struct data_heap 37 | { 38 | size_t nmax; 39 | size_t n; 40 | HTYPE* restrict d; 41 | }; 42 | 43 | int data_heap_init(struct data_heap* h,size_t nmax); 44 | void data_heap_free(struct data_heap* h); 45 | void data_heap_empty(struct data_heap* h); 46 | int data_heap_push(struct data_heap* h, HTYPE d); 47 | HTYPE data_heap_pop(struct data_heap* h); 48 | // int data_heap_popto(struct data_heap* h, HTYPE* d); 49 | static inline HTYPE data_heap_get(const struct data_heap* h,size_t n); 50 | static inline HTYPE data_heap_top(const struct data_heap* h); 51 | 52 | //Decremental heap 53 | #define data_heapdec data_heap 54 | #define data_heapdec_init data_heap_init 55 | #define data_heapdec_free data_heap_free 56 | #define data_heapdec_empty data_heap_empty 57 | int data_heapdec_push(struct data_heapdec* h, HTYPE d); 58 | HTYPE data_heapdec_pop(struct data_heapdec* h); 59 | #define data_heapdec_get data_heap_get 60 | #define data_heapdec_top data_heap_top 61 | 62 | 63 | static inline HTYPE data_heap_get(const struct data_heap* h,size_t n) 64 | { 65 | assert(h->n>n); 66 | return h->d[n]; 67 | } 68 | 69 | static inline HTYPE data_heap_top(const struct data_heap* h) 70 | { 71 | return data_heap_get(h,0); 72 | } 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | #endif 78 | -------------------------------------------------------------------------------- /base/data_struct_ll.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "config.h" 19 | #include 20 | #include "macros.h" 21 | #include "data_struct_ll.h" 22 | 23 | int data_ll_init(struct data_ll* ll,size_t nmax) 24 | { 25 | assert(ll); 26 | ll->nmax=nmax; 27 | ll->n=0; 28 | MALLOCSIZE(ll->d,2*nmax); 29 | if(!ll->d) 30 | { 31 | LOG(1,"Not enough memory.") 32 | return 1; 33 | } 34 | memset(ll->d,-1,2*nmax*sizeof(*ll->d)); 35 | return 0; 36 | } 37 | 38 | void data_ll_free(struct data_ll* ll) 39 | { 40 | assert(ll); 41 | if(ll->d) 42 | { 43 | free(ll->d); 44 | ll->d=0; 45 | } 46 | ll->nmax=0; 47 | } 48 | 49 | void data_ll_empty(struct data_ll* ll) 50 | { 51 | assert(ll); 52 | ll->n=0; 53 | memset(ll->d,-1,2*ll->nmax*sizeof(*ll->d)); 54 | } 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /base/data_struct_ll.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This lib contains definition of linked list data structure 19 | */ 20 | 21 | #ifndef _HEADER_LIB_DATA_STRUCT_LL_H_ 22 | #define _HEADER_LIB_DATA_STRUCT_LL_H_ 23 | #include "config.h" 24 | #include 25 | #include 26 | #include "logger.h" 27 | 28 | #ifdef __cplusplus 29 | extern "C" 30 | { 31 | #endif 32 | 33 | 34 | //Linked list for size_t 35 | struct data_ll 36 | { 37 | //Max number of items 38 | size_t nmax; 39 | //Current number of items 40 | size_t n; 41 | /* Data and links 42 | * d[2*i] is the child and d[2*i+1] is data. 43 | * Item i has child j (at d[2*j] and d[2*j+1]) if d[2*i]=j. 44 | * For data[2*i]=-1 is no child. 45 | */ 46 | size_t* restrict d; 47 | }; 48 | 49 | int data_ll_init(struct data_ll* ll,size_t nmax); 50 | void data_ll_free(struct data_ll* ll); 51 | void data_ll_empty(struct data_ll* ll); 52 | //Insert entry with value val with no parent. Returns id. 53 | static inline size_t data_ll_insert(struct data_ll* ll,size_t val); 54 | //Insert entry with value val with parent id. Returns self id. 55 | static inline size_t data_ll_insert_after(struct data_ll* ll,size_t id,size_t val); 56 | /* Insert entry with value val with child id. Returns self id. 57 | * NOTE: Does not fix child of father of id. 
58 | */ 59 | static inline size_t data_ll_insert_before(struct data_ll* ll,size_t id,size_t val); 60 | //Return child id 61 | static inline size_t data_ll_child(const struct data_ll* ll,size_t id); 62 | //Return value 63 | static inline size_t data_ll_val(const struct data_ll* ll,size_t id); 64 | 65 | static inline size_t data_ll_insert(struct data_ll* ll,size_t val) 66 | { 67 | size_t loc; 68 | 69 | if(ll->n==ll->nmax) 70 | { 71 | LOG(5,"Linked list insertion failed: linked list full.") 72 | return (size_t)-1; 73 | } 74 | loc=2*ll->n; 75 | ll->d[loc+1]=val; 76 | return ll->n++; 77 | } 78 | 79 | static inline size_t data_ll_insert_after(struct data_ll* ll,size_t id,size_t val) 80 | { 81 | size_t loc; 82 | 83 | loc=data_ll_insert(ll,val); 84 | if(loc==(size_t)-1) 85 | return loc; 86 | ll->d[2*loc]=ll->d[2*id]; 87 | ll->d[2*id]=loc; 88 | return loc; 89 | } 90 | 91 | static inline size_t data_ll_insert_before(struct data_ll* ll,size_t id,size_t val) 92 | { 93 | size_t loc; 94 | 95 | loc=data_ll_insert(ll,val); 96 | if(loc==(size_t)-1) 97 | return loc; 98 | ll->d[2*loc]=id; 99 | return loc; 100 | } 101 | 102 | static inline size_t data_ll_child(const struct data_ll* ll,size_t id) 103 | { 104 | assert(idn); 105 | return ll->d[2*id]; 106 | } 107 | 108 | static inline size_t data_ll_val(const struct data_ll* ll,size_t id) 109 | { 110 | assert(idn); 111 | return ll->d[2*id+1]; 112 | } 113 | 114 | 115 | #ifdef __cplusplus 116 | } 117 | #endif 118 | #endif 119 | -------------------------------------------------------------------------------- /base/general_alg.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This lib contains general algorithms helpful in other c programs, 19 | * such as breaking strings, counting instances, and binary search. 20 | */ 21 | 22 | #ifndef _HEADER_LIB_GENERAL_ALG_H_ 23 | #define _HEADER_LIB_GENERAL_ALG_H_ 24 | #include "config.h" 25 | #include 26 | #ifdef __cplusplus 27 | extern "C" 28 | { 29 | #endif 30 | 31 | /* Categorize data according to categorical information into separate arrays. 32 | * s: Source of data. 33 | * c: Categorical information. Each element contains a category of the corresponding element of s. 34 | * d: Destination of categorization. Element i with c[i]=j is put into d[j]. 35 | * Modified value after this function indicates size of outcome arrays. 36 | * n: Size of s and c. 37 | */ 38 | static inline void general_alg_categorize(const size_t* restrict s,const unsigned char* restrict c,size_t* restrict* restrict d,size_t n); 39 | 40 | /* Categorize data according to embedded categorical information into separate arrays. 41 | * s: Source of data. 42 | * c: Categorical information. Each element contains a category of the corresponding element of s. 43 | * d: Destination of categorization. Element i with c[s[i]]=j is put into d[j]. 44 | * Modified value after this function indicates size of outcome arrays. 45 | * n: Size of s and c. 
46 | */ 47 | static inline void general_alg_categorize_embed(const size_t* restrict s,const unsigned char* restrict c,size_t* restrict* restrict d,size_t n); 48 | 49 | /* Removes duplicates in a sorted array of double, and shifts unique values to 50 | * the front of the array. 51 | * a: array 52 | * n: size of array 53 | * Return: Size of new array 54 | */ 55 | static inline size_t remove_sorted_duplicates(double* restrict a,size_t n); 56 | 57 | 58 | 59 | 60 | 61 | 62 | static inline void general_alg_categorize(const size_t* restrict s,const unsigned char* restrict c,size_t* restrict* restrict d,size_t n) 63 | { 64 | size_t i; 65 | for(i=0;i. 17 | */ 18 | #ifndef __HEADER_LIB_GSL_BLAS_H__ 19 | #define __HEADER_LIB_GSL_BLAS_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/blas/gsl_blas.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/cdf.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #ifndef __HEADER_LIB_GSL_CDF_H__ 19 | #define __HEADER_LIB_GSL_CDF_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/cdf/gsl_cdf.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/errno.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_ERRNO_H__ 19 | #define __HEADER_LIB_GSL_ERRNO_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/err/gsl_errno.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/histogram.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_HISTOGRAM_H__ 19 | #define __HEADER_LIB_GSL_HISTOGRAM_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/histogram/gsl_histogram.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/math.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #ifndef __HEADER_LIB_GSL_MATH_H__ 19 | #define __HEADER_LIB_GSL_MATH_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/gsl_math.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/matrix.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_MATRIX_H__ 19 | #define __HEADER_LIB_GSL_MATRIX_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/matrix/gsl_matrix.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/permutation.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_PERMUTATION_H__ 19 | #define __HEADER_LIB_GSL_PERMUTATION_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/permutation/gsl_permutation.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/randist.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #ifndef __HEADER_LIB_GSL_RANDIST_H__ 19 | #define __HEADER_LIB_GSL_RANDIST_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/randist/gsl_randist.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/rng.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_RNG_H__ 19 | #define __HEADER_LIB_GSL_RNG_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/rng/gsl_rng.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/sf.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_SF_H__ 19 | #define __HEADER_LIB_GSL_SF_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/specfunc/gsl_sf.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/sort.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #ifndef __HEADER_LIB_GSL_SORT_H__ 19 | #define __HEADER_LIB_GSL_SORT_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #include 24 | #else 25 | #include 26 | #include 27 | //#include "../../../gsl/sort/gsl_sort.h" 28 | //#include "../../../gsl/sort/gsl_sort_vector.h" 29 | #endif 30 | #endif 31 | -------------------------------------------------------------------------------- /base/gsl/statistics.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_STATISTICS_H__ 19 | #define __HEADER_LIB_GSL_STATISTICS_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/statistics/gsl_statistics.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/gsl/vector.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #ifndef __HEADER_LIB_GSL_VECTOR_H__ 19 | #define __HEADER_LIB_GSL_VECTOR_H__ 20 | #include "../config.h" 21 | #ifndef LIBGSL_LOCAL 22 | #include 23 | #else 24 | #include 25 | //#include "../../../gsl/vector/gsl_vector.h" 26 | #endif 27 | #endif 28 | -------------------------------------------------------------------------------- /base/lib.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #include "config.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "gsl/errno.h" 24 | #include "random.h" 25 | #include "logger.h" 26 | #include "lib.h" 27 | 28 | #define MACROSTR(X) #X 29 | #define STR(X) MACROSTR(X) 30 | #define VERSION1_S STR(VERSION1) 31 | #define VERSION2_S STR(VERSION2) 32 | #define VERSION3_S STR(VERSION3) 33 | #define LIBVERSION VERSION1_S "." VERSION2_S "." VERSION3_S 34 | #define LIBNAME STR(LIB_NAME) 35 | 36 | #ifndef LIBINFO 37 | #define LIBINFONAME(X) X 38 | #else 39 | #define LIBINFONAME(X) LIBINFO##X 40 | #endif 41 | 42 | 43 | void LIBINFONAME(lib_init)(unsigned char loglv,unsigned long rs0,size_t nthread) 44 | { 45 | unsigned long rs; 46 | size_t nth; 47 | LOGLV(loglv); 48 | random_init(); 49 | rs=rs0?rs0:(unsigned long)time(NULL); 50 | random_seed(rs); 51 | if(nthread) 52 | omp_set_num_threads((int)nthread); 53 | omp_set_nested(0); 54 | nth=(size_t)omp_get_max_threads(); 55 | gsl_set_error_handler_off(); 56 | LOG(7,"Library started with log level %u, initial random seed %lu, and max thread count "PRINTFSIZET".",loglv,rs,nth) 57 | } 58 | 59 | const char* LIBINFONAME(lib_name)() 60 | { 61 | return LIBNAME; 62 | } 63 | 64 | size_t LIBINFONAME(lib_version1)() 65 | { 66 | return VERSION1; 67 | } 68 | 69 | size_t LIBINFONAME(lib_version2)() 70 | { 71 | return VERSION2; 72 | } 73 | 74 | size_t LIBINFONAME(lib_version3)() 75 | { 76 | return VERSION3; 77 | } 78 | 79 | const char* LIBINFONAME(lib_version)() 80 | { 81 | return LIBVERSION; 82 | } 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /base/lib.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 
Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | //This file contains library related functions 19 | #ifndef _HEADER_LIB_LIB_H_ 20 | #define _HEADER_LIB_LIB_H_ 21 | #ifdef __cplusplus 22 | extern "C" 23 | { 24 | #endif 25 | 26 | 27 | /* The library needs to be initialized before any other function is called, 28 | * to perform correctly with desired log level and random seed. 29 | * loglv: Logging level, see logger.h. 30 | * rs: Initial random seed. If rs=0, use current time as random seed. 31 | * nthread: Maximum number of threads, If nthread=0, use default setting. 32 | */ 33 | void lib_init(unsigned char loglv,unsigned long rs,size_t nthread); 34 | 35 | /* Returns library name 36 | */ 37 | const char* lib_name(); 38 | /* Returns library version in a.b.c format, or a, b, or c, for subfunctions ending with 1, 2, or 3 respectively. 39 | */ 40 | const char* lib_version(); 41 | size_t lib_version1(); 42 | size_t lib_version2(); 43 | size_t lib_version3(); 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | #endif 49 | -------------------------------------------------------------------------------- /base/logger.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "config.h" 19 | #include 20 | #include 21 | #include "os.h" 22 | #include "macros.h" 23 | #include "logger.h" 24 | 
// Default logger, used by the LOG and LOGLV macros.
struct logger LOGGER_VARIABLE;

/* Returns the display name of a logging level.
 * lv:		Logging level, 0 to 12.
 * Return:	Name such as "WARNING(5)", or 0 if lv>12.
 */
const char* logger_mname(size_t lv)
{
	static const char names[13][15]={"CRITICAL(0)","ERROR(1)","ERROR(2)","ERROR(3)","WARNING(4)","WARNING(5)","WARNING(6)","INFO(7)","INFO(8)","INFO(9)","DEBUG(10)","DEBUG(11)","DEBUG(12)"};
	if(lv>12)
		return 0;
	return names[lv];
}

/* Writes one log line: "LEVEL:timestamp:file:line: message" followed by a newline.
 * lv:		Logging level of the message. Levels above 12 are printed as "UNKNOWN".
 * file:	Source file name of the caller.
 * line:	Source line number of the caller.
 * fmt:		printf style format of the message.
 * args:	Arguments for fmt. The caller owns args and must va_end it.
 */
void logger_voutput(size_t lv,const char* file,size_t line,const char* fmt,va_list args)
{
	char	timing[100];
	const char*	name;
	struct tm *str_time;
	time_t	rawtime;

	time(&rawtime);
	str_time=localtime(&rawtime);
	//localtime can fail and strftime leaves the buffer unspecified when it
	//returns 0: print an empty timestamp rather than garbage in both cases.
	if(!str_time||!strftime(timing,sizeof(timing),"%Y-%m-%d %H:%M:%S",str_time))
		timing[0]=0;
	//logger_mname returns 0 for out-of-range levels; never hand that to %s.
	name=logger_mname(lv);
	if(!name)
		name="UNKNOWN";

	logprintf("%s:%s:%s:"PRINTFSIZET": ",name,timing,file,line);
	logvprintf(fmt,args);
	logprintf("%s",_NEWLINE_);
}

/* Unconditionally writes one log line, regardless of any logger level.
 * Parameters are as in logger_voutput, with the message arguments passed variadically.
 */
void logger_output(size_t lv,const char* file,size_t line,const char* fmt,...)
{
	va_list args;
	va_start (args, fmt);
	logger_voutput(lv,file,line,fmt,args);
	//Every va_start must be matched by va_end before returning.
	va_end(args);
}

/* Writes one log line if its level passes the logger's threshold.
 * l:		Logger providing the threshold l->lv.
 * lv:		Logging level of the message.
 * Other parameters are as in logger_output.
 * Return:	0 if the message was written, 1 if it was dropped because lv>l->lv.
 */
int logger_log(const struct logger* l,size_t lv,const char* file,size_t line,const char* fmt,...)
{
	va_list args;

	//Filter first, so that no va_list is left open on the early return.
	if(lv>l->lv)
		return 1;
	va_start (args, fmt);
	logger_voutput(lv,file,line,fmt,args);
	va_end(args);
	return 0;
}

/* Initializes an existing logger with the given level.
 * l:		Logger to initialize.
 * lv:		Logging level threshold.
 * Return:	0 on success, 1 if l is NULL (an error is logged).
 */
int logger_init(struct logger* l,size_t lv)
{
	if(!l)
	{
		logger_output(1,__FILE__,__LINE__,"NULL logger.");
		return 1;
	}
	l->lv=lv;
	return 0;
}

// Initializes the default logger with the given level. Return is as in logger_init.
int logger_default_init(size_t lv)
{
	return logger_init(&LOGGER_VARIABLE,lv);
}

/* Allocates and initializes a new logger.
 * lv:		Logging level threshold.
 * Return:	The new logger, to be released with free, or 0 on failure (an error is logged).
 */
struct logger* logger_new(size_t lv)
{
	struct logger* l;
	CALLOCSIZE(l,1);
	if(!l)
	{
		logger_output(1,__FILE__,__LINE__,"Logger allocation failed.");
		return 0;
	}
	if(logger_init(l,lv))
	{
		free(l);
		return 0;
	}
	return l;
}
-------------------------------------------------------------------------------- /base/logger.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see .
17 | */ 18 | // This file contains the logger and error functions of different levels 19 | #ifndef _HEADER_LIB_LOGGER_H_ 20 | #define _HEADER_LIB_LOGGER_H_ 21 | #include "config.h" 22 | #include 23 | #include 24 | #include "os.h" 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | // global variable name for struct logger 31 | #define LOGGER_VARIABLE logger_variable 32 | // Logging macro. logs with significance level LV, and the rest are in printf format. 33 | #define LOGS(LOGGERX,LV,...) logger_log(LOGGERX,LV,__FILE__,__LINE__,__VA_ARGS__); 34 | #define LOG(LV,...) LOGS(&LOGGER_VARIABLE,LV,__VA_ARGS__) 35 | #define LOGLV(LV) LOGGER_VARIABLE.lv=LV 36 | /* Logging levels: 37 | * CRITICAL(0),ERROR(1),ERROR(2),ERROR(3),WARNING(4),WARNING(5),WARNING(6),INFO(7),INFO(8),INFO(9),DEBUG(10),DEBUG(11),DEBUG(12) 38 | */ 39 | 40 | struct logger{ 41 | // logger output level. Only message levels. 17 | */ 18 | // This file contains the macro definitions, such as cleanup macros 19 | 20 | #ifndef _HEADER_LIB_MACROS_H_ 21 | #define _HEADER_LIB_MACROS_H_ 22 | #include "config.h" 23 | #include 24 | #include "types.h" 25 | #include "logger.h" 26 | 27 | #define ERRRETV(V,...) {LOG(1,__VA_ARGS__) CLEANUP return V;} 28 | #define ERRRET(...) ERRRETV(1,__VA_ARGS__) 29 | 30 | #define AUTOALLOCHEADER _autoalloc_ 31 | #ifndef __STDC_NO_VLA__ 32 | /* Automatically allocate memory depending on size. For count<=countmax, 33 | * allocation is through stack. For count>countmax, allocation is through 34 | * heap. 
35 | */ 36 | 37 | #define AUTOALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \ 38 | TYPE AUTOALLOCHEADER##NAME[(COUNT)<=(COUNTMAX)?(COUNT):0];\ 39 | TYPE * SUFFIX NAME;\ 40 | if((COUNT)<=(COUNTMAX))NAME=AUTOALLOCHEADER##NAME;\ 41 | else{if(COUNT) NAME=(TYPE*)malloc((COUNT)*(sizeof(TYPE)));\ 42 | else NAME=0;} 43 | #define AUTOCALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \ 44 | TYPE AUTOALLOCHEADER##NAME[(COUNT)<=(COUNTMAX)?(COUNT):0];\ 45 | TYPE * SUFFIX NAME;\ 46 | if((COUNT)<=(COUNTMAX)){\ 47 | NAME=AUTOALLOCHEADER##NAME;\ 48 | memset(NAME,0,(COUNT)*sizeof(TYPE));}\ 49 | else{if(COUNT) NAME=(TYPE*)calloc(COUNT,sizeof(TYPE));\ 50 | else NAME=0;} 51 | 52 | /* Automatically free memory depending on size. Does nothing if memory is on stack, 53 | * frees memory if is on heap. 54 | */ 55 | #define AUTOFREE(NAME) if(sizeof(AUTOALLOCHEADER##NAME)==0)CLEANMEM(NAME) 56 | #else 57 | #define AUTOALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \ 58 | TYPE * SUFFIX NAME=(TYPE*)malloc((COUNT)*sizeof(TYPE)); 59 | #define AUTOCALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \ 60 | TYPE * SUFFIX NAME=(TYPE*)calloc(COUNT,sizeof(TYPE); 61 | #define AUTOFREE(NAME) CLEANMEM(NAME) 62 | #endif 63 | #define AUTOALLOC(TYPE,NAME,COUNT,COUNTMAX) AUTOALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,) 64 | #define AUTOCALLOC(TYPE,NAME,COUNT,COUNTMAX) AUTOCALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,) 65 | 66 | 67 | #define AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,BASETYPE,VECTYPE) \ 68 | AUTOALLOC(BASETYPE,_vec_##NAME,COUNT,COUNTMAX)\ 69 | CONCATENATE2(VECTYPE,_view) _vecview_##NAME;\ 70 | VECTYPE* NAME=0;\ 71 | if(_vec_##NAME)\ 72 | {\ 73 | _vecview_##NAME=CONCATENATE2(VECTYPE,_view_array)(_vec_##NAME,COUNT);\ 74 | NAME=&_vecview_##NAME.vector;\ 75 | } 76 | #define AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,BASETYPE,MATTYPE) \ 77 | AUTOALLOC(BASETYPE,_mat_##NAME,(COUNT1)*(COUNT2),COUNTMAX)\ 78 | CONCATENATE2(VECTYPE,_view) _matview_##NAME;\ 79 | MATTYPE* NAME=0;\ 80 | if(_mat_##NAME)\ 81 | {\ 82 | 
_matview_##NAME=CONCATENATE2(MATTYPE,_view_array)(_mat_##NAME,COUNT1,COUNT2);\ 83 | NAME=&_matview_##NAME.matrix;\ 84 | }\ 85 | 86 | #define AUTOFREEVEC(NAME) if(NAME){AUTOFREE(_vec_##NAME)NAME=0;} 87 | #define AUTOFREEMAT(NAME) if(NAME){AUTOFREE(_mat_##NAME)NAME=0;} 88 | 89 | #define AUTOALLOCVECO(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,float, VECTORO) 90 | #define AUTOALLOCVECD(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,double, VECTORD) 91 | #define AUTOALLOCVECC(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,char, VECTORC) 92 | #define AUTOALLOCVECUC(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,unsigned char, VECTORUC) 93 | #define AUTOALLOCVECI(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,int, VECTORI) 94 | #define AUTOALLOCVECL(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,long, VECTORL) 95 | #define AUTOALLOCVECUL(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,unsigned long, VECTORUL) 96 | #define AUTOALLOCVECF(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,FTYPE, VECTORF) 97 | #define AUTOALLOCVECG(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,GTYPE, VECTORG) 98 | #define AUTOALLOCMATO(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,float, MATRIXO) 99 | #define AUTOALLOCMATD(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,double, MATRIXD) 100 | #define AUTOALLOCMATC(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,char, MATRIXC) 101 | #define AUTOALLOCMATUC(NAME,COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,unsigned char, MATRIXUC) 102 | #define AUTOALLOCMATI(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,int, MATRIXI) 103 | #define AUTOALLOCMATL(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,long, MATRIXL) 104 | #define AUTOALLOCMATUL(NAME,COUNT1,COUNT2,COUNTMAX) 
AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,unsigned long, MATRIXUL) 105 | #define AUTOALLOCMATF(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,FTYPE, MATRIXF) 106 | #define AUTOALLOCMATG(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,GTYPE, MATRIXG) 107 | 108 | // Cleanup macros 109 | #define CLEANANY(X,F) if(X){F(X);X=0;} 110 | #define CLEANMEM(X) CLEANANY(X,free) 111 | #define CLEANVECO(X) CLEANANY(X,VECTOROF(free)) 112 | #define CLEANVECD(X) CLEANANY(X,VECTORDF(free)) 113 | #define CLEANVECC(X) CLEANANY(X,VECTORCF(free)) 114 | #define CLEANVECUC(X) CLEANANY(X,VECTORUCF(free)) 115 | #define CLEANVECI(X) CLEANANY(X,VECTORIF(free)) 116 | #define CLEANVECL(X) CLEANANY(X,VECTORLF(free)) 117 | #define CLEANVECUL(X) CLEANANY(X,VECTORULF(free)) 118 | #define CLEANVECF(X) CLEANANY(X,VECTORFF(free)) 119 | #define CLEANVECG(X) CLEANANY(X,VECTORGF(free)) 120 | 121 | #define CLEANMATO(X) CLEANANY(X,MATRIXOF(free)) 122 | #define CLEANMATD(X) CLEANANY(X,MATRIXDF(free)) 123 | #define CLEANMATC(X) CLEANANY(X,MATRIXCF(free)) 124 | #define CLEANMATUC(X) CLEANANY(X,MATRIXUCF(free)) 125 | #define CLEANMATI(X) CLEANANY(X,MATRIXIF(free)) 126 | #define CLEANMATL(X) CLEANANY(X,MATRIXLF(free)) 127 | #define CLEANMATUL(X) CLEANANY(X,MATRIXULF(free)) 128 | #define CLEANMATF(X) CLEANANY(X,MATRIXFF(free)) 129 | #define CLEANMATG(X) CLEANANY(X,MATRIXGF(free)) 130 | 131 | #define CLEANPERM(X) CLEANANY(X,gsl_permutation_free) 132 | #define CLEANHIST(X) CLEANANY(X,gsl_histogram_free) 133 | #define CLEANFILE(X) CLEANANY(X,fclose) 134 | #define CLEANMMATF(X,N) if(X){for(i=0;i. 
17 | */ 18 | #include "config.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "gsl/sf.h" 24 | #include "gsl/math.h" 25 | #include "types.h" 26 | #include "logger.h" 27 | #include "math.h" 28 | 29 | 30 | void math_cdf_quantile_calc(double start,double step,size_t n,double left,double right,double (*func)(double,const void*),const void* param,double eps,double* ans) 31 | { 32 | size_t i,nleft,nright; 33 | double mid,midv,t1; 34 | 35 | while(n) 36 | { 37 | if(right-left0?(size_t)ceil(t1/step):0; 49 | if(nleft>n) 50 | nleft=n; 51 | nright=n-nleft; 52 | //Small side first 53 | if(nleft<=nright) 54 | { 55 | if(nleft) 56 | { 57 | math_cdf_quantile_calc(start,step,nleft,left,mid,func,param,eps,ans); 58 | start+=step*(double)nleft; 59 | n=nright; 60 | ans+=nleft; 61 | } 62 | left=mid; 63 | } 64 | else 65 | { 66 | if(nright) 67 | { 68 | math_cdf_quantile_calc(start+(double)nleft*step,step,nright,mid,right,func,param,eps,ans+nleft); 69 | n=nleft; 70 | } 71 | right=mid; 72 | } 73 | } 74 | } 75 | 76 | 
/* This function is modified from GNU Scientific Library (GSL) version 1.16.
 * See https://www.gnu.org/software/gsl/.
 *
 * Sums the Gauss series of the hypergeometric function without its leading
 * term 1, i.e. the terms k>=1 of 2F1(a,b,c;x), so that values of 2F1 close
 * to 1 do not lose precision. Positive and negative terms are accumulated
 * separately.
 * a,b,c,x:	Parameters and argument of 2F1(a,b,c;x).
 * result:	Output. result->val is the sum and result->err its error estimate.
 * Return:	0 on convergence or exact termination of the series.
 * 			1 if |c| is below GSL_DBL_EPSILON (val=0, err=1), or if the
 * 			series has not converged after 30000 terms (partial sum is stored).
 */
int math_sf_2F1_m1(const double a, const double b, const double c,const double x, gsl_sf_result * result)
{
	double sum_pos = 0.0;
	double sum_neg = 0.0;
	double del_pos = 0.0;
	double del_neg = 0.0;
	/* del is the k-th series term, updated multiplicatively below. It must
	 * start at the k=0 term, which is 1: starting at 0 keeps every term at
	 * 0, so the series would "terminate exactly" at once and always give 0.
	 * The k=0 term itself is never added to the sums.
	 */
	double del = 1.0;
	double k = 0.0;
	int i = 0;

	if(fabs(c) < GSL_DBL_EPSILON) {
		result->val = 0.0; /* FIXME: ?? */
		result->err = 1.0;
		return 1;
	}

	do {
		if(++i > 30000) {
			/* Too many terms: report the partial sum and flag failure. */
			result->val = sum_pos - sum_neg;
			result->err = del_pos + del_neg;
			result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
			result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k)+1.0) * fabs(result->val);
			return 1;
		}
		del *= (a+k)*(b+k) * x / ((c+k) * (k+1.0));  /* Gauss series */

		if(del > 0.0) {
			del_pos = del;
			sum_pos += del;
		}
		else if(del == 0.0) {
			/* Exact termination (a or b was a negative integer).
			 */
			del_pos = 0.0;
			del_neg = 0.0;
			break;
		}
		else {
			del_neg = -del;
			sum_neg -= del;
		}

		k += 1.0;
	} while(fabs((del_pos + del_neg)/(sum_pos-sum_neg)) > GSL_DBL_EPSILON);

	result->val = sum_pos - sum_neg;
	result->err = del_pos + del_neg;
	result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
	result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k) + 1.0) * fabs(result->val);

	return 0;
}
-------------------------------------------------------------------------------- /base/math.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr.
If not, see . 17 | */ 18 | /* This lib contains mathematical functions: 19 | * 1: Special functions 20 | * 2: Cumulative density function related 21 | */ 22 | 23 | #ifndef _HEADER_LIB_MATH_H_ 24 | #define _HEADER_LIB_MATH_H_ 25 | #include "config.h" 26 | #include 27 | #include 28 | #include "gsl/math.h" 29 | #include "gsl/sf.h" 30 | 31 | #ifdef __cplusplus 32 | extern "C" 33 | { 34 | #endif 35 | 36 | /************************************************** 37 | * Special functions 38 | **************************************************/ 39 | // Calculates ln(Gamma(n/2)) 40 | static inline double math_sf_lngammahalf(size_t n); 41 | 42 | // Calculates exp(x)-1, where x can be close to 0. 43 | static inline double math_sf_expminusone(double x); 44 | 45 | // Calculates log(x+1), where x can be close to 0. 46 | static inline double math_sf_logplusone(double x); 47 | 48 | // Calculates hypergeometric function minus 1, i.e. 2F1(a,b,c;x)-1 49 | int math_sf_2F1_m1(const double a, const double b, const double c,const double x, gsl_sf_result * result); 50 | 51 | /************************************************** 52 | * CDF related functions 53 | **************************************************/ 54 | 55 | /* Locate quantiles of CDF with binary search. 56 | * start: Start quantile location to be calculated 57 | * step: Step of quantile location 58 | * n: Number of quantiles to calculate 59 | * left: All quantiles are known >left. 60 | * right: All quantiles are known left. 
71 | * right: All quantiles are known 1E-4?exp(x)-1:x*(1+(x/2)*(1+(x/3)*(1+x/4))); 110 | } 111 | 112 | 113 | static inline double math_sf_logplusone(double x) 114 | { 115 | /* Small |x|: ln(1+x)=x-x^2/2+x^3/3-x^4/4 in nested form, avoiding cancellation in log(x+1) */ return fabs(x)>1E-4?log(x+1):x*(1-(x/2)*(1-((x*2)/3)*(1-(x*3)/4))); 116 | } 117 | 118 | static inline void math_cdf_quantile(size_t n,double left,double right,double (*func)(double,const void*),const void* param,double eps,double* ans) 119 | { 120 | double step=1./(double)n; 121 | assert(n>1); 122 | assert((func(left,param)1-step)); 123 | math_cdf_quantile_calc(step,step,n-1,left,right,func,param,eps,ans); 124 | } 125 | 126 | #ifdef __cplusplus 127 | } 128 | #endif 129 | #endif 130 | -------------------------------------------------------------------------------- /base/os.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | // This file contains OS specific routines 19 | #ifndef _HEADER_LIB_OS_H_ 20 | #define _HEADER_LIB_OS_H_ 21 | 22 | #ifdef _NEWLINE_ 23 | #undef _NEWLINE_ 24 | #endif 25 | 26 | // OS dependent new line 27 | #if defined(unix) || defined(__unix__) || defined(__unix) || defined(__APPLE__) || defined(__MACH__) || defined(__linux__) 28 | #define _NEWLINE_ "\n" 29 | #define PRINTFSIZET "%zu" 30 | #endif 31 | #if defined(_WIN32) || defined(_WIN64) 32 | #define _NEWLINE_ "\r\n" 33 | #define PRINTFSIZET "%Iu" 34 | #endif 35 | #ifndef _NEWLINE_ 36 | #error Unsupported OS 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /base/random.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "random.h" 19 | 20 | gsl_rng* random_gen; 21 | -------------------------------------------------------------------------------- /base/random.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the low level randomization routines. 19 | */ 20 | 21 | #ifndef _HEADER_LIB_RANDOM_H_ 22 | #define _HEADER_LIB_RANDOM_H_ 23 | #include "config.h" 24 | #include 25 | #include "gsl/rng.h" 26 | #include "gsl/randist.h" 27 | #include "logger.h" 28 | #include "types.h" 29 | #ifdef __cplusplus 30 | extern "C" 31 | { 32 | #endif 33 | 34 | extern gsl_rng* random_gen; 35 | 36 | #define random_new() gsl_rng_alloc(gsl_rng_taus2) 37 | 38 | static inline void random_init_any(gsl_rng** rng) 39 | { 40 | *rng=random_new(); 41 | if(!(*rng)) 42 | LOG(1,"Can't allocate random number generator.") 43 | } 44 | #define random_init() random_init_any(&random_gen) 45 | 46 | #define random_seed_any(r,s) gsl_rng_set(r,s) 47 | #define random_seed(s) random_seed_any(random_gen,s) 48 | 49 | #define random_free_any(r) gsl_rng_free(r) 50 | #define random_free() random_free_any(random_gen) /* gsl_rng_free takes the generator pointer itself, not its address */ 51 | 52 | #define random_seed_now_any(r) random_seed_any(r,(unsigned long int)time(NULL)) 53 | #define random_seed_now() random_seed_now_any(random_gen) 54 | 55 | // Generate uniformly distributed random number 56 | #define random_uniform_any(r) gsl_rng_uniform(r) 57 | #define random_uniform() random_uniform_any(random_gen) 58 | #define random_uniformi_any(r,n) gsl_rng_uniform_int(r,n) 59 | #define random_uniformi(n) 
random_uniformi_any(random_gen,n) 60 | // Generate gaussian distributed random number 61 | #define random_gaussian_any(r,sigma) gsl_ran_gaussian(r,sigma) 62 | #define random_gaussian(sigma) random_gaussian_any(random_gen,sigma) 63 | 64 | // Randomly shuffle items 65 | #define random_shufflevf_any(r,f) gsl_ran_shuffle(r,(f)->data,(f)->size,(f)->stride*sizeof(FTYPE)) 66 | #define random_shufflevf(f) random_shufflevf_any(random_gen,f) 67 | 68 | //Random shuffle gsl_permutation 69 | #define random_shuffle_any(r,f) gsl_ran_shuffle(r,(f)->data,(f)->size,sizeof(size_t)) 70 | #define random_shuffle(f) random_shuffle_any(random_gen,f) 71 | 72 | 73 | #ifdef __cplusplus 74 | } 75 | #endif 76 | #endif 77 | -------------------------------------------------------------------------------- /base/supernormalize.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #include "config.h" 19 | #include 20 | #include 21 | #include "gsl/sort.h" 22 | #include "logger.h" 23 | #include "macros.h" 24 | #include "threading.h" 25 | #include "data_process.h" 26 | #include "supernormalize.h" 27 | 28 | void supernormalize_byrow_single_buffed(MATRIXF* m,gsl_permutation *p1,const FTYPE* restrict Pinv) 29 | { 30 | size_t i,j; 31 | 32 | for(j=0;jsize1;j++) 33 | { 34 | VECTORFF(view) vvs=MATRIXFF(row)(m,j); 35 | 36 | //Rank 37 | CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(p1,&(vvs.vector)); 38 | //Distribution 39 | for(i=0;isize2;i++) 40 | MATRIXFF(set)(m,j,gsl_permutation_get(p1,i),Pinv[i]); 41 | } 42 | //Normalize again for unit variance 43 | MATRIXFF(normalize_row)(m); 44 | } 45 | 46 | int supernormalize_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv) 47 | { 48 | gsl_permutation *p1; 49 | 50 | p1=gsl_permutation_alloc(m->size2); 51 | if(!p1) 52 | { 53 | LOG(1,"Can't allocate permutations.") 54 | return 1; 55 | } 56 | supernormalize_byrow_single_buffed(m,p1,Pinv); 57 | gsl_permutation_free(p1); 58 | return 0; 59 | } 60 | 61 | void supernormalize_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv) 62 | { 63 | size_t nth=(size_t)omp_get_max_threads(); 64 | LOG(10,"Supernormalization started for matrix size ("PRINTFSIZET"*"PRINTFSIZET") on "PRINTFSIZET" threads.",m->size1,m->size2,nth) 65 | supernormalize_Pinv(m->size2,Pinv); 66 | 67 | #pragma omp parallel 68 | { 69 | size_t nid=(size_t)omp_get_thread_num(); 70 | size_t n1,n2; 71 | MATRIXFF(view) mv; 72 | 73 | threading_get_startend(m->size1,&n1,&n2); 74 | if(n2>n1) 75 | { 76 | mv=MATRIXFF(submatrix)(m,n1,0,n2-n1,m->size2); 77 | supernormalize_byrow_single_buffed(&mv.matrix,p[nid],Pinv); 78 | } 79 | } 80 | 81 | LOG(10,"Supernormalization completed.") 82 | } 83 | 84 | int supernormalize_byrow(MATRIXF* m) 85 | { 86 | #define CLEANUP for(i=0;isize2); 96 | ret=!!Pinv; 97 | for(i=0;isize2); 100 | ret=ret&&p[i]; 101 | } 102 | 103 | if(!ret) 104 | ERRRET("Not enough 
memory.") 105 | supernormalize_byrow_buffed(m,p,Pinv); 106 | CLEANUP 107 | return 0; 108 | #undef CLEANUP 109 | } 110 | 111 | int supernormalizef_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv,FTYPE fluc) 112 | { 113 | int ret; 114 | ret=supernormalize_byrow_single(m,Pinv); /* plain variant, as in the _buffed and multi-thread siblings; the auto ("a") variant calls back into this function when size2<30 */ 115 | MATRIXFF(fluc)(m,fluc); 116 | MATRIXFF(normalize_row)(m); 117 | return ret; 118 | } 119 | 120 | void supernormalizef_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv,FTYPE fluc) 121 | { 122 | supernormalize_byrow_buffed(m,p,Pinv); 123 | MATRIXFF(fluc)(m,fluc); 124 | MATRIXFF(normalize_row)(m); 125 | } 126 | 127 | int supernormalizef_byrow(MATRIXF* m,FTYPE fluc) 128 | { 129 | int ret; 130 | ret=supernormalize_byrow(m); 131 | MATRIXFF(fluc)(m,fluc); 132 | MATRIXFF(normalize_row)(m); 133 | return ret; 134 | } 135 | 136 | void supernormalizer_byrow_single_buffed(MATRIXF* m,gsl_permutation *p1,VECTORF* vb,const gsl_rng* r) 137 | { 138 | size_t i,j; 139 | VECTORFF(view) vvs; 140 | 141 | for(j=0;jsize1;j++) 142 | { 143 | //Random data 144 | for(i=0;isize2;i++) 145 | VECTORFF(set)(vb,i,(FTYPE)random_gaussian_any(r,1)); 146 | CONCATENATE2(gsl_sort_vector,FTYPE_SUF)(vb); 147 | 148 | //Rank 149 | vvs=MATRIXFF(row)(m,j); 150 | CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(p1,&(vvs.vector)); 151 | //Distribution 152 | for(i=0;isize2;i++) 153 | MATRIXFF(set)(m,j,gsl_permutation_get(p1,i),VECTORFF(get)(vb,i)); 154 | } 155 | //Normalize again for unit variance 156 | MATRIXFF(normalize_row)(m); 157 | } 158 | 159 | void supernormalizer_byrow_buffed(MATRIXF* m,MATRIXF* mb,gsl_permutation * const *p,gsl_rng * const* rng) 160 | { 161 | size_t nth=(size_t)omp_get_max_threads(); 162 | LOG(10,"Randomized normalization started for matrix size ("PRINTFSIZET"*"PRINTFSIZET") on "PRINTFSIZET" threads.",m->size1,m->size2,nth) 163 | 164 | #pragma omp parallel 165 | { 166 | size_t nid=(size_t)omp_get_thread_num(); 167 | size_t n1,n2; 168 | MATRIXFF(view) mv; 169 | VECTORFF(view) vv; 170 | 171 | 
threading_get_startend(m->size1,&n1,&n2); 172 | if(n2>n1) 173 | { 174 | mv=MATRIXFF(submatrix)(m,n1,0,n2-n1,m->size2); 175 | vv=MATRIXFF(row)(mb,nid); 176 | supernormalizer_byrow_single_buffed(&mv.matrix,p[nid],&vv.vector,rng[nid]); 177 | } 178 | } 179 | 180 | LOG(10,"Randomized normalization completed.") 181 | } 182 | 183 | int supernormalizer_byrow(MATRIXF* m) 184 | { 185 | #define CLEANUP for(i=0;isize2); 197 | ret=!!mb; 198 | for(i=0;isize2); 201 | r[i]=random_new(); 202 | ret=ret&&p[i]&&r[i]; 203 | } 204 | if(!ret) 205 | ERRRET("Not enough memory.") 206 | random_seed_any(r[0],(size_t)time(NULL)); 207 | for(i=1;i. 17 | */ 18 | /* This is the header file for supernormalization, i.e. transforming 19 | * samples of a variable to normal distribution N(0,1). Two method 20 | * are provided: deterministic and random. 21 | */ 22 | 23 | #ifndef _HEADER_LIB_SUPERNORMALIZE_H_ 24 | #define _HEADER_LIB_SUPERNORMALIZE_H_ 25 | #include "config.h" 26 | #include "gsl/permutation.h" 27 | #include "gsl/cdf.h" 28 | #include "gsl/math.h" 29 | #include "random.h" 30 | #include "types.h" 31 | #ifdef __cplusplus 32 | extern "C" 33 | { 34 | #endif 35 | 36 | /********************************************************************** 37 | * Deterministic supernormalization 38 | **********************************************************************/ 39 | 40 | /* Supernormalize matrix per row with single thread and buff provided. 41 | * Supernormalization takes place by converting the existing data into a normal distribution 42 | * with 0 mean and 1 variance. Due to numerical errors, their values may be inexact. This is performed 43 | * by first converting data into their ranking, and assign new values according to the cummulative 44 | * distribution function of the respective fraction. After that, a normalization is perform to scale 45 | * the new data into 0 mean and 1 variance. 46 | * m: Matrix to be supernormalized. Overwrites data. 
47 | * p1: Permutation objects for ranking conversion 48 | * Pinv: Inverse transformation from ranking to normal distribution 49 | * (precalculated CDF values of normal distribution of the respective ranking) 50 | */ 51 | void supernormalize_byrow_single_buffed(MATRIXF* m,gsl_permutation *p1,const FTYPE* restrict Pinv); 52 | 53 | /* Supernormalize matrix per row with single thread and buff provided. 54 | * See supernormalize_byrow_single_buffed for detail. 55 | * m: Matrix to be supernormalized. Overwrites data. 56 | * Pinv: Inverse transformation from ranking to normal distribution 57 | * (precalculated CDF values of normal distribution of the respective ranking) 58 | * Return: 0 on success. 59 | */ 60 | int supernormalize_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv); 61 | 62 | /* Obtain Inverse CDF for normal distribution of n fractiles. 63 | * n: n 64 | * Pinv: Inverse CDF of normal distribution. Return[i]=CDF^(-1)((i+1)/(n+1)). 65 | */ 66 | static inline void supernormalize_Pinv(size_t n,FTYPE*restrict Pinv); 67 | 68 | 69 | /* Supernormalizes and overwrites each row of matrix m. 70 | * Supernormalize into 0 mean and 1 variance, and fulfills normal distribution 71 | * Therefore numbers are assigned purely according to the rankings. 72 | * Uses multiple threads 73 | * Ties are ordered sequentially by GSL (potential increased correlation between rows) 74 | * With or without buffer included: 75 | * m: (n1,n2) Matrix to be supernormalized 76 | * p: (nth) permutation buffer 77 | * Pinv:Buffer to calculate and place inverse CDF 78 | * nth: Number of threads. 79 | * Return: 0 if success. 
80 | */ 81 | void supernormalize_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv); 82 | int supernormalize_byrow(MATRIXF* m); 83 | 84 | /********************************************************************** 85 | * Fluctuations after deterministic supernormalization 86 | **********************************************************************/ 87 | 88 | /* Same with supernormalize_byrow_single, 89 | * supernormalize_byrow_buffed, and supernormalize_byrow, 90 | * but with an extra parameter fluc: 91 | * After supernormalization, every element x is fluctuated randomly, 92 | * being replaced by x*(1+y*fluc), where y is uniformly distributed in [-1,1). 93 | * The new matrix is then normalized to 0 mean and unit variance. 94 | * Return: 0 if success. 95 | */ 96 | int supernormalizef_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv,FTYPE fluc); 97 | void supernormalizef_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv,FTYPE fluc); 98 | int supernormalizef_byrow(MATRIXF* m,FTYPE fluc); 99 | 100 | /********************************************************************** 101 | * Auto fluctuations after deterministic supernormalization 102 | **********************************************************************/ 103 | 104 | /* Only fluctuates when m->size2<30, with fluc=2*m->size2^(-2). 105 | */ 106 | static inline int supernormalizea_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv); 107 | static inline void supernormalizea_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv); 108 | static inline int supernormalizea_byrow(MATRIXF* m); 109 | 110 | /********************************************************************** 111 | * Random supernormalization 112 | **********************************************************************/ 113 | 114 | //Check their supernormalize counterparts for definition. 
115 | void supernormalizer_byrow_buffed(MATRIXF* m,MATRIXF* mb,gsl_permutation * const *p,gsl_rng * const* rng); 116 | 117 | int supernormalizer_byrow(MATRIXF* m); 118 | 119 | 120 | /********************************************************************** 121 | * Inline functions 122 | **********************************************************************/ 123 | 124 | static inline void supernormalize_Pinv(size_t n,FTYPE* restrict Pinv) 125 | { 126 | size_t i; 127 | 128 | for(i=0;isize2<30) 135 | return supernormalizef_byrow_single(m,Pinv,(FTYPE)(2./gsl_pow_2((FTYPE)m->size2))); 136 | else 137 | return supernormalize_byrow_single(m,Pinv); 138 | } 139 | 140 | static inline void supernormalizea_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv) 141 | { 142 | if(m->size2<30) 143 | supernormalizef_byrow_buffed(m,p,Pinv,(FTYPE)(2./gsl_pow_2((FTYPE)m->size2))); 144 | else 145 | supernormalize_byrow_buffed(m,p,Pinv); 146 | } 147 | 148 | static inline int supernormalizea_byrow(MATRIXF* m) 149 | { 150 | if(m->size2<30) 151 | return supernormalizef_byrow(m,(FTYPE)(2./gsl_pow_2((FTYPE)m->size2))); 152 | else 153 | return supernormalize_byrow(m); 154 | } 155 | 156 | #ifdef __cplusplus 157 | } 158 | #endif 159 | #endif 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /base/threading.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | // This file contains the functions for multi-threading 19 | #ifndef _HEADER_LIB_THREADING_H_ 20 | #define _HEADER_LIB_THREADING_H_ 21 | #include "config.h" 22 | #include 23 | #include 24 | 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | 31 | /* Calculate the split position of the big problem into smaller ones. 32 | * ntotal: Total size of the problem 33 | * nthread: Total number of threads 34 | * x: ID of current thread 35 | * Return: The start position of problem id by thread x 36 | */ 37 | static inline size_t threading_get_start_bare(size_t ntotal,size_t nthread,size_t x); 38 | 39 | /* Calculate the start and end position of the big problem for any thread. 
40 | * ntotal: Total size of the problem 41 | * start, 42 | * end: Return location of start and end positions for any thread 43 | * id: ID of current thread 44 | * ida: Total number of threads 45 | */ 46 | static inline void threading_get_startend_from(size_t ntotal,size_t *start,size_t *end,size_t id,size_t ida); 47 | 48 | /* Calculate the start and end position of the big problem for current thread (with openMP) 49 | * ntotal: Total size of the problem 50 | * start, 51 | * end: Return location of start and end positions for current thread 52 | */ 53 | static inline void threading_get_startend(size_t ntotal,size_t *start,size_t *end); 54 | 55 | 56 | static inline size_t threading_get_start_bare(size_t ntotal,size_t nthread,size_t x) 57 | { 58 | size_t i,j; 59 | i=ntotal/nthread; 60 | j=ntotal-i*nthread; 61 | if(j>x) 62 | j=x; 63 | j+=i*x; 64 | if(j>ntotal) 65 | j=ntotal; 66 | return j; 67 | } 68 | 69 | static inline void threading_get_startend_from(size_t ntotal,size_t *start,size_t *end,size_t id,size_t ida) 70 | { 71 | *start=threading_get_start_bare(ntotal,ida,id); 72 | *end=threading_get_start_bare(ntotal,ida,id+1); 73 | } 74 | 75 | static inline void threading_get_startend(size_t ntotal,size_t *start,size_t *end) 76 | { 77 | size_t id=(size_t)omp_get_thread_num(); 78 | size_t ida=(size_t)omp_get_num_threads(); 79 | threading_get_startend_from(ntotal,start,end,id,ida); 80 | } 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | 86 | #endif 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /base/types.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | // This file contains the numerical type definitions, especially vectors and matrices 19 | 20 | #ifndef _HEADER_LIB_TYPES_H_ 21 | #define _HEADER_LIB_TYPES_H_ 22 | #include "config.h" 23 | #include 24 | #include "gsl/vector.h" 25 | #include "gsl/matrix.h" 26 | #include "gsl/blas.h" 27 | 28 | 29 | #if FTYPEBITS == 32 30 | // Type definition 31 | #define FTYPE float 32 | // Type suffix definition, for gsl vector and matrix functions 33 | #define FTYPE_SUF _float 34 | // BLAS function macro 35 | #define BLASF(X) BLASFO(X) 36 | // Minimal value 37 | #define FTYPE_MIN FLT_MIN 38 | #define FTYPE_MAX FLT_MAX 39 | #elif FTYPEBITS == 64 40 | #define FTYPE double 41 | #define FTYPE_SUF 42 | #define BLASF(X) BLASFD(X) 43 | #define FTYPE_MIN DBL_MIN 44 | #define FTYPE_MAX DBL_MAX 45 | #else 46 | #error Unknown float type bit count. 47 | #endif 48 | #if GTYPEBITS == 8 49 | #define GTYPE unsigned char 50 | #define GTYPE_SUF _uchar 51 | #else 52 | #error Unknown genotype type bit count. 
53 | #endif 54 | #define BLASFO(X) gsl_blas_s ## X 55 | #define BLASFD(X) gsl_blas_d ## X 56 | 57 | #define CONCATENATE2_(X,Y) X ## Y 58 | #define CONCATENATE2(X,Y) CONCATENATE2_(X,Y) 59 | #define CONCATENATE3_(X,Y,Z) X ## Y ## Z 60 | #define CONCATENATE3(X,Y,Z) CONCATENATE3_(X,Y,Z) 61 | #define CONCATENATE4_(X,Y,Z,W) X ## Y ## Z ## W 62 | #define CONCATENATE4(X,Y,Z,W) CONCATENATE4_(X,Y,Z,W) 63 | 64 | // vector type macro 65 | #define VECTORO gsl_vector_float 66 | #define VECTORD gsl_vector 67 | #define VECTORC gsl_vector_char 68 | #define VECTORUC gsl_vector_uchar 69 | #define VECTORI gsl_vector_int 70 | #define VECTORL gsl_vector_long 71 | #define VECTORUL gsl_vector_ulong 72 | #define VECTORF CONCATENATE2(gsl_vector,FTYPE_SUF) 73 | #define VECTORG CONCATENATE2(gsl_vector,GTYPE_SUF) 74 | // vector function type macro 75 | #define VECTOROF(X) gsl_vector_float_ ## X 76 | #define VECTORDF(X) gsl_vector_ ## X 77 | #define VECTORCF(X) gsl_vector_char_ ## X 78 | #define VECTORUCF(X) gsl_vector_uchar_ ## X 79 | #define VECTORIF(X) gsl_vector_int_ ## X 80 | #define VECTORLF(X) gsl_vector_long_ ## X 81 | #define VECTORULF(X) gsl_vector_ulong_ ## X 82 | #define VECTORFF(X) CONCATENATE2(VECTORF,_ ## X) 83 | #define VECTORGF(X) CONCATENATE2(VECTORG,_ ## X) 84 | // matrix type macro 85 | #define MATRIXO gsl_matrix_float 86 | #define MATRIXD gsl_matrix 87 | #define MATRIXC gsl_matrix_char 88 | #define MATRIXUC gsl_matrix_uchar 89 | #define MATRIXI gsl_matrix_int 90 | #define MATRIXL gsl_matrix_long 91 | #define MATRIXUL gsl_matrix_ulong 92 | #define MATRIXF CONCATENATE2(gsl_matrix,FTYPE_SUF) 93 | #define MATRIXG CONCATENATE2(gsl_matrix,GTYPE_SUF) 94 | // matrix function type macro 95 | #define MATRIXOF(X) gsl_matrix_float_ ## X 96 | #define MATRIXDF(X) gsl_matrix_ ## X 97 | #define MATRIXCF(X) gsl_matrix_char_ ## X 98 | #define MATRIXUCF(X) gsl_matrix_uchar_ ## X 99 | #define MATRIXIF(X) gsl_matrix_int_ ## X 100 | #define MATRIXLF(X) gsl_matrix_long_ ## X 101 | #define 
MATRIXULF(X) CONCATENATE2(gsl_matrix_ulong_ ## X 102 | #define MATRIXFF(X) CONCATENATE2(MATRIXF,_ ## X) 103 | #define MATRIXGF(X) CONCATENATE2(MATRIXG,_ ## X) 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | #endif 157 | -------------------------------------------------------------------------------- /cycle/cycle.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This lib contains the general definitions of cycle detection routines. 
19 | */ 20 | 21 | #ifndef _HEADER_LIB_CYCLE_H_ 22 | #define _HEADER_LIB_CYCLE_H_ 23 | #include "../base/config.h" 24 | #include 25 | #include "vg.h" 26 | 27 | #ifdef __cplusplus 28 | extern "C" 29 | { 30 | #endif 31 | 32 | #define CYCLEF(X) cycle_vg_ ## X 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | #endif 38 | -------------------------------------------------------------------------------- /cycle/vg.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #include "../base/config.h" 19 | #include 20 | #include 21 | #include "../base/general_alg.h" 22 | #include "../base/macros.h" 23 | #include "vg.h" 24 | 25 | int cycle_vg_init(struct cycle_vg_system* restrict vg,size_t dim,size_t amax) 26 | { 27 | int ret; 28 | assert(vg); 29 | ret=0; 30 | vg->n=dim; 31 | vg->nam=amax; 32 | vg->nim=vg->nom=(size_t)-1; 33 | 34 | MALLOCSIZE(vg->lvf,dim); 35 | MALLOCSIZE(vg->lvb,dim); 36 | MALLOCSIZE(vg->go,dim); 37 | MALLOCSIZE(vg->goi,dim); 38 | ret=ret||data_ll_init(&vg->gao,amax)||data_ll_init(&vg->gai,amax); 39 | MALLOCSIZE(vg->gaof,dim); 40 | MALLOCSIZE(vg->gaif,dim); 41 | MALLOCSIZE(vg->gni,dim); 42 | MALLOCSIZE(vg->gno,dim); 43 | MALLOCSIZE(vg->lao,dim); 44 | MALLOCSIZE(vg->lai,dim); 45 | MALLOCSIZE(vg->buff,dim); 46 | MALLOCSIZE(vg->buff2,dim); 47 | ret=ret||data_heap_init(&vg->lvfl,dim)||data_heapdec_init(&vg->lvbl,dim); 48 | if(ret||!(vg->lvf&&vg->lvb&&vg->go&&vg->goi&&vg->gaof&&vg->gaif&&vg->gni&&vg->gno&&vg->lao&&vg->lai&&vg->buff&&vg->buff2)) 49 | { 50 | cycle_vg_free(vg); 51 | LOG(1,"Not enough memory.") 52 | return 1; 53 | } 54 | //Initialize for empty graph. 
55 | if(cycle_vg_empty(vg)) 56 | { 57 | cycle_vg_free(vg); 58 | return 1; 59 | } 60 | return 0; 61 | } 62 | 63 | struct cycle_vg_system* cycle_vg_new(size_t dim,size_t amax) 64 | { 65 | struct cycle_vg_system* vg; 66 | MALLOCSIZE(vg,1); 67 | if(!vg) 68 | { 69 | LOG(1,"Not enough memory.") 70 | return 0; 71 | } 72 | if(cycle_vg_init(vg,dim,amax)) 73 | { 74 | free(vg); 75 | return 0; 76 | } 77 | return vg; 78 | } 79 | 80 | int cycle_vg_free(struct cycle_vg_system* restrict vg) 81 | { 82 | #define FREEMEM(X) if(X){free(X);X=0;} 83 | FREEMEM(vg->lvf) 84 | FREEMEM(vg->lvb) 85 | FREEMEM(vg->go) 86 | FREEMEM(vg->goi) 87 | FREEMEM(vg->gaof) 88 | FREEMEM(vg->gaif) 89 | FREEMEM(vg->gni) 90 | FREEMEM(vg->gno) 91 | FREEMEM(vg->lao) 92 | FREEMEM(vg->lai) 93 | FREEMEM(vg->buff) 94 | FREEMEM(vg->buff2) 95 | data_ll_free(&vg->gao); 96 | data_ll_free(&vg->gai); 97 | data_heap_free(&vg->lvbl); 98 | data_heapdec_free(&vg->lvfl); 99 | return 0; 100 | #undef FREEMEM 101 | } 102 | 103 | int cycle_vg_empty(struct cycle_vg_system* restrict vg) 104 | { 105 | size_t i; 106 | vg->na=0; 107 | memset(vg->gaof,-1,vg->n*sizeof(*vg->gaof)); 108 | memset(vg->gaif,-1,vg->n*sizeof(*vg->gaif)); 109 | memset(vg->gni,0,vg->n*sizeof(*vg->gni)); 110 | memset(vg->gno,0,vg->n*sizeof(*vg->gno)); 111 | data_ll_empty(&vg->gao); 112 | data_ll_empty(&vg->gai); 113 | for(i=0;in;i++) 114 | { 115 | vg->go[i]=i; 116 | vg->goi[i]=i; 117 | } 118 | return 0; 119 | } 120 | 121 | void cycle_vg_restore_order(struct cycle_vg_system* restrict vg,size_t vv) 122 | { 123 | size_t t; 124 | char cond; 125 | size_t* p[2]; 126 | size_t* ps; 127 | 128 | t=vg->go[vv]; 129 | cond=!!vg->lvfl.n; 130 | if(cond) 131 | { 132 | size_t t1; 133 | t1=data_heap_top(&vg->lvfl); 134 | if(t1buff; 142 | p[1]=vg->buff2; 143 | general_alg_categorize_embed(vg->goi,vg->lvf,p,t); 144 | *(p[0]++)=vg->goi[t]; 145 | memcpy(p[0],vg->buff2,(size_t)(p[1]-vg->buff2)*sizeof(*p[0])); 146 | memcpy(vg->buff+t+1,vg->goi+t+1,(vg->n-t-1)*sizeof(*vg->buff)); 147 | 
148 | ps=vg->buff; 149 | vg->buff=vg->goi; 150 | vg->goi=ps; 151 | cycle_vg_fix_go(vg); 152 | return; 153 | } 154 | 155 | p[0]=vg->buff; 156 | p[1]=vg->buff2; 157 | general_alg_categorize_embed(vg->goi,vg->lvf,p,t); 158 | *(p[1]++)=vg->goi[t]; 159 | ps=p[0]; 160 | p[0]=p[1]; 161 | p[1]=ps; 162 | general_alg_categorize_embed(vg->goi+t+1,vg->lvb,p,vg->n-t-1); 163 | memcpy(p[1],vg->buff2,(size_t)(p[0]-vg->buff2)*sizeof(*p[1])); 164 | 165 | ps=vg->buff; 166 | vg->buff=vg->goi; 167 | vg->goi=ps; 168 | cycle_vg_fix_go(vg); 169 | return; 170 | } 171 | 172 | int cycle_vg_add(struct cycle_vg_system* restrict vg,size_t v1,size_t v2) 173 | { 174 | 175 | //Validity check 176 | assert(v1!=v2); 177 | if(vg->na>=vg->nam) 178 | return 1; 179 | if(vg->go[v1]go[v2]) 180 | return cycle_vg_add_arc(vg,v1,v2); 181 | 182 | //Initialize 183 | data_heap_empty(&vg->lvfl); 184 | data_heapdec_empty(&vg->lvbl); 185 | memset(vg->lvf,0,vg->n*sizeof(*vg->lvf)); 186 | memset(vg->lvb,0,vg->n*sizeof(*vg->lvb)); 187 | memset(vg->lao,-1,vg->n*sizeof(*vg->lao)); 188 | memset(vg->lai,-1,vg->n*sizeof(*vg->lai)); 189 | 190 | //Test loop 191 | //Enter function, line 1 192 | vg->lvf[v2]=1; 193 | vg->lvb[v1]=1; 194 | vg->lao[v2]=vg->gaof[v2]; 195 | vg->lai[v1]=vg->gaif[v1]; 196 | //line 2 197 | if(vg->gaof[v2]!=(size_t)-1) 198 | data_heap_push(&vg->lvfl,vg->go[v2]); 199 | //line 3 200 | if(vg->gaif[v1]!=(size_t)-1) 201 | data_heapdec_push(&vg->lvbl,vg->go[v1]); 202 | //line 4&5 (while) 203 | while((vg->lvfl.n>0)&&(vg->lvbl.n>0)) 204 | { 205 | size_t vu,vx,vy,vz; 206 | 207 | vu=data_heap_top(&vg->lvfl); 208 | vz=data_heapdec_top(&vg->lvbl); 209 | if(vu>=vz) 210 | break; 211 | vu=vg->goi[vu]; 212 | vz=vg->goi[vz]; 213 | //Enter macro, line 1 214 | vx=data_ll_val(&vg->gao,vg->lao[vu]); 215 | vy=data_ll_val(&vg->gai,vg->lai[vz]); 216 | //line 2 217 | vg->lao[vu]=data_ll_child(&vg->gao,vg->lao[vu]); 218 | vg->lai[vz]=data_ll_child(&vg->gai,vg->lai[vz]); 219 | //line 3 220 | if(vg->lao[vu]==(size_t)-1) 221 | 
data_heap_pop(&vg->lvfl); 222 | if(vg->lai[vz]==(size_t)-1) 223 | data_heapdec_pop(&vg->lvbl); 224 | //line 4, first half 225 | if(vg->lvb[vx]) 226 | return 1; 227 | //line 5-8 (if) 228 | if(!vg->lvf[vx]) 229 | { 230 | //line 6,7 231 | vg->lvf[vx]=1; 232 | if(vg->gaof[vx]!=(size_t)-1) 233 | { 234 | vg->lao[vx]=vg->gaof[vx]; 235 | data_heap_push(&vg->lvfl,vg->go[vx]); 236 | } 237 | } 238 | //line 4, second half 239 | if(vg->lvf[vy]) 240 | return 1; 241 | //line 9-12 (if) 242 | if(!vg->lvb[vy]) 243 | { 244 | //line 10,11 245 | vg->lvb[vy]=1; 246 | if(vg->gaif[vy]!=(size_t)-1) 247 | { 248 | vg->lai[vy]=vg->gaif[vy]; 249 | data_heapdec_push(&vg->lvbl,vg->go[vy]); 250 | } 251 | } 252 | } 253 | 254 | //Add arc 255 | if(cycle_vg_add_arc(vg,v1,v2)) 256 | return 1; 257 | 258 | //Recover ordering 259 | cycle_vg_restore_order(vg,v1); 260 | return 0; 261 | } 262 | 263 | void cycle_vg_extract_graph(const struct cycle_vg_system* restrict vg,MATRIXUC* g) 264 | { 265 | size_t i; 266 | size_t t1; 267 | 268 | assert((vg->n==g->size1)&&(vg->n==g->size2)); 269 | MATRIXUCF(set_zero)(g); 270 | for(i=0;in;i++) 271 | { 272 | t1=vg->gaof[i]; 273 | while(t1!=(size_t)-1) 274 | { 275 | MATRIXUCF(set)(g,i,data_ll_val(&vg->gao,t1),1); 276 | t1=data_ll_child(&vg->gao,t1); 277 | } 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /cycle/vg.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This lib contains the definitions of Vertex Guided Search and topological order maintenance. 19 | */ 20 | 21 | #ifndef _HEADER_LIB_CYCLE_VG_H_ 22 | #define _HEADER_LIB_CYCLE_VG_H_ 23 | #include "../base/config.h" 24 | #include 25 | #include "../base/data_struct.h" 26 | #include "../base/logger.h" 27 | #include "../base/types.h" 28 | // #include "cycle_general.h" 29 | #ifdef __cplusplus 30 | extern "C" 31 | { 32 | #endif 33 | 34 | 35 | struct cycle_vg_system 36 | { 37 | //Constant parameters: 38 | //Number of vertices of the system 39 | size_t n; 40 | //Maximum number of arcs of the system 41 | size_t nam; 42 | 43 | //Constant Parameters requiring manual enabling after initialization: 44 | //Maximum number of incoming arcs for each vertex 45 | size_t nim; 46 | //Maximum number of outgoing arcs for each vertex 47 | size_t nom; 48 | 49 | //Graph construction variables: 50 | //Current number of arcs 51 | size_t na; 52 | //Order of vertices 53 | size_t* restrict go; 54 | //Inverse of go 55 | size_t* restrict goi; 56 | /* Graph representation for arcs out with linked list. 57 | * Each value j in linked list i corresponds to arc (i,j). 58 | * First item of each linked list i is specified in gaof. 59 | */ 60 | struct data_ll gao; 61 | //First item of each linked list i of gao, or -1 of not exist. 
62 | size_t* restrict gaof; 63 | //Below for arcs in 64 | struct data_ll gai; 65 | size_t* restrict gaif; 66 | //Number of incoming arcs for each vertex 67 | size_t* restrict gni; 68 | //Number of outgoing arcs for each vertex 69 | size_t* restrict gno; 70 | 71 | 72 | 73 | //Loop detection temporary variables: 74 | //Vertices visitedness forward/backward, i.e. membership of F,B 75 | unsigned char* restrict lvf; 76 | unsigned char* restrict lvb; 77 | //Vertices to be visited forward/backward, i.e. membership of FL,BL 78 | struct data_heap lvfl; 79 | struct data_heapdec lvbl; 80 | /* Current arc id of those from/to a specific vertex. 81 | * (i,lao[i]) is the current out arc from i during the search, indexed by gao. 82 | * (lai[i],i) is the current in arc to i during the search, index by gai. 83 | */ 84 | size_t* restrict lao; 85 | size_t* restrict lai; 86 | //Buffer for calculation during loop detection and order maintenance. 87 | size_t* buff; 88 | size_t* buff2; 89 | }; 90 | 91 | /* Initialize cycle detection system with vertex count and max number of arc count 92 | * vg: Cycle detection system. 93 | * dim: Number of vertices. 94 | * amax: Max number of arcs. 95 | * Return: 0 on success. 96 | */ 97 | int cycle_vg_init(struct cycle_vg_system* restrict vg,size_t dim,size_t amax); 98 | struct cycle_vg_system* cycle_vg_new(size_t dim,size_t amax); 99 | int cycle_vg_free(struct cycle_vg_system* restrict vg); 100 | 101 | /* Re-initialize existing cycle detection system to the same size. 102 | * vg: Cycle detection system. 103 | * Return: 0 on success. 104 | */ 105 | int cycle_vg_empty(struct cycle_vg_system* restrict vg); 106 | 107 | /* Obtain the number of vertices of the system 108 | * vg: Cycle detection system. 109 | * Return: Number of vertices on success. 110 | */ 111 | static inline size_t cycle_vg_dim(const struct cycle_vg_system* restrict vg); 112 | 113 | /* Add arc v1->v2 to current graph in vg without loop checks. 114 | * vg: Cycle detection system. 
115 | * v1: Source of arc 116 | * v2: Destination of arc 117 | * Return: 1 if arc full, or otherwise 0 for success. 118 | */ 119 | static inline int cycle_vg_add_arc(struct cycle_vg_system* restrict vg,size_t v1,size_t v2); 120 | 121 | // Fix vertex order array base on its inverse, and vice versa 122 | static inline void cycle_vg_fix_go(struct cycle_vg_system* restrict vg); 123 | static inline void cycle_vg_fix_goi(struct cycle_vg_system* restrict vg); 124 | 125 | /* Restore order of vertices after adding a backward arc. 126 | * vg: Cycle detection system. 127 | * vv: Source of newly added arc 128 | */ 129 | void cycle_vg_restore_order(struct cycle_vg_system* restrict vg,size_t vv); 130 | 131 | /* Try to add arc v1->v2 to current graph in vg. 132 | * vg: Cycle detection system. 133 | * v1: Source of arc 134 | * v2: Destination of arc 135 | * Return: 0 if success, or 1 if failed because of loop or full arc. 136 | */ 137 | int cycle_vg_add(struct cycle_vg_system* restrict vg,size_t v1,size_t v2); 138 | 139 | /* Extracts graph representation into matrix form. 140 | * vg: Cycle detection system. 141 | * g: (n,n) destination matrix. g[i,j]=1 if arc (i,j) exists, and 0 if not. 
142 | */ 143 | void cycle_vg_extract_graph(const struct cycle_vg_system* restrict vg,MATRIXUC* g); 144 | 145 | 146 | 147 | 148 | 149 | static inline size_t cycle_vg_dim(const struct cycle_vg_system* restrict vg) 150 | { 151 | return vg->n; 152 | } 153 | 154 | static inline int cycle_vg_add_arc(struct cycle_vg_system* restrict vg,size_t v1,size_t v2) 155 | { 156 | size_t ret; 157 | 158 | if((vg->gno[v1]>=vg->nom)||(vg->gni[v2]>=vg->nim)) 159 | return 1; 160 | ret=data_ll_insert_before(&vg->gao,vg->gaof[v1],v2); 161 | if(ret==(size_t)-1) 162 | return 1; 163 | vg->gaof[v1]=ret; 164 | ret=data_ll_insert_before(&vg->gai,vg->gaif[v2],v1); 165 | vg->gaif[v2]=ret; 166 | vg->na++; 167 | vg->gno[v1]++; 168 | vg->gni[v2]++; 169 | return 0; 170 | } 171 | 172 | static inline void cycle_vg_fix_go(struct cycle_vg_system* restrict vg) 173 | { 174 | size_t i; 175 | for(i=0;in;i++) 176 | vg->go[vg->goi[i]]=i; 177 | } 178 | 179 | static inline void cycle_vg_fix_goi(struct cycle_vg_system* restrict vg) 180 | { 181 | size_t i; 182 | for(i=0;in;i++) 183 | vg->goi[vg->go[i]]=i; 184 | } 185 | 186 | 187 | #ifdef __cplusplus 188 | } 189 | #endif 190 | #endif 191 | -------------------------------------------------------------------------------- /doc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lingfeiwang/findr/8aed971a7c0ade736eb764809f82728fb2fc72a8/doc.pdf -------------------------------------------------------------------------------- /netr/one.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include "../base/gsl/math.h" 21 | #include "../base/gsl/permutation.h" 22 | #include "../base/gsl/sort.h" 23 | #include "../base/logger.h" 24 | #include "../base/macros.h" 25 | #include "../base/data_process.h" 26 | #include "../cycle/cycle.h" 27 | #include "one.h" 28 | 29 | 30 | size_t netr_one_greedy(const MATRIXF* p,MATRIXUC* net,size_t nam,size_t nimax,size_t nomax) 31 | { 32 | #define CLEANUP CLEANVECF(v)CYCLEF(free)(&cs);CLEANPERM(perm) 33 | #define TOID(N,V1,V2) V1=(N)/(n-1);V2=(N)%(n-1);if((V2)>=(V1))V2++; 34 | 35 | struct CYCLEF(system) cs; 36 | VECTORF* v=0; 37 | gsl_permutation* perm=0; 38 | int ret; 39 | size_t n,na,i,ntot; 40 | 41 | 42 | 43 | //Initialize 44 | n=p->size1; 45 | assert((n==p->size2)&&(n==net->size1)&&(n==net->size2)); 46 | assert(nimax&&nomax&&nam); 47 | ntot=n*(n-1); 48 | nam=GSL_MIN(ntot/2,nam); 49 | { 50 | size_t t1; 51 | t1=GSL_MIN(nimax,nomax); 52 | if(nam/n>=t1) 53 | nam=t1*n; 54 | } 55 | ret=CYCLEF(init)(&cs,n,nam); 56 | if(ret) 57 | ERRRETV(0,"Failed to initialize cycle detection.") 58 | cs.nim=nimax; 59 | cs.nom=nomax; 60 | v=VECTORFF(alloc)(ntot); 61 | perm=gsl_permutation_alloc(ntot); 62 | if(!(v&&perm)) 63 | ERRRETV(0,"Not enough memory.") 64 | 65 | //Obtain edge order 66 | MATRIXFF(flatten_nodiag)(p,v); 67 | ret=CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(perm,v); 68 | if(ret) 69 | ERRRETV(0,"Failed to sort vector.") 70 | CLEANVECF(v) 71 | 72 | //Add edges 73 | for(i=0,na=0;(i=(V1))V2++; 93 | 94 | struct CYCLEF(system) cs; 95 | VECTORF* v=0; 96 | gsl_permutation* perm=0; 97 | int ret; 98 | 
size_t n,na,i,ntot; 99 | int sign[2]={1,-1}; 100 | clock_t cstart,cnow; 101 | 102 | //Initialize 103 | n=p->size1; 104 | assert((n==p->size2)&&(n==net->size1)&&(n==net->size2)); 105 | assert(nimax&&nomax); 106 | ntot=n*(n-1); 107 | nam=GSL_MIN(ntot/2,nam); 108 | ret=CYCLEF(init)(&cs,n,nam); 109 | if(ret) 110 | ERRRETV(0,"Failed to initialize cycle detection.") 111 | cs.nim=nimax; 112 | cs.nom=nomax; 113 | v=VECTORFF(alloc)(ntot); 114 | perm=gsl_permutation_alloc(ntot); 115 | if(!(v&&perm)) 116 | ERRRETV(0,"Not enough memory.") 117 | 118 | //Obtain edge order 119 | MATRIXFF(flatten_nodiag)(p,v); 120 | ret=CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(perm,v); 121 | if(ret) 122 | ERRRETV(0,"Failed to sort vector.") 123 | CLEANVECF(v) 124 | 125 | //Add edges 126 | MATRIXLF(set_zero)(net); 127 | cstart=clock(); 128 | MATRIXDF(set_all)(time,(double)cstart); 129 | for(i=0,na=0;(i. 17 | */ 18 | /* This lib contains the implementation of network reconstruction 19 | * algorithms for a single network. 20 | */ 21 | 22 | #ifndef _HEADER_LIB_NETR_ONE_H_ 23 | #define _HEADER_LIB_NETR_ONE_H_ 24 | #include "../base/config.h" 25 | #include 26 | #include "../base/types.h" 27 | 28 | #ifdef __cplusplus 29 | extern "C" 30 | { 31 | #endif 32 | 33 | /* Construct a deterministic single best Direct Acyclic Graph from prior pij information, 34 | * and stop when the number of edges reaches threshold or no edge can be added. 35 | * This method sorts pij values and attempt to add edges from the most likely one, 36 | * therefore named 'greedy'. 37 | * p: (n,n) for pij matrix 38 | * net: (n,n) for constructed network. net[i,j]=1 if edge (i,j) exists, 0 if not. 39 | * nam: Maximum number of edges. Set to (size_t)-1 for unlimited. 40 | * nimax: Maximum number of incoming edges for each node. Set to (size_t)-1 for unlimited. 41 | * nomax: Maximum number of outgoing edges for each node. Set to (size_t)-1 for unlimited. 42 | * Return: Number of edges, or 0 if failed. 
43 | */ 44 | size_t netr_one_greedy(const MATRIXF* p,MATRIXUC* net,size_t nam,size_t nimax,size_t nomax); 45 | 46 | /* Construct a deterministic single best Directed Acyclic Graph from prior pij information, 47 | * and stop when the number of edges reaches threshold or no edge can be added. 48 | * This method sorts pij values and attempts to add edges from the most likely one. 49 | * Additional information is obtained in the output network variable. 50 | * p: (n,n) for pij matrix 51 | * net: (n,n) for constructed network. net[i,j]=0 indicates the edge is never tried. 52 | net[i,j]!=0 indicates the edge has been tried. Its absolute value (=x) indicates 53 | the edge is tried at the x-th edge addition attempt. net[i,j]>0 indicates successful 54 | edge addition and <0 indicates failure. 55 | * time:(n,n) for CPU time passed from starting to add edges to finish trying 56 | * to add this edge in CPU seconds. 57 | * nam: Maximum number of edges. 58 | * nimax: Maximum number of incoming edges for each node. Set to (size_t)-1 for unlimited. 59 | * nomax: Maximum number of outgoing edges for each node. Set to (size_t)-1 for unlimited. 60 | * Return: Number of edges, or 0 if failed. 61 | */ 62 | size_t netr_one_greedy_info(const MATRIXF* p,MATRIXL* net,MATRIXD* time,size_t nam,size_t nimax,size_t nomax); 63 | 64 | #ifdef __cplusplus 65 | } 66 | #endif 67 | #endif 68 | -------------------------------------------------------------------------------- /pij/cassist/cassist.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version.
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../../base/logger.h" 23 | #include "../../base/macros.h" 24 | #include "../../base/const.h" 25 | #include "../../base/supernormalize.h" 26 | #include "../../base/threading.h" 27 | #include "../../base/data_process.h" 28 | #include "llr.h" 29 | #include "llrtopij.h" 30 | #include "llrtopv.h" 31 | #include "cassist.h" 32 | 33 | 34 | int pijs_cassist_pv(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t memlimit) 35 | { 36 | #define CLEANUP CLEANMATF(gnew)CLEANMATF(tnew)CLEANMATF(tnew2) 37 | MATRIXF *gnew; //(ng,ns) Supernormalized transcript matrix 38 | MATRIXF *tnew,*tnew2; //(nt,ns) Supernormalized transcript matrix 39 | int ret; 40 | size_t ns=g->size2; 41 | #ifndef NDEBUG 42 | size_t nt; 43 | size_t ng=g->size1; 44 | 45 | nt=t2->size1; 46 | ns=g->size2; 47 | #endif 48 | 49 | gnew=tnew=tnew2=0; 50 | 51 | //Validation 52 | assert(!((t->size1!=ng)||(t->size2!=ns)||(t2->size2!=ns) 53 | ||(p1&&(p1->size!=ng)) 54 | ||(p2&&((p2->size1!=ng)||(p2->size2!=nt))) 55 | ||(p3&&((p3->size1!=ng)||(p3->size2!=nt))) 56 | ||(p4&&((p4->size1!=ng)||(p4->size2!=nt))) 57 | ||(p5&&((p5->size1!=ng)||(p5->size2!=nt))))); 58 | assert(memlimit); 59 | 60 | if(ns<4) 61 | ERRRET("Cannot compute p-values with fewer than 4 samples.") 62 | 63 | { 64 | size_t mem1; 65 | mem1=(4*t->size1*t->size2+2*t2->size1*t2->size2+p1->size+p2->size1*p2->size2*4)*sizeof(FTYPE); 66 | if(memlimit<=mem1) 67 | ERRRET("Memory limit lower than minimum memory needed. 
Try increasing your memory usage limit.") 68 | LOG(10,"Memory limit: %lu bytes.",memlimit) 69 | } 70 | 71 | gnew=MATRIXFF(alloc)(g->size1,g->size2); 72 | tnew=MATRIXFF(alloc)(t->size1,t->size2); 73 | tnew2=MATRIXFF(alloc)(t2->size1,t2->size2); 74 | if(!(gnew&&tnew&&tnew2)) 75 | ERRRET("Not enough memory.") 76 | 77 | //Step 1: Supernormalization 78 | LOG(9,"Supernormalizing...") 79 | MATRIXFF(memcpy)(gnew,g); 80 | ret=supernormalizea_byrow(gnew); 81 | MATRIXFF(memcpy)(tnew,t); 82 | ret=ret||supernormalizea_byrow(tnew); 83 | MATRIXFF(memcpy)(tnew2,t2); 84 | ret=ret||supernormalizea_byrow(tnew2); 85 | if(ret) 86 | ERRRET("Supernormalization failed.") 87 | 88 | //Step 2: Log likelihood ratios from nonpermuted data 89 | LOG(9,"Calculating real log likelihood ratios...") 90 | pij_cassist_llr(gnew,tnew,tnew2,p1,p2,p3,p4,p5); 91 | //Step 3: Convert log likelihood ratios to p-values 92 | LOG(9,"Converting log likelihood ratios into p-values...") 93 | pij_cassist_llrtopvs(p1,p2,p3,p4,p5,ns); 94 | //Cleanup 95 | CLEANUP 96 | return ret; 97 | #undef CLEANUP 98 | } 99 | 100 | int pijs_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,char nodiag,size_t memlimit) 101 | { 102 | #define CLEANUP CLEANMATF(gnew)CLEANMATF(tnew)CLEANMATF(tnew2) 103 | MATRIXF *gnew; //(ng,ns) Supernormalized transcript matrix 104 | MATRIXF *tnew,*tnew2; //(nt,ns) Supernormalized transcript matrix 105 | VECTORFF(view) vv; 106 | int ret; 107 | size_t ns=g->size2; 108 | #ifndef NDEBUG 109 | size_t nt; 110 | size_t ng=g->size1; 111 | 112 | nt=t2->size1; 113 | ns=g->size2; 114 | #endif 115 | 116 | gnew=tnew=tnew2=0; 117 | 118 | //Validation 119 | assert(!((t->size1!=ng)||(t->size2!=ns)||(t2->size2!=ns) 120 | ||(p1&&(p1->size!=ng)) 121 | ||(p2&&((p2->size1!=ng)||(p2->size2!=nt))) 122 | ||(p3&&((p3->size1!=ng)||(p3->size2!=nt))) 123 | ||(p4&&((p4->size1!=ng)||(p4->size2!=nt))) 124 | ||(p5&&((p5->size1!=ng)||(p5->size2!=nt))))); 125 | 
assert(memlimit); 126 | 127 | if(ns<4) 128 | ERRRET("Cannot compute probabilities with fewer than 4 samples.") 129 | //Defaults to 8GB memory usage 130 | { 131 | size_t mem1; 132 | mem1=(4*t->size1*t->size2+2*t2->size1*t2->size2+p1->size+p2->size1*p2->size2*4)*sizeof(FTYPE); 133 | if(memlimit<=mem1) 134 | ERRRET("Memory limit lower than minimum memory needed. Try increasing your memory usage limit.") 135 | LOG(10,"Memory limit: %lu bytes.",memlimit) 136 | } 137 | 138 | gnew=MATRIXFF(alloc)(g->size1,g->size2); 139 | tnew=MATRIXFF(alloc)(t->size1,t->size2); 140 | tnew2=MATRIXFF(alloc)(t2->size1,t2->size2); 141 | if(!(gnew&&tnew&&tnew2)) 142 | ERRRET("Not enough memory.") 143 | 144 | //Check for identical rows in input data 145 | { 146 | VECTORFF(view) vbuff1=MATRIXFF(column)(tnew,0); 147 | VECTORFF(view) vbuff2=MATRIXFF(row)(tnew2,0); 148 | MATRIXFF(cmprow)(t,t2,&vbuff1.vector,&vbuff2.vector,nodiag,1); 149 | } 150 | 151 | //Step 1: Supernormalization 152 | LOG(9,"Supernormalizing...") 153 | MATRIXFF(memcpy)(gnew,g); 154 | ret=supernormalizea_byrow(gnew); 155 | MATRIXFF(memcpy)(tnew,t); 156 | ret=ret||supernormalizea_byrow(tnew); 157 | MATRIXFF(memcpy)(tnew2,t2); 158 | ret=ret||supernormalizea_byrow(tnew2); 159 | if(ret) 160 | ERRRET("Supernormalization failed.") 161 | 162 | //Step 2: Log likelihood ratios from nonpermuted data 163 | LOG(9,"Calculating real log likelihood ratios...") 164 | pij_cassist_llr(gnew,tnew,tnew2,p1,p2,p3,p4,p5); 165 | //Step 3: Convert log likelihood ratios to probabilities 166 | if((ret=pij_cassist_llrtopijs(p1,p2,p3,p4,p5,ns,nodiag))) 167 | LOG(4,"Failed to convert all log likelihood ratios to probabilities.") 168 | if(nodiag) 169 | { 170 | vv=MATRIXFF(diagonal)(p2); 171 | VECTORFF(set_zero)(&vv.vector); 172 | vv=MATRIXFF(diagonal)(p3); 173 | VECTORFF(set_zero)(&vv.vector); 174 | vv=MATRIXFF(diagonal)(p4); 175 | VECTORFF(set_zero)(&vv.vector); 176 | vv=MATRIXFF(diagonal)(p5); 177 | VECTORFF(set_zero)(&vv.vector); 178 | } 179 | 180 | 
//Cleanup 181 | CLEANUP 182 | return ret; 183 | #undef CLEANUP 184 | } 185 | 186 | int pij_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit) 187 | { 188 | #define CLEANUP CLEANVECF(p1)CLEANMATF(p2)CLEANMATF(p3)CLEANMATF(p4) 189 | VECTORF *p1; 190 | MATRIXF *p2,*p3,*p4; 191 | size_t ng=g->size1; 192 | size_t nt=t2->size1; 193 | 194 | assert(g&&t&&t2&&ans&&pijs); 195 | assert((g->size2==t->size2)&&(g->size2==t2->size2)); 196 | assert((t->size1==ng)&&(ans->size1==ng)&&(ans->size2==nt)); 197 | p1=VECTORFF(alloc)(ng); 198 | p2=MATRIXFF(alloc)(ng,nt); 199 | p3=MATRIXFF(alloc)(ng,nt); 200 | p4=MATRIXFF(alloc)(ng,nt); 201 | if(!(p1&&p2&&p3&&p4)) 202 | ERRRET("Not enough memory.") 203 | if(pijs_cassist(g,t,t2,p1,p2,p3,p4,ans,nodiag,memlimit)) 204 | ERRRET("pij_cassist_pijs failed.") 205 | 206 | //Combine tests 207 | #pragma omp parallel 208 | { 209 | size_t ng1,ng2; 210 | MATRIXFF(view) mva,mv2,mv4; 211 | threading_get_startend(g->size1,&ng1,&ng2); 212 | if(ng1size2); 215 | mv2=MATRIXFF(submatrix)(p2,ng1,0,ng2-ng1,p2->size2); 216 | mv4=MATRIXFF(submatrix)(p4,ng1,0,ng2-ng1,p4->size2); 217 | MATRIXFF(mul_elements)(&mva.matrix,&mv2.matrix); 218 | MATRIXFF(add)(&mva.matrix,&mv4.matrix); 219 | MATRIXFF(scale)(&mva.matrix,0.5); 220 | } 221 | } 222 | //Cleanup 223 | CLEANUP 224 | return 0; 225 | #undef CLEANUP 226 | } 227 | 228 | int pij_cassist_trad(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit) 229 | { 230 | #define CLEANUP CLEANVECF(p1)CLEANMATF(p2)CLEANMATF(p4)CLEANMATF(p5) 231 | VECTORF *p1; 232 | MATRIXF *p2,*p4,*p5; 233 | size_t ng=g->size1; 234 | size_t nt=t2->size1; 235 | 236 | assert(g&&t&&t2&&ans); 237 | assert((g->size2==t->size2)&&(g->size2==t2->size2)); 238 | assert((t->size1==ng)&&(ans->size1==ng)&&(ans->size2==nt)); 239 | p1=VECTORFF(alloc)(ng); 240 | p2=MATRIXFF(alloc)(ng,nt); 241 | p4=MATRIXFF(alloc)(ng,nt); 242 | p5=MATRIXFF(alloc)(ng,nt); 243 | 
if(!(p1&&p2&&p5&&p4)) 244 | ERRRET("Not enough memory.") 245 | if(pijs_cassist(g,t,t2,p1,p2,ans,p4,p5,nodiag,memlimit)) 246 | ERRRET("pij_cassist_pijs failed.") 247 | 248 | //Combine tests 249 | MATRIXFF(mul_elements)(ans,p2); 250 | 251 | //Cleanup 252 | CLEANUP 253 | return 0; 254 | #undef CLEANUP 255 | } 256 | -------------------------------------------------------------------------------- /pij/cassist/cassist.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This part contains the main interface function of genotype assisted pij inference. 19 | */ 20 | 21 | #ifndef _HEADER_LIB_PIJ_CASSIST_H_ 22 | #define _HEADER_LIB_PIJ_CASSIST_H_ 23 | #include "../../base/config.h" 24 | #include "../../base/types.h" 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | /* Estimates the p-value of A B against A->B from genotype and expression data with 5 tests. 31 | * E is always the best eQTL of A. Full data is required. 32 | * g: (ng,ns) Genotype data for E, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t. 33 | * t: (ng,ns) Expression data of A. 34 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A. 35 | * p1: (ng) P-values of step 1. 
Tests E->A v.s. E A. 36 | * p2: (ng,nt) P-values of step 2. Tests E->B v.s. E B. 37 | * p3: (ng,nt) P-values of step 3. Tests E->A->B v.s. E->A->B with E->B. 38 | * p4: (ng,nt) P-values of step 4. Tests E->A->B with E->B v.s. E->A B. 39 | * p5: (ng,nt) P-values of step 5. Tests E->A->B with E->B v.s. A<-E->B. 40 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1. 41 | * Return: 0 on success 42 | * Appendix: 43 | * ng: Number of genes with best eQTL. 44 | * nt: Number of genes with expression data for B 45 | * ns: Number of samples. 46 | */ 47 | int pijs_cassist_pv(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t memlimit); 48 | 49 | /* Estimates the probability of A->B from genotype and expression data with 5 tests. 50 | * E is always the best eQTL of A. Full data is required. 51 | * g: (ng,ns) Genotype data for E, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t. 52 | * t: (ng,ns) Expression data of A. 53 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A. 54 | * p1: (ng) Probabilities of step 1. Tests E->A v.s. E A. For nodiag=0, because the function expects significant eQTLs, p1 always returns 1. For nodiag=1, uses diagonal elements of p2. Consider replacing p1 with your own (1-FDR) from eQTL discovery. 55 | * p2: (ng,nt) Probabilities of step 2. Tests E->B v.s. E B. 56 | * p3: (ng,nt) Probabilities of step 3. Tests E->A->B v.s. E->A->B with E->B. 57 | * p4: (ng,nt) Probabilities of step 4. Tests E->A->B with E->B v.s. E->A B. 58 | * p5: (ng,nt) Probabilities of step 5. Tests E->A->B with E->B v.s. A<-E->B. 59 | * nv: Number of possible values each genotype entry may take, =number of alleles+1.
60 | * nodiag: When the top ng rows of t2 are exactly t, diagonals of p2 and p3 are meaningless. In this case, set nodiag to 1 to avoid inclusion of NANs. For nodiag=0, t and t2 should not have any identical genes. 61 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1. 62 | * Return: 0 on success 63 | * Appendix: 64 | * ng: Number of genes with best eQTL. 65 | * nt: Number of genes with expression data for B 66 | * ns: Number of samples. 67 | */ 68 | int pijs_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,char nodiag,size_t memlimit); 69 | 70 | /* Estimates the probability of A->B from genotype and expression data with the default combination of tests. Uses results from pijs_cassist. Variables have the same definitions except: 71 | * ans: (ng,nt) Predicted probability of A->B based on default combination of 5 tests. The default combination is (p2*p5+p4)/2. Note: this combination does not include p1. 72 | * Return: 0 on success 73 | */ 74 | int pij_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit); 75 | 76 | /* Estimates the probability of A->B from genotype and expression data with traditional causal inference method. 77 | * NOTE: This is not and is not intended as a faithful reimplementation of the Trigger R package. Instead, it aims at reusing methods and tests of Findr to produce inferences that mimic the three tests performed by Trigger. Many implementational details are different between this function and Trigger, although a significant (but not full) overlap has been observed in existing studies. This method does not include p1. 78 | * Inputs and outputs are the same as function pij_cassist.
79 | */ 80 | int pij_cassist_trad(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit); 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | #endif 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /pij/cassist/llr.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "../../base/gsl/blas.h" 24 | #include "../../base/random.h" 25 | #include "../../base/const.h" 26 | #include "../../base/logger.h" 27 | #include "../../base/macros.h" 28 | #include "../../base/data_process.h" 29 | #include "../../base/threading.h" 30 | #include "llr.h" 31 | 32 | 33 | /* Calculates the 5 log likelihood ratios of Trigger with nonpermuted data in block form: 34 | * 1. E->A v.s. E no relation with A 35 | * 2. A<-E->B with A--B v.s. E->A<-B 36 | * 3. E->A->B v.s. A<-E->B with A--B 37 | * 4. A<-E->B with A->B v.s. E->A 38 | * 5. A<-E->B with A->B v.s. A<-E->B 39 | * Uses GSL BLAS. 
40 | * Note: for each row, g must be the best eQTL of t of the same row. 41 | */ 42 | static void pij_cassist_llr_block(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* llr1,MATRIXF* llr2,MATRIXF* llr3,MATRIXF* llr4,MATRIXF* llr5) 43 | { 44 | size_t i,j; 45 | VECTORFF(view) vv; 46 | size_t ng=g->size1; 47 | size_t nt=t2->size1; 48 | 49 | assert(ng&&(t->size1==ng)&&(llr1->size==ng)&&(llr2->size1==ng)&&(llr3->size1==ng) 50 | &&(llr4->size1==ng)&&(llr5->size1==ng)); 51 | assert(nt&&(llr2->size2==nt)&&(llr3->size2==nt)&&(llr4->size2==nt)&&(llr5->size2==nt)); 52 | assert(g->size2&&(t->size2==g->size2)&&(t2->size2==g->size2)); 53 | 54 | //llr1=rho_EA 55 | MATRIXFF(cov2_1v1_bounded)(g,t,llr1); 56 | //llr2=rho_EB 57 | MATRIXFF(cov2_bounded)(g,t2,llr2); 58 | //llr5=rho_AB 59 | MATRIXFF(cov2_bounded)(t,t2,llr5); 60 | 61 | //llr4=rho_EA*rho_EB 62 | MATRIXFF(memcpy)(llr4,llr2); 63 | for(i=0;isize1; 141 | nt=t2->size1; 142 | ns=t->size2; 143 | #endif 144 | 145 | //Validation 146 | assert(!((g->size2!=ns)||(t2->size2!=ns)||(t->size1!=ng)||(llr2->size1!=ng)||(llr2->size2!=nt)||(llr3->size1!=ng)||(llr3->size2!=nt)||(llr4->size1!=ng)||(llr4->size2!=nt)||(llr5->size1!=ng)||(llr5->size2!=nt))); 147 | assert(!(llr1->size!=ng)); 148 | 149 | #pragma omp parallel 150 | { 151 | size_t n1,n2; 152 | threading_get_startend(t->size1,&n1,&n2); 153 | if(n2>n1) 154 | { 155 | MATRIXFF(const_view) mvg=MATRIXFF(const_submatrix)(g,n1,0,n2-n1,g->size2); 156 | MATRIXFF(const_view) mvt=MATRIXFF(const_submatrix)(t,n1,0,n2-n1,t->size2); 157 | VECTORFF(view) vvllr1; 158 | MATRIXFF(view) mvllr2,mvllr3,mvllr4,mvllr5; 159 | vvllr1=VECTORFF(subvector)(llr1,n1,n2-n1); 160 | mvllr2=MATRIXFF(submatrix)(llr2,n1,0,n2-n1,llr2->size2); 161 | mvllr3=MATRIXFF(submatrix)(llr3,n1,0,n2-n1,llr3->size2); 162 | mvllr4=MATRIXFF(submatrix)(llr4,n1,0,n2-n1,llr4->size2); 163 | mvllr5=MATRIXFF(submatrix)(llr5,n1,0,n2-n1,llr5->size2); 164 | 
pij_cassist_llr_block(&mvg.matrix,&mvt.matrix,t2,&vvllr1.vector,&mvllr2.matrix,&mvllr3.matrix,&mvllr4.matrix,&mvllr5.matrix); 165 | } 166 | } 167 | } 168 | 169 | -------------------------------------------------------------------------------- /pij/cassist/llr.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This part contains the log likelihood ratio calculations. 19 | */ 20 | 21 | #ifndef _HEADER_LIB_PIJ_CASSIST_LLR_H_ 22 | #define _HEADER_LIB_PIJ_CASSIST_LLR_H_ 23 | #include "../../base/config.h" 24 | #include "../../base/types.h" 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | /* Multithread calculation of log likelihood ratios for 5 tests. 31 | * g: MATRIXF (ng,ns) Full genotype data matrix 32 | * t: MATRIXF (ng,ns) Supernormalized transcript data matrix of A 33 | * t2: MATRIXF (nt,ns) Supernormalized transcript data matrix of B 34 | * llr1: VECTORF (ng). Log likelihood ratios for test 1. Tests E->A v.s. E A. 35 | * llr2: MATRIXF (ng,nt). Log likelihood ratios for test 2. Tests E->B v.s. E B. 36 | * llr3: MATRIXF (ng,nt). Log likelihood ratios for test 3. Tests E->A->B v.s. E->A->B with E->B. 37 | * llr4: MATRIXF (ng,nt). Log likelihood ratios for test 4. 
Tests E->A->B with E->B v.s. E->A B. 38 | * llr5: MATRIXF (ng,nt). Log likelihood ratios for test 5. Tests E->A->B with E->B v.s. A<-E->B. 39 | */ 40 | void pij_cassist_llr(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* llr1,MATRIXF* llr2,MATRIXF* llr3,MATRIXF* llr4,MATRIXF* llr5); 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | #ifdef __cplusplus 69 | } 70 | #endif 71 | #endif 72 | -------------------------------------------------------------------------------- /pij/cassist/llrtopij.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../../base/logger.h" 23 | #include "llrtopij.h" 24 | #include "../llrtopij.h" 25 | 26 | 27 | 28 | 29 | 30 | 31 | /* Always return probability of step 1 is 1. This is useful when best eQTL are already selected in advance. 32 | */ 33 | static inline int pij_cassist_llrtopij1_1(VECTORF* p1) 34 | { 35 | LOG(9,"Converting LLR to probabilities for step 1. 
Filling with 1.") 36 | VECTORFF(set_all)(p1,1); 37 | return 0; 38 | } 39 | 40 | /* Functions to convert LLR of specific steps into probabilities. 41 | * Steps 2 to 5 use pij_llrtopij_convert_single_self with different settings of n1d and n2d; step 1 only fills with 1. 42 | * Function name suffixes indicate which LLR to convert. All return 0 on success. 43 | */ 44 | static inline int pij_cassist_llrtopij1(VECTORF* d) 45 | { 46 | LOG(9,"Converting LLR to probabilities for step 1 on per A basis.") 47 | return pij_cassist_llrtopij1_1(d); 48 | } 49 | 50 | static inline int pij_cassist_llrtopij2(MATRIXF* d,size_t ns,char nodiag) 51 | { 52 | LOG(9,"Converting LLR to probabilities for step 2 on per A basis.") 53 | assert(ns>2); 54 | return pij_llrtopij_convert_single_self(d,1,ns-2,nodiag,0); 55 | } 56 | 57 | static inline int pij_cassist_llrtopij3(MATRIXF* d,size_t ns,char nodiag) 58 | { 59 | LOG(9,"Converting LLR to probabilities for step 3 on per A basis.") 60 | assert(ns>3); 61 | if(pij_llrtopij_convert_single_self(d,1,ns-3,nodiag,0)) 62 | return 1; /* On success, step 3 stores the complement: d becomes 1-d. */ 63 | MATRIXFF(scale)(d,-1); 64 | MATRIXFF(add_constant)(d,1); 65 | return 0; 66 | } 67 | 68 | static inline int pij_cassist_llrtopij4(MATRIXF* d,size_t ns,char nodiag) 69 | { 70 | LOG(9,"Converting LLR to probabilities for step 4 on per A basis.") 71 | assert(ns>3); 72 | return pij_llrtopij_convert_single_self(d,2,ns-3,nodiag,0); 73 | } 74 | 75 | static inline int pij_cassist_llrtopij5(MATRIXF* d,size_t ns,char nodiag) 76 | { 77 | LOG(9,"Converting LLR to probabilities for step 5 on per A basis.") 78 | assert(ns>3); 79 | return pij_llrtopij_convert_single_self(d,1,ns-3,nodiag,0); 80 | } 81 | /* Converts the LLRs in p2 to p5 into probabilities and fills p1 (diagonal of p2 if nodiag, otherwise all 1). 82 | * Needs ns>=4, otherwise returns 1. Return: 0 if every step succeeds. */ 83 | int pij_cassist_llrtopijs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t ns,char nodiag) 84 | { 85 | int ret=0,ret2=0; 86 | 87 | if(ns<4) 88 | { 89 | LOG(0,"Cannot convert log likelihood ratios to probabilities. 
Needs at least 4 samples.") 90 | return 1; 91 | } /* Each step runs only while no earlier step has failed. ret2 holds the status of the current step alone (0 when skipped), so only the step that actually failed is logged. */ 92 | ret2=ret?0:pij_cassist_llrtopij2(p2,ns,nodiag);ret=ret||ret2; 93 | if(ret2) 94 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 2.") 95 | //For p1, if nodiag, copy p2 data, otherwise set all to 1. 96 | if(nodiag) 97 | { 98 | VECTORFF(view) vv; 99 | vv=MATRIXFF(diagonal)(p2); 100 | ret2=ret?0:VECTORFF(memcpy)(p1,&vv.vector); 101 | } 102 | else 103 | ret2=ret?0:pij_cassist_llrtopij1_1(p1); 104 | ret=ret||ret2;if(ret2) 105 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 1.") 106 | ret2=ret?0:pij_cassist_llrtopij3(p3,ns,nodiag);ret=ret||ret2; 107 | if(ret2) 108 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 3.") 109 | ret2=ret?0:pij_cassist_llrtopij4(p4,ns,nodiag);ret=ret||ret2; 110 | if(ret2) 111 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 4.") 112 | ret2=ret?0:pij_cassist_llrtopij5(p5,ns,nodiag);ret=ret||ret2; 113 | if(ret2) 114 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 5.") 115 | return ret; 116 | } 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /pij/cassist/llrtopij.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details.
14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the conversion from log likelihood ratio to probabilities 19 | * 20 | */ 21 | 22 | #ifndef _HEADER_LIB_PIJ_CASSIST_LLRTOPIJ_H_ 23 | #define _HEADER_LIB_PIJ_CASSIST_LLRTOPIJ_H_ 24 | #include "../../base/config.h" 25 | #include "../../base/types.h" 26 | #ifdef __cplusplus 27 | extern "C" 28 | { 29 | #endif 30 | 31 | /* Converts the LLRs of the five tests into probabilities together. 32 | * Uses pij_cassist_llrtopij1_1 and pij_cassist_llrtopij2 to pij_cassist_llrtopij5 (static helpers in llrtopij.c). 33 | * p1: (ng) Output for test 1: the diagonal of p2 if nodiag is set, otherwise all 1. p2 to p5: (ng,nt) LLRs of tests 2 to 5 as input, probabilities as output. ns: Number of samples, at least 4. nodiag: See pijs_cassist. 34 | * Return: 0 if all functions are successful. 35 | */ 36 | int pij_cassist_llrtopijs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t ns,char nodiag); 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | #endif 55 | -------------------------------------------------------------------------------- /pij/cassist/llrtopv.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | /* This file contains the conversion from log likelihood ratio to p-values for continuous anchors 19 | * 20 | */ 21 | 22 | #ifndef _HEADER_LIB_PIJ_CASSIST_LLRTOPV_H_ 23 | #define _HEADER_LIB_PIJ_CASSIST_LLRTOPV_H_ 24 | #include "../../base/config.h" 25 | #include "../../base/types.h" 26 | #include "../llrtopv.h" 27 | #ifdef __cplusplus 28 | extern "C" 29 | { 30 | #endif 31 | 32 | /* Converts log likelihood ratios into p-values for the continuous assisted causal inference tests, one function per test (the name suffix is the test number). 33 | * d: VECTORF (test 1) or MATRIXF (tests 2 to 5) of any size, as input of LLRs and also output of corresponding p-values 34 | * ns: Number of samples, to be used to calculate the null distribution. Every function asserts ns>3. 35 | */ 36 | static inline void pij_cassist_llrtopv1(VECTORF* d,size_t ns) 37 | { 38 | assert(ns>3); 39 | pij_llrtopv_block(d,1,ns-2); 40 | } 41 | 42 | static inline void pij_cassist_llrtopv2(MATRIXF* d,size_t ns) 43 | { 44 | assert(ns>3); 45 | pij_llrtopvm(d,1,ns-2); 46 | } 47 | 48 | static inline void pij_cassist_llrtopv3(MATRIXF* d,size_t ns) 49 | { 50 | assert(ns>3); 51 | pij_llrtopvm(d,1,ns-3); 52 | } 53 | 54 | static inline void pij_cassist_llrtopv4(MATRIXF* d,size_t ns) 55 | { 56 | assert(ns>3); 57 | pij_llrtopvm(d,2,ns-3); 58 | } 59 | 60 | static inline void pij_cassist_llrtopv5(MATRIXF* d,size_t ns) 61 | { 62 | assert(ns>3); 63 | pij_llrtopvm(d,1,ns-3); 64 | } 65 | 66 | /* Converts log likelihood ratios into p-values for continuous assisted causal inference test for all tests together 67 | * p1: (ng) 68 | * p2: (ng,nt) 69 | * p3: (ng,nt) 70 | * p4: (ng,nt) 71 | * p5: (ng,nt) Data for 5 tests from p1 to p5, as input for log likelihood ratios, 72 | and also as output for converted p-values. 73 | * ns: Number of samples.
74 | */ 75 | static inline void pij_cassist_llrtopvs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t ns) 76 | { 77 | pij_cassist_llrtopv1(p1,ns); 78 | pij_cassist_llrtopv2(p2,ns); 79 | pij_cassist_llrtopv3(p3,ns); 80 | pij_cassist_llrtopv4(p4,ns); 81 | pij_cassist_llrtopv5(p5,ns); 82 | } 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | #ifdef __cplusplus 122 | } 123 | #endif 124 | #endif 125 | -------------------------------------------------------------------------------- /pij/gassist/gassist.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This part contains the main interface function of genotype assisted pij inference. 19 | */ 20 | 21 | #ifndef _HEADER_LIB_PIJ_GASSIST_H_ 22 | #define _HEADER_LIB_PIJ_GASSIST_H_ 23 | #include "../../base/config.h" 24 | #include "../../base/types.h" 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | /* Estimates the p-value of A B against A->B from genotype and expression data with 5 tests. 31 | * E is always the best eQTL of A. 
Full data is required. 32 | * g: (ng,ns) Genotype data, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t. 33 | * t: (ng,ns) Expression data of A. 34 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A. 35 | * p1: (ng) P-values of step 1. Tests E->A v.s. E A. 36 | * p2: (ng,nt) P-values of step 2. Tests E->B v.s. E B. 37 | * p3: (ng,nt) P-values of step 3. Tests E->A->B v.s. E->A->B with E->B. 38 | * p4: (ng,nt) P-values of step 4. Tests E->A->B with E->B v.s. E->A B. 39 | * p5: (ng,nt) P-values of step 5. Tests E->A->B with E->B v.s. A<-E->B. 40 | * nv: Number of possible values each genotype entry may take, =number of alleles+1. 41 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1. 42 | * Return: 0 on success 43 | * Appendix: 44 | * ng: Number of genes with best eQTL. 45 | * nt: Number of genes with expression data for B 46 | * ns: Number of samples. 47 | */ 48 | int pijs_gassist_pv(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,size_t memlimit); 49 | 50 | /* Estimates the probability of A->B from genotype and expression data with 5 tests. 51 | * E is always the best eQTL of A. Full data is required. 52 | * g: (ng,ns) Genotype data, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t. 53 | * t: (ng,ns) Expression data of A. 54 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A. 55 | * p1: (ng) Probabilities of step 1. Tests E->A v.s. E A. For nodiag=0, because the function expects significant eQTLs, p1 always returns 1. For nodiag=1, uses diagonal elements of p2. Consider replacing p1 with your own (1-FDR) from eQTL discovery. 56 | * p2: (ng,nt) Probabilities of step 2. Tests E->B v.s. E B.
57 | * p3: (ng,nt) Probabilities of step 3. Tests E->A->B v.s. E->A->B with E->B. 58 | * p4: (ng,nt) Probabilities of step 4. Tests E->A->B with E->B v.s. E->A B. 59 | * p5: (ng,nt) Probabilities of step 5. Tests E->A->B with E->B v.s. A<-E->B. 60 | * nv: Number of possible values each genotype entry may take, =number of alleles+1. 61 | * nodiag: When the top ng rows of t2 is exactly t, diagonals of p2 and p3 are meaningless. In this case, set nodiag to 1 to avoid inclusion of NANs. For nodiag=0, t and t2 should not have any identical genes. 62 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1. 63 | * Return: 0 on success 64 | * Appendix: 65 | * ng: Number of genes with best eQTL. 66 | * nt: Number of genes with expression data for B 67 | * ns: Number of samples. 68 | */ 69 | int pijs_gassist(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,char nodiag,size_t memlimit); 70 | 71 | /* Estimates the probability of A->B from genotype and expression data with the default combination of tests. Uses results from pijs_gassist. Variables have the same definitions except: 72 | * ans: (ng,nt) Predicted probability of A->B based on the default combination of 5 tests. The default combination is (p2*p5+p4)/2. Note: this combination does not include p1. 73 | * Return: 0 on success 74 | */ 75 | int pij_gassist(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,size_t nv,char nodiag,size_t memlimit); 76 | 77 | /* Estimates the probability of A->B from genotype and expression data with the traditional causal inference method. 78 | * NOTE: This is not, and is not intended as, a faithful reimplementation of the Trigger R package.
Instead, it aims at reusing methods and tests of Findr to produce inferences that mimic the three tests performed by Trigger. Many implementation details differ between this function and Trigger, although a significant (but not full) overlap has been observed in existing studies. This method does not include p1. 79 | * Inputs and outputs are the same as function pij_gassist. 80 | */ 81 | int pij_gassist_trad(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,size_t nv,char nodiag,size_t memlimit); 82 | 83 | #ifdef __cplusplus 84 | } 85 | #endif 86 | #endif 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /pij/gassist/llr.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This part contains the log likelihood ratio calculations.
19 | */ 20 | 21 | #ifndef _HEADER_LIB_PIJ_GASSIST_LLR_H_ 22 | #define _HEADER_LIB_PIJ_GASSIST_LLR_H_ 23 | #include "../../base/config.h" 24 | #include "../../base/types.h" 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | /* Multithreaded calculation of log likelihood ratios for 5 tests. 31 | * g: MATRIXG (ng,ns) Full genotype data matrix 32 | * t: MATRIXF (ng,ns) Supernormalized transcript data matrix of A 33 | * t2: MATRIXF (nt,ns) Supernormalized transcript data matrix of B 34 | * llr1: VECTORF (ng). Log likelihood ratios for test 1. Tests E->A v.s. E A. 35 | * llr2: MATRIXF (ng,nt). Log likelihood ratios for test 2. Tests E->B v.s. E B. 36 | * llr3: MATRIXF (ng,nt). Log likelihood ratios for test 3. Tests E->A->B v.s. E->A->B with E->B. 37 | * llr4: MATRIXF (ng,nt). Log likelihood ratios for test 4. Tests E->A->B with E->B v.s. E->A B. 38 | * llr5: MATRIXF (ng,nt). Log likelihood ratios for test 5. Tests E->A->B with E->B v.s. A<-E->B. 39 | * nv: Number of possible values for each genotype 40 | * Return: 0 on success. 41 | */ 42 | int pij_gassist_llr(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* llr1,MATRIXF* llr2,MATRIXF* llr3,MATRIXF* llr4,MATRIXF* llr5,size_t nv); 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | #endif 78 | -------------------------------------------------------------------------------- /pij/gassist/llrtopij.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version.
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../../base/gsl/math.h" 23 | #include "../../base/gsl/histogram.h" 24 | #include "../../base/gsl/blas.h" 25 | #include "../../base/logger.h" 26 | #include "../../base/threading.h" 27 | #include "../../base/macros.h" 28 | #include "../../base/data_process.h" 29 | #include "../llrtopij.h" 30 | #include "llrtopij.h" 31 | 32 | 33 | 34 | /* Always return probability of step 1 is 1. This is useful when best eQTL are already selected in advance. 35 | */ 36 | static inline int pij_gassist_llrtopij1_1(VECTORF* p1) 37 | { 38 | LOG(9,"Converting LLR to probabilities for step 1. Filling with 1.") 39 | VECTORFF(set_all)(p1,1); 40 | return 0; 41 | } 42 | 43 | /* Convert real log likelihood ratios into probability functions. 44 | * This function converts every A in hypothesis (E->A->B) separately. 45 | * Suppose there are ng (E,A) pairs and nt Bs, this function converts ng times, 46 | * each for one (E,A) pair but all Bs. 47 | * d: (ng,nt) Input log likelihood ratios for construction of 48 | * histograms and calculation of probability of true hypothesis. 49 | * g: (ng,ns) Original genotype matrix, used for analytical calculation 50 | * of null distribution. Every element=0,1,...,nv-1. 51 | * h: [nv-1]. Null histogram of the specific test. 52 | * Output of pij_nullhist. 53 | * nv: Maximum number of values each g may take. 54 | * nodiag: If diagonal elements of d should be removed in construction of real 55 | * histogram. 
This should be set to true (!=0) when t is identical with 56 | * the top rows of t2 (in calculation of llr). 57 | * Return: 0 if success. 58 | */ 59 | static int pij_gassist_llrtopij_convert_self(MATRIXF* d,const MATRIXG* g,const gsl_histogram * const * h, size_t nv,char nodiag,long nodiagshift) 60 | { 61 | #define CLEANUP CLEANVECG(vcount)CLEANAMHIST(hreal,nth)CLEANAMHIST(hc,nth)\ 62 | CLEANMATD(mb1)CLEANMATD(mb2)CLEANMATD(mnull)CLEANMATF(mb3)CLEANVECD(vwidth) 63 | 64 | VECTORG *vcount; 65 | size_t ng=g->size1; 66 | size_t i,nbin; 67 | //gsl_histogram **hreal,**hc; 68 | MATRIXD *mb1,*mb2,*mnull; 69 | MATRIXF *mb3; 70 | VECTORD *vwidth; 71 | VECTORDF(view) vv1; 72 | size_t nth; 73 | 74 | mb1=mb2=mnull=0; 75 | mb3=0; 76 | vwidth=0; 77 | vcount=0; 78 | //Validity checks 79 | { 80 | int nth0=omp_get_max_threads(); 81 | assert(nth0>0); 82 | nth=(size_t)nth0; 83 | } 84 | 85 | 86 | AUTOCALLOC(gsl_histogram*,hreal,nth,64) 87 | AUTOCALLOC(gsl_histogram*,hc,nth,64) 88 | if(!(hreal&&hc)) 89 | ERRRET("Not enough memory."); 90 | 91 | //Construct null density histograms 92 | nbin=h[0]->n; 93 | //Memory allocation 94 | { 95 | size_t n1,n2; 96 | pij_llrtopij_convert_histograms_get_buff_sizes(nbin,&n1,&n2); 97 | mb1=MATRIXDF(alloc)(nth,n1); 98 | mb2=MATRIXDF(alloc)(nth,n2); 99 | mnull=MATRIXDF(alloc)(nth,nbin); 100 | mb3=MATRIXFF(alloc)(nth,d->size2); 101 | vwidth=VECTORDF(alloc)(nbin); 102 | if(!(mb1&&mb2&&mnull&&mb3&&vwidth)) 103 | ERRRET("Not enough memory.") 104 | } 105 | 106 | //Prepare for real histogram 107 | { 108 | int ret; 109 | for(i=0,ret=1;in+2); 113 | ret=ret&&hreal[i]&&hc[i]; 114 | } 115 | vcount=VECTORGF(alloc)(ng); 116 | if(!(ret&&vcount)) 117 | ERRRET("Not enough memory."); 118 | } 119 | 120 | { 121 | VECTORUC *vb4=VECTORUCF(alloc)(nv); 122 | if(!vb4) 123 | ERRRET("Not enough memory."); 124 | MATRIXGF(countv_byrow_buffed)(g,vcount,vb4); 125 | CLEANVECUC(vb4) 126 | } 127 | 128 | //Conversion 129 | for(i=2;i<=nv;i++) 130 | { 131 | 
vv1=VECTORDF(view_array)(h[i-2]->range+1,nbin); 132 | VECTORDF(memcpy)(vwidth,&vv1.vector); 133 | vv1=VECTORDF(view_array)(h[i-2]->range,nbin); 134 | VECTORDF(sub)(vwidth,&vv1.vector); 135 | vv1=VECTORDF(view_array)(h[i-2]->bin,nbin); 136 | #pragma omp parallel 137 | { 138 | size_t ng1,ng2,id; 139 | size_t j; 140 | long k; 141 | VECTORDF(view) vvreal,vvnull,vvb1,vvb2; 142 | VECTORFF(view) vvb3,vva; 143 | 144 | id=(size_t)omp_get_thread_num(); 145 | vvreal=VECTORDF(view_array)(hreal[id]->bin,nbin); 146 | vvnull=MATRIXDF(row)(mnull,id); 147 | vvb1=MATRIXDF(row)(mb1,id); 148 | vvb2=MATRIXDF(row)(mb2,id); 149 | vvb3=MATRIXFF(row)(mb3,id); 150 | threading_get_startend(ng,&ng1,&ng2); 151 | 152 | for(j=ng1;jrange,h[i-2]->range,(nbin+1)*sizeof(*hreal[id]->range)); 158 | memset(hreal[id]->bin,0,nbin*sizeof(*hreal[id]->bin)); 159 | //Construct real histogram 160 | if(nodiag&&((long)j+nodiagshift>=0)&&((long)j+nodiagshift<(long)d->size2)) 161 | { 162 | for(k=(long)j+nodiagshift-1;k>=0;k--) 163 | gsl_histogram_increment(hreal[id],MATRIXFF(get)(d,j,(size_t)k)); 164 | for(k=(long)j+nodiagshift+1;k<(long)d->size2;k++) 165 | gsl_histogram_increment(hreal[id],MATRIXFF(get)(d,j,(size_t)k)); 166 | VECTORDF(scale)(&vvreal.vector,1./(double)(d->size2-1)); 167 | } 168 | else 169 | { 170 | for(k=0;k<(long)d->size2;k++) 171 | gsl_histogram_increment(hreal[id],MATRIXFF(get)(d,j,(size_t)k)); 172 | VECTORDF(scale)(&vvreal.vector,1./(double)(d->size2)); 173 | } 174 | 175 | //Convert to density histogram 176 | VECTORDF(div)(&vvreal.vector,vwidth); 177 | //Convert to probability central histogram 178 | pij_llrtopij_convert_histograms_buffed(hreal[id],&vvnull.vector,hc[id],&vvb1.vector,&vvb2.vector); 179 | //Convert likelihoods to probabilities 180 | vva=MATRIXFF(row)(d,j); 181 | pij_llrtopij_histogram_interpolate_linear(hc[id],&vvb3.vector,&vva.vector); 182 | } 183 | } 184 | } 185 | CLEANUP 186 | return 0; 187 | #undef CLEANUP 188 | } 189 | 190 | int pij_gassist_llrtopijs(const MATRIXG* 
g,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,const gsl_histogram * const * h[4],char nodiag,long nodiagshift) 191 | { /* Converts the LLRs in p2 to p5 into probabilities (p3 is stored as its complement 1-p3) and fills p1. Needs at least 4 samples in g. Return: 0 on success, 1 on failure. Each step runs only while no earlier step has failed, and ret2 holds the status of the current step alone (0 when skipped), so an earlier failure is never overwritten and only the failing step is logged. */ 192 | int ret=0,ret2=0; 193 | if(g->size2<=3) 194 | { 195 | LOG(0,"Needs at least 4 samples to compute probabilities.") 196 | return 1; 197 | } 198 | ret2=pij_gassist_llrtopij_convert_self(p2,g,h[0],nv,nodiag,nodiagshift);ret=ret||ret2; 199 | if(ret2) 200 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 2.") 201 | //For p1, if nodiag, copy p2 data, otherwise set all to 1. 202 | if(nodiag) 203 | { 204 | VECTORFF(view) vv; /* NOTE(review): the cast below assumes nodiagshift>=0 - confirm against callers. */ 205 | vv=MATRIXFF(superdiagonal)(p2,(size_t)nodiagshift); 206 | ret2=ret?0:VECTORFF(memcpy)(p1,&vv.vector); 207 | } 208 | else 209 | ret2=ret?0:pij_gassist_llrtopij1_1(p1); 210 | ret=ret||ret2;if(ret2) 211 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 1.") 212 | ret2=ret?0:pij_gassist_llrtopij_convert_self(p3,g,h[1],nv,nodiag,nodiagshift);ret=ret||ret2; 213 | if(ret2) 214 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 3.") 215 | if(!ret){MATRIXFF(scale)(p3,-1); 216 | MATRIXFF(add_constant)(p3,1);} 217 | ret2=ret?0:pij_gassist_llrtopij_convert_self(p4,g,h[2],nv,nodiag,nodiagshift);ret=ret||ret2; 218 | if(ret2) 219 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 4.") 220 | ret2=ret?0:pij_gassist_llrtopij_convert_self(p5,g,h[3],nv,nodiag,nodiagshift);ret=ret||ret2; 221 | if(ret2) 222 | LOG(1,"Failed to convert log likelihood ratios to probabilities in step 5.") 223 | return ret; 224 | } 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | -------------------------------------------------------------------------------- /pij/gassist/llrtopij.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr.
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the conversion from log likelihood ratio to probabilities 19 | * 20 | */ 21 | 22 | #ifndef _HEADER_LIB_PIJ_GASSIST_LLRTOPIJ_H_ 23 | #define _HEADER_LIB_PIJ_GASSIST_LLRTOPIJ_H_ 24 | #include "../../base/config.h" 25 | #include "../../base/gsl/histogram.h" 26 | #include "../../base/types.h" 27 | #ifdef __cplusplus 28 | extern "C" 29 | { 30 | #endif 31 | 32 | 33 | 34 | /* Converts four LLRs into probabilities together. 35 | * Uses pij_gassist_llrtopij1 to pij_gassist_llrtopij5. 36 | * See above functions for parameter definitions. 37 | * h: Null histograms. 0 to 3 for tests 2 to 5. 38 | * Return: 0 if all functions are successful. 39 | */ 40 | int pij_gassist_llrtopijs(const MATRIXG* g,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,const gsl_histogram * const * h[4],char nodiag,long nodiagshift); 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | #endif 56 | -------------------------------------------------------------------------------- /pij/gassist/llrtopv.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../../base/logger.h" 23 | #include "../../base/threading.h" 24 | #include "../../base/macros.h" 25 | #include "../llrtopv.h" 26 | 27 | /* Convert log likelihood ratios into p-values for matrix 28 | * d: (ng,nt) Data, as input for log likelihood ratios, 29 | and also as output for converted p-values. 30 | * g: (ng,ns) Original genotype matrix, used for analytical calculation 31 | * of null distribution. Every element=0,1,...,nv-1. 32 | Has matching rows with data d. 33 | * nv: Maximum number of values each g may take. 34 | * n1c, 35 | * n1d, 36 | * n2c, 37 | * n2d: Parameters to specify null distribution. See pij_nullhist 38 | * Return: 0 if success. 
39 | */ 40 | static int pij_gassist_llrtopv_block(MATRIXF* d,const MATRIXG* g,size_t nv,long n1c,size_t n1d,long n2c,size_t n2d) 41 | { 42 | #define CLEANUP AUTOFREE(nexist) 43 | 44 | size_t i,j,nvr; 45 | VECTORFF(view) vv; 46 | assert(d->size1==g->size1); 47 | assert(MATRIXGF(max)(g)size1;i++) 54 | { 55 | //Count for number of genotypes 56 | memset(nexist,0,nv*sizeof(nexist[0])); 57 | for(j=0;jsize2;j++) 58 | nexist[MATRIXGF(get)(g,i,j)]=1; 59 | nvr=0; 60 | for(j=0;j1); 63 | nvr-=2; 64 | assert(((long)nvr*n1c+(long)n1d>0)&&((long)n2d>(long)nvr*n2c)); 65 | vv=MATRIXFF(row)(d,i); 66 | pij_llrtopv_block(&vv.vector,(size_t)((long)nvr*n1c+(long)n1d),(size_t)((long)n2d-(long)nvr*n2c)); 67 | } 68 | 69 | CLEANUP 70 | return 0; 71 | #undef CLEANUP 72 | } 73 | 74 | /* Convert log likelihood ratios into p-values for vector 75 | * d: (ng) Data, as input for log likelihood ratios, 76 | and also as output for converted p-values. 77 | * g: (ng,ns) Original genotype matrix, used for analytical calculation 78 | * of null distribution. Every element=0,1,...,nv-1. 79 | Has matching rows with data d. 80 | * nv: Maximum number of values each g may take. 81 | * n1c, 82 | * n1d, 83 | * n2c, 84 | * n2d: Parameters to specify null distribution. See pij_nullhist 85 | * Return: 0 if success. 86 | */ 87 | static inline int pij_gassist_llrtopv_vec_block(VECTORF* d,const MATRIXG* g,size_t nv,long n1c,size_t n1d,long n2c,size_t n2d) 88 | { 89 | MATRIXFF(view) mv=MATRIXFF(view_vector)(d,d->size,1); 90 | assert(d->size==g->size1); 91 | assert((long)n2d>n2c*(long)nv); 92 | return pij_gassist_llrtopv_block(&mv.matrix,g,nv,n1c,n1d,n2c,n2d); 93 | } 94 | 95 | /* Convert log likelihood ratios into p-values for matrix in single thread 96 | * p1: (ng) 97 | * p2: (ng,nt) 98 | * p3: (ng,nt) 99 | * p4: (ng,nt) 100 | * p5: (ng,nt) Data for 5 tests from p1 to p5, as input for log likelihood ratios, 101 | and also as output for converted p-values. 
102 | * g: (ng,ns) Original genotype matrix, used for analytical calculation 103 | * of null distribution. Every element=0,1,...,nv-1. 104 | Has matching rows with data p1 to p5.. 105 | * nv: Maximum number of values each g may take. 106 | * Return: 0 if success. 107 | */ 108 |
static int pij_gassist_llrtopvs_block(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,const MATRIXG* g,size_t nv)
{
	assert((p1->size==g->size1)&&(p2->size1==g->size1)&&(p3->size1==g->size1)&&(p4->size1==g->size1)&&(p5->size1==g->size1));
	assert((p2->size2==p3->size2)&&(p2->size2==p4->size2)&&(p2->size2==p5->size2));

	/* The tests are converted in order 1 to 5. Conversion stops at the first
	 * failure and only that step is logged; steps that never ran are no
	 * longer reported as failed.
	 */
	if(pij_gassist_llrtopv_vec_block(p1,g,nv,1,1,1,g->size2-2))
	{
		LOG(1,"Failed to log likelihood ratios to p-values in step 1.")
		return 1;
	}
	if(pij_gassist_llrtopv_block(p2,g,nv,1,1,1,g->size2-2))
	{
		LOG(1,"Failed to log likelihood ratios to p-values in step 2.")
		return 1;
	}
	if(pij_gassist_llrtopv_block(p3,g,nv,1,1,1,g->size2-3))
	{
		LOG(1,"Failed to log likelihood ratios to p-values in step 3.")
		return 1;
	}
	if(pij_gassist_llrtopv_block(p4,g,nv,1,2,1,g->size2-3))
	{
		LOG(1,"Failed to log likelihood ratios to p-values in step 4.")
		return 1;
	}
	if(pij_gassist_llrtopv_block(p5,g,nv,0,1,1,g->size2-3))
	{
		LOG(1,"Failed to log likelihood ratios to p-values in step 5.")
		return 1;
	}
	return 0;
}
131 | 132 | int pij_gassist_llrtopvs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,const MATRIXG* g,size_t nv) 133 | { 134 | int ret=0; 135 | assert((p1->size==g->size1)&&(p2->size1==g->size1)&&(p3->size1==g->size1)&&(p4->size1==g->size1)&&(p5->size1==g->size1)); 136 | assert((p2->size2==p3->size2)&&(p2->size2==p4->size2)&&(p2->size2==p5->size2)); 137 | 138 | if(g->size2size,&ng1,&ng2); 150 | if(ng2>ng1) 151 | { 152 | size_t dn=ng2-ng1; 153 | VECTORFF(view) vv1; 154 | MATRIXFF(view) 
mv2,mv3,mv4,mv5; 155 | MATRIXGF(const_view) mvg=MATRIXGF(const_submatrix)(g,ng1,0,dn,g->size2); 156 | vv1=VECTORFF(subvector)(p1,ng1,dn); 157 | mv2=MATRIXFF(submatrix)(p2,ng1,0,dn,p2->size2); 158 | mv3=MATRIXFF(submatrix)(p3,ng1,0,dn,p2->size2); 159 | mv4=MATRIXFF(submatrix)(p4,ng1,0,dn,p2->size2); 160 | mv5=MATRIXFF(submatrix)(p5,ng1,0,dn,p2->size2); 161 | ret2=pij_gassist_llrtopvs_block(&vv1.vector,&mv2.matrix,&mv3.matrix,&mv4.matrix,&mv5.matrix,&mvg.matrix,nv); 162 | } 163 | #pragma omp critical 164 | ret=ret||ret2; 165 | } 166 | return ret; 167 | } 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /pij/gassist/llrtopv.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the conversion from log likelihood ratio to p-values for discrete anchors, e.g. 
genotypes 19 | * 20 | */ 21 | #ifndef _HEADER_LIB_PIJ_GASSIST_LLRTOPV_H_ 22 | #define _HEADER_LIB_PIJ_GASSIST_LLRTOPV_H_ 23 | #include "../../base/config.h" 24 | #include "../../base/types.h" 25 | #ifdef __cplusplus 26 | extern "C" 27 | { 28 | #endif 29 | 30 | /* Convert log likelihood ratios into p-values for matrix in multi thread 31 | * p1: (ng) 32 | * p2: (ng,nt) 33 | * p3: (ng,nt) 34 | * p4: (ng,nt) 35 | * p5: (ng,nt) Data for 5 tests from p1 to p5, as input for log likelihood ratios, 36 | and also as output for converted p-values of LLRs of each test. 37 | * g: (ng,ns) Original genotype matrix, used for analytical calculation 38 | * of null distribution. Every element=0,1,...,nv-1. 39 | Has matching rows with data p1 to p5.. 40 | * nv: Maximum number of values each g may take. 41 | * Return: 0 if success. 42 | */ 43 | int pij_gassist_llrtopvs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,const MATRIXG* g,size_t nv); 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | #endif 86 | -------------------------------------------------------------------------------- /pij/gassist/nullhist.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../../base/logger.h" 23 | #include "../../base/gsl/histogram.h" 24 | #include "../nullhist.h" 25 | #include "nullhist.h" 26 | 27 | int pij_gassist_nullhists(gsl_histogram** h[4],size_t nt,size_t ns,size_t nv,const FTYPE dmax[4]) 28 | { 29 | //Construct null density histograms 30 | h[0]=pij_nullhist((double)dmax[0],nv,nt,1,1,1,ns-2); 31 | h[1]=pij_nullhist((double)dmax[1],nv,nt,1,1,1,ns-3); 32 | h[2]=pij_nullhist((double)dmax[2],nv,nt,1,2,1,ns-3); 33 | h[3]=pij_nullhist((double)dmax[3],nv,nt,0,1,1,ns-3); 34 | if(h[0]&&h[1]&&h[2]&&h[3]) 35 | return 0; 36 | 37 | LOG(1,"pij_nullhist failed.") 38 | return 1; 39 | } 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /pij/gassist/nullhist.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | /* This file provides the analytical method to construct histograms 19 | * for the null pdf of LLRs of genotype assisted pij inference. 20 | */ 21 | 22 | #ifndef _HEADER_LIB_PIJ_GASSIST_NULLHIST_H_ 23 | #define _HEADER_LIB_PIJ_GASSIST_NULLHIST_H_ 24 | #include "../../base/config.h" 25 | #include "../../base/gsl/histogram.h" 26 | #include "../../base/types.h" 27 | #ifdef __cplusplus 28 | extern "C" 29 | { 30 | #endif 31 | 32 | /* Produce null histograms for all tests (2 to 5). 33 | * h: Output location of null histograms. 0 to 3 for tests 2 to 5. 34 | * nt: Number of targets 35 | * ns: Number of samples 36 | * nv: Number of values, = number of alleles + 1 37 | * dmax: Maximum value of all LLRs, for histogram construction. 38 | * It can be larger than the maximum of d, if memlimit is not infinite. 39 | * 0 to 3 for tests 2 to 5. 40 | * Return: 0 on success and 1 otherwise 41 | */ 42 | int pij_gassist_nullhists(gsl_histogram** h[4],size_t nt,size_t ns,size_t nv,const FTYPE dmax[4]); 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | #ifdef __cplusplus 65 | } 66 | #endif 67 | #endif 68 | -------------------------------------------------------------------------------- /pij/llrtopij.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 
14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the conversion from log likelihood ratio to probabilities 19 | * 20 | */ 21 | 22 | #ifndef _HEADER_LIB_PIJ_LLRTOPIJ_H_ 23 | #define _HEADER_LIB_PIJ_LLRTOPIJ_H_ 24 | #include "../base/config.h" 25 | #include "../base/gsl/histogram.h" 26 | #include "../base/types.h" 27 | #ifdef __cplusplus 28 | extern "C" 29 | { 30 | #endif 31 | 32 | 33 | /* Use central histogram to estimate distribution probabilities of any point 34 | * within the histogram range. Linear interpolation is used. 35 | * Points outside histogram range give boundary output 36 | * hc: central histogram for estimation 37 | * d: data (x coordinates of histogram) whose probabilities are to be estimated 38 | * ans: output of estimated probabilities 39 | */ 40 | void pij_llrtopij_histogram_interpolate_linear(const gsl_histogram *hc,const VECTORF* d,VECTORF* ans); 41 | 42 | /* Calculate buffer sizes for histogram conversion in pij_llrtopij_convert_histograms_buffed. 43 | * n: Number of histogram bins. This must match pij_llrtopij_convert_histograms_buffed. 44 | * n1, 45 | * n2: Sizes of two buffers for VECTORD. 46 | */ 47 | void pij_llrtopij_convert_histograms_get_buff_sizes(size_t n,size_t *n1,size_t *n2); 48 | 49 | /* Allocate buffer for histogram conversion in pij_llrtopij_convert_histograms_buffed. 50 | * n: Number of histogram bins. This must match pij_llrtopij_convert_histograms_buffed. 51 | * vb1, 52 | * vb2: Output locations of allocated buffers. 53 | * Return: 0 on success. 54 | */ 55 | int pij_llrtopij_convert_histograms_make_buffs(size_t n,VECTORD** vb1,VECTORD** vb2); 56 | 57 | /* Convert density histograms of null and real distribution into probability central 58 | * histogram with buffer provided. Both histograms must be distributions 59 | * (sum to unity and nonnegative). 60 | * hreal: (n) Real density histogram to convert from. 
Also changed in calculation. 61 | * vnull: (n) Null density histogram in vector format. Also changed in calculation. 62 | * hc: (n+2) Central probability histogram as output. 63 | * vb1, 64 | * vb2: Buffers needed for conversion. To allocate buffers, use 65 | pij_llrtopij_convert_histograms_make_buffs. 66 | */ 67 | void pij_llrtopij_convert_histograms_buffed(gsl_histogram* hreal,VECTORD* vnull,gsl_histogram* hc,VECTORD* vb1,VECTORD* vb2); 68 | 69 | /* Convert density histograms of null and real distribution into probability central histogram. Both histograms must be distributions (sum to unity and nonnegative). 70 | * hreal: (n) Real density histogram to convert from. Also changed in calculation. 71 | * vnull: (n) Null density histogram in vector format. Also changed in calculation. 72 | * hc: (n+2) Central probability histogram as output. 73 | * Return: 0 if success. 74 | */ 75 | int pij_llrtopij_convert_histograms(gsl_histogram* hreal,VECTORD* vnull,gsl_histogram* hc); 76 | 77 | 78 | /* Obtains the maximum of matrix, possibly ignoring diagonal elements. 79 | * Fails in the presence of NAN, and warns and updates at INFs. 80 | * d: Matrix/Vector to get maximum, and update any INFs 81 | * nodiag: Whether to ignore diagonal values when searching for maximum. 82 | * Return: 0 if NAN is found, or the non-INF maximum otherwise. 83 | */ 84 | FTYPE pij_llrtopij_llrmatmax(MATRIXF* d,char nodiag); 85 | FTYPE pij_llrtopij_llrvecmax(VECTORF* d); 86 | 87 | 88 | /* Convert LLR of real data to probabilities, when the distribution 89 | * of LLR of null distribution can be calculated analytically to follow 90 | * x=-0.5*log(1-z1/(z1+z2)), where z1~chi2(n1),z2~chi2(n2). 91 | * The conversion is performed for each gene A, i.e. per row of d and dconv. 92 | * This function is older than pij_llrtopij_convert_single so it is not parallel. 93 | * Make it parallel before using. 94 | * d: [nrow,nx] The data to use for calculation of conversion rule from LLR to pij. 
95 | * dconv: [nrow,nd] The data of LLR to actually convert to pij. Can be the same as d. 96 | * ans: [nrow,nd] The output location of converted pij from dconv. 97 | * n1, 98 | * n2: Parameters of null distribution. 99 | * nodiag: Whether diagonal elements of d should be ignored when converting 100 | * to probabilities. 101 | * nodiagshift: Diagonal column shift for nodiag==1. 102 | * For nodiagshift>0/<0, use upper/lower diagonal. 103 | * Return: 0 on success. 104 | */ 105 | int pij_llrtopij_convert_single(const MATRIXF* d,const MATRIXF* dconv,MATRIXF* ans,size_t n1,size_t n2,char nodiag,long nodiagshift); 106 | 107 | // Same as pij_llrtopij_convert_single, for d=dconv=ans. Saves memory. 108 | int pij_llrtopij_convert_single_self(MATRIXF* d,size_t n1,size_t n2,char nodiag,long nodiagshift); 109 | 110 | 111 | 112 | 113 | #ifdef __cplusplus 114 | } 115 | #endif 116 | #endif 117 | -------------------------------------------------------------------------------- /pij/llrtopv.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #include "../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../base/macros.h" 23 | #include "../base/threading.h" 24 | #include "llrtopv.h" 25 | 26 | void pij_llrtopvm(MATRIXF* p,size_t n1,size_t n2) 27 | { 28 | #pragma omp parallel 29 | { 30 | size_t m1,m2; 31 | 32 | threading_get_startend(p->size1,&m1,&m2); 33 | if(m2>m1) 34 | { 35 | MATRIXFF(view) mvp=MATRIXFF(submatrix)(p,m1,0,m2-m1,p->size2); 36 | pij_llrtopvm_block(&mvp.matrix,n1,n2); 37 | } 38 | } 39 | } 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /pij/llrtopv.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This file contains the conversion from log likelihood ratio to p-values 19 | * 20 | */ 21 | #ifndef _HEADER_LIB_PIJ_LLRTOPV_H_ 22 | #define _HEADER_LIB_PIJ_LLRTOPV_H_ 23 | #include "../base/config.h" 24 | #include "../base/types.h" 25 | #include "nulldist.h" 26 | #ifdef __cplusplus 27 | extern "C" 28 | { 29 | #endif 30 | 31 | 32 | /* Converts a vector of log likelihood ratios into p-values with the same null distribution. 33 | * Single thread. 
34 | * For null distribution, see pij_nulldist_cdfQ. 35 | * p: data as input for LLR and output for p-values 36 | * n1, 37 | * n2: Null distribution parameters. 38 | */ 39 | static inline void pij_llrtopv_block(VECTORF* p,size_t n1,size_t n2); 40 | // Converts a matrix with the same null distribution in single thread 41 | static inline void pij_llrtopvm_block(MATRIXF* p,size_t n1,size_t n2); 42 | // Converts a matrix with the same null distribution in multi threads 43 | void pij_llrtopvm(MATRIXF* p,size_t n1,size_t n2); 44 | 45 | 46 | 47 | static inline void pij_llrtopv_block(VECTORF* p,size_t n1,size_t n2) 48 | { 49 | size_t i; 50 | for(i=0;isize;i++) 51 | VECTORFF(set)(p,i,(FTYPE)pij_nulldist_cdfQ(VECTORFF(get)(p,i),n1,n2)); 52 | } 53 | 54 | static inline void pij_llrtopvm_block(MATRIXF* p,size_t n1,size_t n2) 55 | { 56 | size_t i,j; 57 | for(i=0;isize1;i++) 58 | for(j=0;jsize2;j++) 59 | MATRIXFF(set)(p,i,j,(FTYPE)pij_nulldist_cdfQ(MATRIXFF(get)(p,i,j),n1,n2)); 60 | } 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | #endif 66 | -------------------------------------------------------------------------------- /pij/nulldist.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 
17 | */ 18 | #include "../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "../base/gsl/blas.h" 25 | #include "../base/gsl/math.h" 26 | #include "../base/logger.h" 27 | #include "../base/macros.h" 28 | #include "../base/histogram.h" 29 | #include "../base/data_process.h" 30 | #include "nulldist.h" 31 | #pragma GCC diagnostic ignored "-Wunused-parameter" 32 | 33 | 34 | 35 | /************************************************************* 36 | * Generic functions for any step 37 | *************************************************************/ 38 | 39 | void pij_nulldist_pdfs(const VECTORD* loc,VECTORD* ans,const void* param) 40 | { 41 | const struct pij_nulldist_pdfs_param *p=param; 42 | size_t nd=loc->size; 43 | size_t i; 44 | 45 | //Part 1: (1-exp(-2*x))^((n1-2)/2) 46 | for(i=0;in1/2-1); 49 | //Part 2: exp(-n2*x) 50 | gsl_blas_daxpy(-(double)p->n2,loc,ans); 51 | //Part 3: 2*Gamma((n1+n2)/2)/(Gamma(n1/2)*Gamma(n2/2) 52 | VECTORDF(add_constant)(ans,M_LN2+math_sf_lngammahalf(p->n1+p->n2)-math_sf_lngammahalf(p->n1)-math_sf_lngammahalf(p->n2)); 53 | //Final: all log 54 | for(i=0;isize; 61 | size_t i,j; 62 | 63 | assert(n1d&&n2d); 64 | assert(nd&&(ans->size2==nd)&&(vb2->size==nd)); 65 | //Calculate vb2=log(1-exp(-2x)) 66 | VECTORDF(memcpy)(vb2,loc); 67 | VECTORDF(scale)(vb2,-2); 68 | for(i=0;isize1;i++) 86 | { 87 | VECTORDF(const_view) vv1=MATRIXDF(const_row)(ans,i-1); 88 | VECTORDF(view) vv2=MATRIXDF(row)(ans,i); 89 | VECTORDF(memcpy)(&vv2.vector,&vv1.vector); 90 | VECTORDF(add)(&vv2.vector,vb2); 91 | } 92 | 93 | //Include nv-dependent coefficients 94 | for(i=0;isize1;i++) 95 | { 96 | VECTORDF(view) vv=MATRIXDF(row)(ans,i); 97 | VECTORDF(add_constant)(&vv.vector,(FTYPE)(M_LN2+math_sf_lngammahalf((size_t)((long)i*(n1c-n2c)+(long)(n1d+n2d)))-math_sf_lngammahalf((size_t)((long)i*n1c+(long)n1d))-math_sf_lngammahalf((size_t)(-(long)i*n2c+(long)n2d)))); 98 | } 99 | //Convert log pdf to pdf 100 | for(i=0;isize1;i++) 101 | 
for(j=0;jsize2;j++) 102 | MATRIXDF(set)(ans,i,j,exp(MATRIXDF(get)(ans,i,j))); 103 | } 104 | 105 | static int pij_nulldist_calcpdf(long n1c,size_t n1d,long n2c,size_t n2d,const VECTORD* loc,MATRIXD* ans) 106 | { 107 | #define CLEANUP AUTOFREEVEC(vb) 108 | AUTOALLOCVECD(vb,loc->size,30000) 109 | if(!vb) 110 | ERRRET("Not enough memory.") 111 | pij_nulldist_calcpdf_buffed(n1c,n1d,n2c,n2d,loc,ans,vb); 112 | CLEANUP 113 | return 0; 114 | #undef CLEANUP 115 | } 116 | 117 | int pij_nulldist_hist_pdf(const double* restrict range,size_t nbin,double* restrict hist,size_t n1,size_t n2,size_t n) 118 | { 119 | #define CLEANUP CLEANVECD(loc)CLEANVECD(val) 120 | VECTORD *loc,*val; 121 | VECTORDF(view) vvh=VECTORDF(view_array)(hist,nbin); 122 | MATRIXDF(view) mvv; 123 | size_t i; 124 | size_t nsp; 125 | 126 | assert(n&&(n<10)); 127 | nsp=(size_t)1<<(n-1); 128 | loc=VECTORDF(alloc)(nbin*nsp); 129 | val=VECTORDF(alloc)(nbin*nsp); 130 | if(!(loc&&val)) 131 | ERRRET("Not enough memory.") 132 | 133 | //Construct bin ranges 134 | { 135 | VECTORDF(const_view) vvc=VECTORDF(const_view_array)(range,nbin+1); 136 | histogram_finer_central(&vvc.vector,loc,nsp); 137 | } 138 | 139 | mvv=MATRIXDF(view_vector)(val,1,val->size); 140 | //Calculate bin values 141 | if(pij_nulldist_calcpdf(0,n1,0,n2,loc,&mvv.matrix)) 142 | { 143 | CLEANUP 144 | return 1; 145 | } 146 | 147 | //Shrink to output 148 | VECTORDF(set_zero)(&vvh.vector); 149 | for(i=0;i. 17 | */ 18 | /* This part contains analytical calculation of the histogram of log likelihood ratio from null hypothesis. 19 | * Each function is applicable to one or more stages, which are stated in the function name as pij_nulldistX_..., where X is the applicable stage. 20 | * For each stage, different methods to calculate histogram can coexist. The method is declared in the function name as suffix: 21 | * _cdf: Calculate histogram as the difference of cdf. 22 | * This is applicable when distribution is single-variable integrable. 
23 | * _pdf: Calculate histogram as the pdf mean of points evenly split within the bin. This is applicable when distribution is single-variable non-integrable. 24 | * _sim: Construct histogram by sampling. This is applicable when distribution is multi-variable non-integrable. 25 | */ 26 | 27 | #ifndef _HEADER_LIB_PIJ_NULLDIST_H_ 28 | #define _HEADER_LIB_PIJ_NULLDIST_H_ 29 | #include "../base/gsl/cdf.h" 30 | #include "../base/config.h" 31 | #include "../base/types.h" 32 | #include "../base/math.h" 33 | #ifdef __cplusplus 34 | extern "C" 35 | { 36 | #endif 37 | 38 | 39 | /************************************************************* 40 | * Generic functions for any step 41 | *************************************************************/ 42 | 43 | struct pij_nulldist_pdfs_param 44 | { 45 | size_t n1; 46 | size_t n2; 47 | }; 48 | 49 | /* Calculate the pdf p(x|n1,n2) for x=-0.5*log(1-z1/(z1+z2)), 50 | * where z1~chi2(n1), z2~chi2(n2). 51 | * p(x|n1,n2)=2*(1-exp(-2*x))^((n1-2)/2)*exp(-n2*x)*Gamma((n1+n2)/2) 52 | * /(Gamma(n1/2)*Gamma(n2/2)). 53 | * loc: (nd) Locations of x to calculate p(x|n1,n2). 54 | * ans: (nd) Calculated p(x|n1,n2). 55 | * param: Parameters. See struct pij_nulldist_pdfs_param. 56 | */ 57 | void pij_nulldist_pdfs(const VECTORD* loc,VECTORD* ans,const void* param); 58 | 59 | 60 | /* Calculate the pdf p(x|i) for x=-0.5*log(1-z1_i/(z1_i+z2_i)), 61 | * where z1_i ~ chi2(i*n1c+n1d), z2_i~chi2(-i*n2c+n2d), i=0,...,nmax-1. 62 | * p(x|i)=2*(1-exp(-2*x))^((i*n1c+n1d-2)/2)*exp((i*n2c-n2d)*x)*Gamma((i*(n1c-n2c)+n1d+n2d)/2)/(Gamma((i*n1c+n1d)/2)*Gamma((-i*n2c+n2d)/2)). 63 | * Buffer is provided. 64 | * n1c, 65 | * n1d, 66 | * n2c, 67 | * n2d, 68 | * nmax: As indicated in equation. 69 | * loc: (nd) Locations of x to calculate p(x|i). 70 | * ans: (nmax,nd) Calculated p(x|i). ans[j,k]=p(loc[k]|i=nsubmin+j). 71 | * vb2: (nd) Buffer. 
=log(1-exp(-2x))*n1c/2+x*n2c 72 | * nd: loc->size 73 | */ 74 | void pij_nulldist_calcpdf_buffed(long n1c,size_t n1d,long n2c,size_t n2d,const VECTORD* loc,MATRIXD* ans,VECTORD* vb2); 75 | 76 | /* Calculate density histogram of null distribution based on pdf function. 77 | * This uses interpolation within each bin, similarly with pij_nulldist_nullhist_pdf. 78 | * Null distribution is for x=-0.5*log(1-z1/(z1+z2)), z1~chi2(n1), z2~chi2(n2) 79 | * nbin: Number of bins for histogram 80 | * range: (nbin+1) Histogram range 81 | * hist: (nbin) Output of histogram bins 82 | * n1, 83 | * n2: Parameters of null distribution 84 | * n: Log_2 Number of points for interpolation within each bin. 85 | */ 86 | int pij_nulldist_hist_pdf(const double* restrict range,size_t nbin,double* restrict hist,size_t n1,size_t n2,size_t n); 87 | 88 | // CDF for x=-log(1-y)/2, y=z1/(z1+z2), z1~chi2(n1), z2~chi2(n2), i.e. y~Beta(n1/2,n2/2) 89 | static inline double pij_nulldist_cdfQ(double x,const size_t n1,const size_t n2); 90 | 91 | /***************************************************** 92 | * Inline functions 93 | *****************************************************/ 94 | 95 | static inline double pij_nulldist_cdfQ(double x,const size_t n1,const size_t n2) 96 | { 97 | double x1; 98 | x1=gsl_cdf_beta_Q(-math_sf_expminusone(-2*x),(double)n1/2,(double)n2/2); 99 | assert((x1>=0)&&(x1<=1)); 100 | return x1; 101 | } 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | #ifdef __cplusplus 117 | } 118 | #endif 119 | #endif 120 | -------------------------------------------------------------------------------- /pij/nullhist.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 
4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | #include "../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include "../base/gsl/histogram.h" 23 | #include "../base/logger.h" 24 | #include "../base/macros.h" 25 | #include "../base/histogram.h" 26 | #include "nulldist.h" 27 | #include "nullhist.h" 28 | 29 | 30 | 31 | gsl_histogram* pij_nullhist_single(double dmax,size_t nd,size_t n1,size_t n2) 32 | { 33 | #define CLEANUP CLEANHIST(h) 34 | struct pij_nulldist_pdfs_param param={n1,n2}; 35 | size_t nbin; 36 | gsl_histogram *h=0; 37 | 38 | assert(n1&&n2); 39 | dmax*=(1+1E-6); 40 | nbin=histogram_unequalbins_param_count(nd); 41 | if(nbin<5) 42 | ERRRETV(0,"Determined "PRINTFSIZET" bins constructed. 
Bin count too small.",nbin) 43 | else if(nbin<10) 44 | LOG(5,"Determined "PRINTFSIZET" bins, smaller than recommended minimum bin count (10).",nbin) 45 | else 46 | LOG(10,"Determined "PRINTFSIZET" bins.",nbin) 47 | h=gsl_histogram_alloc(nbin); 48 | if(!h) 49 | ERRRETV(0,"Not enough memory.") 50 | //Null density histogram 51 | gsl_histogram_set_ranges_uniform(h,0,dmax); 52 | //Set null histogram ranges 53 | if(histogram_unequalbins_fromnullpdfs(nbin,h->range,pij_nulldist_pdfs,¶m)) 54 | ERRRETV(0,"histogram_unequalbins_fromnullpdfs failed.") 55 | //Calculate null density histogram 56 | if(pij_nulldist_hist_pdf(h->range,nbin,h->bin,param.n1,param.n2,5)) 57 | ERRRETV(0,"pij_nulldist_hist_pdf failed.") 58 | return h; 59 | #undef CLEANUP 60 | } 61 | 62 | gsl_histogram** pij_nullhist(double dmax,size_t nv,size_t nd,long n1c,size_t n1d,long n2c,size_t n2d) 63 | { 64 | #define CLEANUP if(h){for(i=0;i=2); 71 | dmax*=(1+1E-6); 72 | CALLOCSIZE(h,nv-1); 73 | if(!h) 74 | ERRRETV(0,"Not enough memory.") 75 | nbin=histogram_unequalbins_param_count(nd); 76 | if(nbin<5) 77 | ERRRETV(0,"Determined "PRINTFSIZET" bins constructed. 
Bin count too small.",nbin) 78 | else if(nbin<10) 79 | LOG(5,"Determined "PRINTFSIZET" bins, smaller than recommended minimum bin count (10).",nbin) 80 | else 81 | LOG(10,"Determined "PRINTFSIZET" bins.",nbin) 82 | ret=1; 83 | for(i=0;irange,pij_nulldist_pdfs,¶m)) 95 | ERRRETV(0,"histogram_unequalbins_fromnullpdfs failed.") 96 | //Calculate null density histogram 97 | if(pij_nulldist_hist_pdf(h[i]->range,nbin,h[i]->bin,param.n1,param.n2,5)) 98 | ERRRETV(0,"pij_nulldist_hist_pdf failed.") 99 | } 100 | return h; 101 | #undef CLEANUP 102 | } 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /pij/nullhist.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see . 17 | */ 18 | /* This part produces the histogram of parametric null distributions. 
19 | */ 20 | 21 | #ifndef _HEADER_LIB_PIJ_NULLHIST_H_ 22 | #define _HEADER_LIB_PIJ_NULLHIST_H_ 23 | #include "../base/config.h" 24 | #include "../base/types.h" 25 | #include "../base/gsl/histogram.h" 26 | #ifdef __cplusplus 27 | extern "C" 28 | { 29 | #endif 30 | 31 | 32 | /* Construct one null histogram for a specific genotype value count. 33 | * The function calculates the null density histogram for random variable: 34 | * x=-0.5*log(1-z1/(z1+z2)), where z1~chi2(n1),z2~chi2(n2). 35 | * Histogram bin count and width are automatically determined 36 | * from real data count (nd). 37 | * For bin range settings, see histogram_unequalbins_fromnullpdfs. 38 | * For null density histogram from pdf, see pij_nulldist_hist_pdf. 39 | * dmax: Specifies the histogram bound as [0,dmax). 40 | * nd: Count of real data to form real histograms. This is used to 41 | * automatically decide number of bins and widths. 42 | * n1, 43 | * n2: Parameters of null distribution. 44 | * Return: Constructed null distribution histogram with preset 45 | * bin ranges and values as density. 46 | */ 47 | gsl_histogram* pij_nullhist_single(double dmax,size_t nd,size_t n1,size_t n2); 48 | 49 | /* Construct multiple null histograms for different genotype value counts. 50 | * The function calculates the null density histogram for random variable: 51 | * x=-0.5*log(1-z1/(z1+z2)), where z1~chi2(i*n1c+n1d),z2~chi2(-i*n2c+n2d), 52 | * i=0,...,nv-2. Histogram bin count and width are automatically determined 53 | * from real data count (nd). 54 | * For bin range settings, see histogram_unequalbins_fromnullpdfs. 55 | * For null density histogram from pdf, see pij_nulldist_hist_pdf. 56 | * dmax: Specifies the histogram bound as [0,dmax). 57 | * nv: Maximum number of values each genotype can take. Must be nv>=2. 58 | * This limits the possible values of kv in distribution, and 59 | * also output histogram count. 60 | * nd: Count of real data to form real histograms. 
This is used to 61 | * automatically decide number of bins and widths. 62 | * n1c, 63 | * n1d, 64 | * n2c, 65 | * n2d: Parameters of null distribution. 66 | * Return: [nv-1]. Constructed null distribution histograms with preset 67 | * bin ranges and values as density. Genotypes with i values have 68 | * histogram stored in Return[i-2]. 69 | */ 70 | gsl_histogram** pij_nullhist(double dmax,size_t nv,size_t nd,long n1c,size_t n1d,long n2c,size_t n2d); 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | #ifdef __cplusplus 84 | } 85 | #endif 86 | #endif 87 | -------------------------------------------------------------------------------- /pij/rank.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see https://www.gnu.org/licenses/. 
17 | */ 18 | #include "../base/config.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "../base/gsl/blas.h" 24 | #include "../base/random.h" 25 | #include "../base/const.h" 26 | #include "../base/logger.h" 27 | #include "../base/macros.h" 28 | #include "../base/data_process.h" 29 | #include "../base/supernormalize.h" 30 | #include "../base/threading.h" 31 | #include "llrtopij.h" 32 | #include "llrtopv.h" 33 | #include "rank.h" 34 | 35 | /* Calculates the log likelihood ratio correlated v.s. uncorrelated models. 36 | * Uses GSL BLAS. 37 | */ 38 | static void pij_rank_llr_block(const MATRIXF* t,const MATRIXF* t2,MATRIXF* llr) 39 | { 40 | size_t i,j; 41 | size_t ng=t->size1; 42 | size_t nt=t2->size1; 43 | #ifndef NDEBUG 44 | size_t ns=t->size2; 45 | #endif 46 | assert((t2->size2==ns)); 47 | assert((llr->size1==ng)); 48 | assert((llr->size2==nt)); 49 | MATRIXFF(cov2_bounded)(t,t2,llr); 50 | MATRIXFF(mul_elements)(llr,llr); 51 | MATRIXFF(scale)(llr,-1); 52 | MATRIXFF(add_constant)(llr,1); 53 | for(i=0;isize2==t2->size2)&&(llr->size1==t->size1)&&(llr->size2==t2->size1)); 70 | #pragma omp parallel 71 | { 72 | size_t n1,n2; 73 | 74 | threading_get_startend(t->size1,&n1,&n2); 75 | if(n2>n1) 76 | { 77 | MATRIXFF(const_view) mvt=MATRIXFF(const_submatrix)(t,n1,0,n2-n1,t->size2); 78 | MATRIXFF(view) mvllr; 79 | mvllr=MATRIXFF(submatrix)(llr,n1,0,n2-n1,llr->size2); 80 | pij_rank_llr_block(&mvt.matrix,t2,&mvllr.matrix); 81 | } 82 | } 83 | } 84 | 85 | /* Converts log likelihood ratios into p-values for ranked correlation test 86 | * d: MATRIXF of any size, as input of LLRs and also output of corresponding p-values 87 | * ns: Number of samples, to be used to calculate the null distribution 88 | */ 89 | static inline void pij_rank_llrtopv(MATRIXF* d,size_t ns) 90 | { 91 | assert(ns>2); 92 | pij_llrtopvm(d,1,ns-2); 93 | } 94 | 95 | int pij_rank_pv(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,size_t memlimit) 96 | { 97 | #define CLEANUP 
CLEANMATF(tnew)CLEANMATF(tnew2) 98 | MATRIXF *tnew,*tnew2; //(nt,ns) Supernormalized transcript matrix 99 | int ret; 100 | size_t ng,nt,ns; 101 | 102 | ng=t->size1; 103 | nt=t2->size1; 104 | ns=t->size2; 105 | 106 | tnew=tnew2=0; 107 | 108 | //Validation 109 | assert((t2->size2==ns)&&(p->size1==ng)&&(p->size2==nt)&&memlimit); 110 | if(ns<3) 111 | ERRRET("Needs at least 3 samples to compute p-values.") 112 | 113 | { 114 | size_t mem; 115 | mem=(2*t->size1*t->size2+2*t2->size1*t2->size2+p->size1*p->size2)*FTYPEBITS/8; 116 | if(memlimit<=mem) 117 | ERRRET("Memory limit lower than minimum memory needed. Try increasing your memory usage limit.") 118 | LOG(10,"Memory limit: %lu bytes.",memlimit) 119 | } 120 | 121 | tnew=MATRIXFF(alloc)(ng,ns); 122 | tnew2=MATRIXFF(alloc)(nt,ns); 123 | if(!(tnew&&tnew2)) 124 | ERRRET("Not enough memory.") 125 | 126 | //Step 1: Supernormalization 127 | LOG(9,"Supernormalizing...") 128 | MATRIXFF(memcpy)(tnew,t); 129 | ret=supernormalizea_byrow(tnew); 130 | MATRIXFF(memcpy)(tnew2,t2); 131 | ret=ret||supernormalizea_byrow(tnew2); 132 | if(ret) 133 | ERRRET("Supernormalization failed.") 134 | 135 | //Step 2: Log likelihood ratios from nonpermuted data 136 | LOG(9,"Calculating real log likelihood ratios...") 137 | pij_rank_llr(tnew,tnew2,p); 138 | //Step 3: Convert log likelihood ratios to probabilities 139 | LOG(9,"Converting likelihood ratios into p-values...") 140 | pij_rank_llrtopv(p,ns); 141 | 142 | //Cleanup 143 | CLEANUP 144 | return 0; 145 | #undef CLEANUP 146 | } 147 | 148 | /* Convert LLR into probabilities per A. Uses pij_llrtopij_convert. 149 | * ans: (ng,nt) Source real LLRs to compare with null LLRs, 150 | * also output location of converted probabilities. 151 | * ns: Number of samples, used for calculation of null distribution 152 | * nodiag: Whether diagonal elements of d should be ignored when converting 153 | * to probabilities 154 | * nodiagshift: Offdiagonal shift. 
155 | * For nodiagshift>0/<0, use upper/lower diagonals of corresponding id. 156 | * Return: 0 if succeed. 157 | */ 158 | static int pij_rank_llrtopij(MATRIXF* ans,size_t ns,char nodiag,long nodiagshift) 159 | { 160 | LOG(9,"Converting LLR to probabilities on per A basis.") 161 | if(ns<=2) 162 | { 163 | LOG(0,"Needs at least 3 samples to compute probabilities.") 164 | return 1; 165 | } 166 | return pij_llrtopij_convert_single_self(ans,1,ns-2,nodiag,nodiagshift); 167 | } 168 | 169 | int pij_rank(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,char nodiag,size_t memlimit) 170 | { 171 | #define CLEANUP CLEANMATF(tnew)CLEANMATF(tnew2) 172 | MATRIXF *tnew,*tnew2; //(nt,ns) Supernormalized transcript matrix 173 | VECTORFF(view) vv; 174 | int ret; 175 | size_t ng,nt,ns; 176 | 177 | ng=t->size1; 178 | nt=t2->size1; 179 | ns=t->size2; 180 | 181 | tnew=tnew2=0; 182 | 183 | //Validation 184 | assert((t2->size2==ns)&&(p->size1==ng)&&(p->size2==nt)&&memlimit); 185 | 186 | if(ns<=2) 187 | ERRRET("Needs at least 3 samples to compute probabilities.") 188 | { 189 | size_t mem; 190 | mem=(2*t->size1*t->size2+2*t2->size1*t2->size2+p->size1*p->size2)*FTYPEBITS/8; 191 | if(memlimit<=mem) 192 | ERRRET("Memory limit lower than minimum memory needed. 
Try increasing your memory usage limit.") 193 | LOG(10,"Memory limit: %lu bytes.",memlimit) 194 | } 195 | 196 | tnew=MATRIXFF(alloc)(ng,ns); 197 | tnew2=MATRIXFF(alloc)(nt,ns); 198 | if(!(tnew&&tnew2)) 199 | ERRRET("Not enough memory.") 200 | 201 | //Check for identical rows in input data 202 | { 203 | VECTORFF(view) vbuff1=MATRIXFF(column)(tnew,0); 204 | VECTORFF(view) vbuff2=MATRIXFF(row)(tnew2,0); 205 | MATRIXFF(cmprow)(t,t2,&vbuff1.vector,&vbuff2.vector,nodiag,1); 206 | } 207 | 208 | //Step 1: Supernormalization 209 | LOG(9,"Supernormalizing...") 210 | MATRIXFF(memcpy)(tnew,t); 211 | ret=supernormalizea_byrow(tnew); 212 | MATRIXFF(memcpy)(tnew2,t2); 213 | ret=ret||supernormalizea_byrow(tnew2); 214 | if(ret) 215 | ERRRET("Supernormalization failed.") 216 | 217 | //Step 2: Log likelihood ratios from nonpermuted data 218 | LOG(9,"Calculating real log likelihood ratios...") 219 | pij_rank_llr(tnew,tnew2,p); 220 | if(nodiag) 221 | { 222 | vv=MATRIXFF(diagonal)(p); 223 | VECTORFF(set_zero)(&vv.vector); 224 | } 225 | //Step 3: Convert log likelihood ratios to probabilities 226 | if((ret=pij_rank_llrtopij(p,ns,nodiag,0))) 227 | LOG(1,"Failed to convert log likelihood ratios to probabilities.") 228 | 229 | //Cleanup 230 | CLEANUP 231 | return ret; 232 | #undef CLEANUP 233 | } 234 | -------------------------------------------------------------------------------- /pij/rank.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2016-2018, 2020 Lingfei Wang 2 | * 3 | * This file is part of Findr. 4 | * 5 | * Findr is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Affero General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * Findr is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Affero General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Affero General Public License 16 | * along with Findr. If not, see https://www.gnu.org/licenses/. 17 | */ 18 | /* This file contains rank-based pij prediction without genotype information. 19 | * Input expression data are first supernormalized so only rank information 20 | * remains. Then different prediction methods can be applied for pij. 21 | * 22 | * Currently only one method is provided. It first calculates the log likelihood 23 | * ratio (LLR) between the null (A and B unrelated) and alternative (A---B) hypotheses. The LLR 24 | * is then converted into probability of alternative hypothesis per A. 25 | * The probability is regarded as pij. This is in function pij_rank. 26 | */ 27 | 28 | #ifndef _HEADER_LIB_PIJ_RANK_H_ 29 | #define _HEADER_LIB_PIJ_RANK_H_ 30 | #include "../base/config.h" 31 | #include "../base/types.h" 32 | #ifdef __cplusplus 33 | extern "C" 34 | { 35 | #endif 36 | 37 | /* Calculate p-values of the null hypothesis (A and B unrelated) against A--B based on LLR distributions of real data 38 | * and null hypothesis. 39 | * t: (ng,ns) Expression data for A 40 | * t2: (nt,ns) Expression data for B 41 | * p: (ng,nt) Output p-values for the hypothesis that A--B is false 42 | * memlimit:Specifies approximate memory usage. Function can fail if memlimit is too small. For unlimited memory, set memlimit=-1. 43 | * Return: 0 on success. 44 | */ 45 | int pij_rank_pv(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,size_t memlimit); 46 | 47 | /* Calculate probabilities of A--B based on LLR distributions of real data 48 | * and null hypothesis. 
49 | * t: (ng,ns) Expression data for A 50 | * t2: (nt,ns) Expression data for B 51 | * p: (ng,nt) Output probabilities that A--B is true 52 | * nodiag: When the top ng rows of t2 are exactly t, diagonals of pij are meaningless. 53 | * In this case, set nodiag to 1 to avoid inclusion of NANs. For nodiag=0, t and t2 54 | * should not have any identical genes. 55 | * memlimit:Specifies approximate memory usage. Function can fail if memlimit is too small. For large datasets, memory usage will be reduced by splitting t into smaller chunks and inferring separately. For unlimited memory, set memlimit=-1. 56 | * Return: 0 on success. 57 | */ 58 | int pij_rank(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,char nodiag,size_t memlimit); 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | #endif 78 | --------------------------------------------------------------------------------