├── COPYRIGHT
├── LICENSE
├── Makefile
├── README
├── UPDATES
├── base
├── config.h
├── const.h
├── data_process.c
├── data_process.h
├── data_struct.h
├── data_struct_heap.c
├── data_struct_heap.h
├── data_struct_ll.c
├── data_struct_ll.h
├── general_alg.h
├── gsl
│ ├── blas.h
│ ├── cdf.h
│ ├── errno.h
│ ├── histogram.h
│ ├── math.h
│ ├── matrix.h
│ ├── permutation.h
│ ├── randist.h
│ ├── rng.h
│ ├── sf.h
│ ├── sort.h
│ ├── statistics.h
│ └── vector.h
├── histogram.c
├── histogram.h
├── lib.c
├── lib.h
├── logger.c
├── logger.h
├── macros.h
├── math.c
├── math.h
├── os.h
├── random.c
├── random.h
├── supernormalize.c
├── supernormalize.h
├── threading.h
└── types.h
├── cycle
├── cycle.h
├── vg.c
└── vg.h
├── doc.pdf
├── external
└── R.c
├── netr
├── one.c
└── one.h
└── pij
├── cassist
├── cassist.c
├── cassist.h
├── llr.c
├── llr.h
├── llrtopij.c
├── llrtopij.h
└── llrtopv.h
├── gassist
├── gassist.c
├── gassist.h
├── llr.c
├── llr.h
├── llrtopij.c
├── llrtopij.h
├── llrtopv.c
├── llrtopv.h
├── nullhist.c
└── nullhist.h
├── llrtopij.c
├── llrtopij.h
├── llrtopv.c
├── llrtopv.h
├── nulldist.c
├── nulldist.h
├── nullhist.c
├── nullhist.h
├── rank.c
└── rank.h
/COPYRIGHT:
--------------------------------------------------------------------------------
1 | Copyright 2016-2018 Lingfei Wang
2 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | FTYPEBITS=32
2 | GTYPEBITS=8
3 | LIB_NAME=findr
4 | LIB_NAMEFULL="Fast Inference of Networks from Directed Regulations"
5 | LIB_FNAME=lib$(LIB_NAME).so
6 | AUTHOR="Lingfei Wang"
7 | AUTHOR_EMAIL="Lingfei.Wang.github@outlook.com"
8 | URL_LIB="https://github.com/lingfeiwang/findr"
9 | URL_BIN="https://github.com/lingfeiwang/findr-bin"
10 | URL_PYTHON="https://github.com/lingfeiwang/findr-python"
11 | URL_R="https://github.com/lingfeiwang/findr-R"
12 | URL_DOC="https://github.com/lingfeiwang/findr/blob/master/doc.pdf"
13 | URL_LIB_REL="$(URL_LIB)/releases"
14 | URL_BIN_REL="$(URL_BIN)/releases"
15 | URL_R_REL="$(URL_R)/releases"
16 | VERSION1=1
17 | VERSION2=0
18 | VERSION3=8
19 | LICENSE=AGPL-3
20 | LICENSE_FULL="GNU Affero General Public License, Version 3"
21 | LICENSE_URL="https://www.gnu.org/licenses/agpl-3.0"
22 | ifdef INCLUDE_MAKEFILE_BEFORE # optional hook, read before Makefile.flags and the defaults below
23 | #Caller-supplied makefile fragment; package info can be set there
24 | include $(INCLUDE_MAKEFILE_BEFORE)
25 | endif
26 | include Makefile.flags # written by the Makefile.flags rule as CFLAGS= and LDFLAGS= lines
27 | ifndef LIB_FNAME # only takes effect if LIB_FNAME is empty or undefined at this point
28 | LIB_FNAME=lib
29 | endif
30 | ifndef PREFIX # default installation prefix
31 | PREFIX=/usr/local
32 | endif
33 | ifndef DIR_BUILD # default: the library is linked into the current directory
34 | DIR_BUILD=.
35 | endif
36 | ifndef DIR_SRC # default: sources are searched below the current directory
37 | DIR_SRC=.
38 | endif
39 | DIR_INSTALL_PREFIX=$(PREFIX)
40 | DIR_INSTALL_LIB=$(DIR_INSTALL_PREFIX)/lib
41 | DIR_INSTALL_INC0=$(DIR_INSTALL_PREFIX)/include
42 | DIR_INSTALL_INC=$(DIR_INSTALL_INC0)/$(LIB_NAME)
43 | # Compilers, linker and fixed optimisation flags
44 | CC=gcc
45 | CFLAGSI=$(addprefix -I ,. $(R_INCLUDE_DIR) $(PREFIX)/include /usr/local/include)
46 | F90C=gfortran
47 | F90FLAGS=-fPIC -fdefault-real-8 -ffixed-form -O3
48 | LD=gcc
49 | #INSTALL=install
50 | OPTFLAGS=-O3 -DNDEBUG=1 -DGSL_RANGE_CHECK_OFF=1 -DHAVE_INLINE=1
51 | # Sources are discovered one and two directory levels below DIR_SRC; object and install lists are derived from them
52 | LIB_CONFIG=base/config_auto.h
53 | LIB_C=$(wildcard $(DIR_SRC)/*/*.c) $(wildcard $(DIR_SRC)/*/*/*.c)
54 | LIB_C_B=$(basename $(LIB_C))
55 | LIB_F90=$(wildcard $(DIR_SRC)/*/*.f90) $(wildcard $(DIR_SRC)/*/*/*.f90)
56 | LIB_F90_B=$(basename $(LIB_F90))
57 | LIB_H=$(wildcard $(DIR_SRC)/*/*.h) $(wildcard $(DIR_SRC)/*/*/*.h) $(LIB_CONFIG)
58 | LIB_H_B=$(basename $(LIB_H))
59 | LIB_O_C=$(addsuffix .o,$(LIB_C_B))
60 | LIB_O_F90=$(addsuffix .o,$(LIB_F90_B))
61 | LIB_O=$(LIB_O_C) $(LIB_O_F90)
62 | LIB_PRODUCT=$(LIB_O)
63 | LIB_DPRODUCT=$(DIR_BUILD)/$(LIB_FNAME)
64 | INC_DPRODUCT=$(LIB_CONFIG)
65 | INC_INSTALL_FILES=$(LIB_H)
66 | INC_INSTALL_DIRS=$(dir $(LIB_H))
67 | LIB_UNINSTALL=$(addprefix $(DIR_INSTALL_LIB)/,$(notdir $(LIB_DPRODUCT)))
68 | INC_UNINSTALL=$(DIR_INSTALL_INC)
69 | PKGCONFIG=$(LIB_NAME).pc
70 | PKGCONFIG_UNINSTALL=$(DIR_INSTALL_LIB)/pkgconfig/$(LIB_NAME).pc
71 | # Command targets that never produce a file of their own name (install-pkgconfig was missing from this list)
72 | .PHONY: all clean distclean install-lib install-inc install-pkgconfig install uninstall
73 |
74 | all: $(LIB_DPRODUCT) $(PKGCONFIG) # default goal: shared library plus pkg-config file
75 | # Write the pkg-config file line by line ('>' truncates, '>>' appends); the rule has no prerequisites, so an existing file is never regenerated
76 | $(PKGCONFIG):
77 | @echo "prefix=$(DIR_INSTALL_PREFIX)" > $@
78 | @echo "exec_prefix=$(DIR_INSTALL_PREFIX)" >> $@
79 | @echo "libdir=$(DIR_INSTALL_LIB)" >> $@
80 | @echo "includedir=$(DIR_INSTALL_INC0)" >> $@
81 | @echo >> $@
82 | @echo "Name: $(LIB_NAME)" >> $@
83 | @echo "Description: Fast Inference of Networks from Directed Regulations" >> $@
84 | @echo "Version: $(VERSION1).$(VERSION2).$(VERSION3)" >> $@
85 | @echo "Libs: -L$(DIR_INSTALL_LIB) -l$(LIB_NAME) -lgsl" >> $@
86 | @echo "Cflags: -I$(DIR_INSTALL_INC0)" >> $@
87 | # Generate the C header with build-time settings; LIBGSL_LOCAL is emitted only when DIR_SRC_GSL is non-empty (NOTE(review): LIBGSL_LOCAL is not assigned in the visible Makefile - confirm an empty value is intended)
88 | $(LIB_CONFIG):
89 | @echo "#ifndef _HEADER_LIB_CONFIG_AUTO_H_" > $@
90 | @echo "#define _HEADER_LIB_CONFIG_AUTO_H_" >> $@
91 | @echo "#define FTYPEBITS $(FTYPEBITS)" >> $@
92 | @echo "#define GTYPEBITS $(GTYPEBITS)" >> $@
93 | @echo "#define LIB_NAME $(LIB_NAME)" >> $@
94 | @echo "#define VERSION1 $(VERSION1)" >> $@
95 | @echo "#define VERSION2 $(VERSION2)" >> $@
96 | @echo "#define VERSION3 $(VERSION3)" >> $@
97 | @if [ -n "$(DIR_SRC_GSL)" ]; then \
98 | echo "#define LIBGSL_LOCAL $(LIBGSL_LOCAL)" >> $@; \
99 | fi
100 | @echo "#endif" >> $@
101 | # Output directory: created on demand; only its existence matters to the link rule below
102 | $(DIR_BUILD):
103 | mkdir -p $@
104 |
105 | $(LIB_O_C): $(LIB_CONFIG) # C objects need the generated config header; no compile recipe is given here (presumably make's built-in rule - confirm)
106 |
107 | $(LIB_O_F90):
108 | $(F90C) -o $@ -c $(F90FLAGS) $(addsuffix .f90,$(basename $@))
109 | # DIR_BUILD is order-only (after '|'): a directory's timestamp changes whenever a file is added to it, which as a normal prerequisite forced needless relinks
110 | $(LIB_DPRODUCT): $(LIB_PRODUCT) | $(DIR_BUILD)
111 | $(LD) -o $@ $(LIB_PRODUCT) $(LDFLAGS)
112 | # clean removes the object files; distclean additionally removes the library, the generated files, Makefile.flags and the probe temp file
113 | clean:
114 | $(RM) $(LIB_PRODUCT)
115 |
116 | distclean: clean
117 | $(RM) $(LIB_DPRODUCT) $(PKGCONFIG) $(LIB_CONFIG) Makefile.flags $(TMP_FILE)
118 | # Install the shared library with mode 0755. Only ldconfig is best-effort: the braces keep '|| true' from also hiding a failed mkdir, cp or chmod
119 | install-lib: SHELL:=/bin/bash
120 | install-lib: all
121 | umask 0022 && mkdir -p $(DIR_INSTALL_LIB) && \
122 | cp $(LIB_DPRODUCT) $(DIR_INSTALL_LIB)/ && \
123 | chmod 0755 $(DIR_INSTALL_LIB)/$(notdir $(LIB_DPRODUCT)) && \
124 | { ldconfig $(DIR_INSTALL_LIB) || true; }
125 | # Install headers: first recreate each header's directory below DIR_INSTALL_INC, then copy every header and set it to mode 0644
126 | install-inc: SHELL:=/bin/bash
127 | install-inc: $(LIB_CONFIG)
128 | umask 0022 && mkdir -p $(DIR_INSTALL_INC) && \
129 | for dname in $(INC_INSTALL_DIRS); do \
130 | mkdir -p $(DIR_INSTALL_INC)/$$dname || exit 1; \
131 | done
132 | # Then the files: copy each header and make it world-readable
133 | umask 0022 && for fname in $(INC_INSTALL_FILES); do \
134 | cp $$fname $(DIR_INSTALL_INC)/$$fname || exit 1; \
135 | chmod 0644 $(DIR_INSTALL_INC)/$$fname || exit 1; \
136 | done
137 | # Copy the generated .pc file into DIR_INSTALL_LIB/pkgconfig and set it to mode 0644
138 | install-pkgconfig: $(PKGCONFIG)
139 | umask 0022 && mkdir -p $(DIR_INSTALL_LIB)/pkgconfig && \
140 | cp $< $(DIR_INSTALL_LIB)/pkgconfig/
141 | chmod 0644 $(DIR_INSTALL_LIB)/pkgconfig/$(notdir $<)
142 | # Full installation: library, headers and pkg-config file
143 | install: install-lib install-inc install-pkgconfig
144 | # Remove the installed library, the whole installed header directory and the installed .pc file
145 | uninstall:
146 | $(RM) -R $(LIB_UNINSTALL) $(INC_UNINSTALL) $(PKGCONFIG_UNINSTALL)
147 | # Probe the toolchain with trial links into TMP_FILE and record the working CFLAGS/LDFLAGS in Makefile.flags, which is included near the top of this Makefile
148 | TMP_FILE=.tmp
149 | Makefile.flags: # NOTE(review): the first whole-archive trial link passes all of its flags inside one pair of double quotes, i.e. as a single argument - confirm this is intended
150 | @echo "Testing gcc"
151 | if ! $(CC) --version > /dev/null 2>&1; then echo "GCC not found. Please download the latest GCC or specify its location in CC variable in Makefile."; exit 1; fi
152 | gver="$$($(CC) --version)"; \
153 | t1=$$(echo "$$gver" | grep -io gcc); \
154 | if ! [ -n "$$t1" ]; then echo "Invalid GCC version. Please download the latest GCC."; exit 1; fi
155 | cflags="$(CFLAGS) $(CFLAGS_EXTRA) $(CFLAGSI) -fopenmp -ggdb -fPIC -Wall -Wextra -Wconversion -Wsign-conversion -Wundef -Wendif-labels -std=c99 -pedantic-errors $(OPTFLAGS)"; \
156 | ldflags="$(LDFLAGS) $(LDFLAGS_EXTRA) -L $(PREFIX)/lib -L /usr/local/lib -L /usr/lib -fopenmp -lm -shared -lc"; \
157 | echo "Testing test method"; \
158 | if ! $(LD) $$ldflags -o $(TMP_FILE) > /dev/null 2>&1; then \
159 | echo "Linking with default flags failed."; exit 1; fi; \
160 | echo "Testing gfortran"; \
161 | $(LD) $$ldflags -lgfortran -o $(TMP_FILE) > /dev/null 2>&1 && \
162 | ldflags="$$ldflags -lgfortran"; \
163 | echo "Testing local GSL" ; \
164 | if [ -n "$(DIR_SRC_GSL)" ] ; then \
165 | echo "Testing -Wl,--whole-archive" ; \
166 | ldflags2="$(DIR_SRC_GSL)/.libs/libgsl.a $(DIR_SRC_GSL)/cblas/.libs/libgslcblas.a"; \
167 | $(LD) $$ldflags "-Wl,--whole-archive $$ldflags2 -Wl,--no-whole-archive" --shared -o $(TMP_FILE) > /dev/null 2>&1 && \
168 | ldflags2="-Wl,--whole-archive $$ldflags2 -Wl,--no-whole-archive"; \
169 | if ! $(LD) $$ldflags $$ldflags2 --shared -o $(TMP_FILE) > /dev/null 2>&1; then \
170 | echo "Can't link to embedded GSL with right flag." ; exit 1; fi; \
171 | cflags="-I $(DIR_SRC_GSL) $$cflags" ; \
172 | ldflags="$$ldflags $$ldflags2" ; \
173 | else \
174 | ldflags="$$ldflags -lgsl -lgslcblas"; \
175 | if ! $(LD) $$ldflags --shared -o $(TMP_FILE) > /dev/null 2>&1; then \
176 | echo "Link to installed GSL failed."; exit 1; fi; \
177 | fi ; \
178 | echo "Testing -Wl,--no-as-needed" ; \
179 | $(LD) -Wl,--no-as-needed $$ldflags --shared -o $(TMP_FILE) > /dev/null 2>&1 && \
180 | ldflags="-Wl,--no-as-needed $$ldflags"; \
181 | echo "CFLAGS=$$cflags" > $@ && \
182 | echo "LDFLAGS=$$ldflags" >> $@ && \
183 | $(RM) $(TMP_FILE)
184 |
185 |
186 |
187 |
188 |
189 |
190 | # Optional hook: caller-supplied makefile fragment read after all rules above
191 | ifdef INCLUDE_MAKEFILE_AFTER
192 | include $(INCLUDE_MAKEFILE_AFTER)
193 | endif
194 |
195 |
196 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Findr (Fast Inference of Networks from Directed Regulations) is a statistical inference tool for genetics. It predicts the probability of pairwise gene regulation based on gene expression level data. When genotype data is available for the best eQTLs, higher prediction accuracy can be achieved. The pairwise regulation probability is then applied for the reconstruction of gene regulation networks.
2 |
3 | Findr obtains much higher accuracy and faster speed than existing programs [1]. This is due to the analytical and implementational advances. Findr's unprecedented speed allows for whole-transcriptome causal network reconstruction, with a tutorial in [2]. Findr library can be downloaded from [3].
4 |
5 | This package is the C implementation of Findr library. It requires recent builds of GCC, GNU make, and GNU Scientific Library (GSL). Users can use the provided binary and python interfaces, or R package to interact with Findr library to perform calculations, or write one's own program and call Findr. The binary, python, and R entry points can be downloaded from [4], [5], [6] respectively. On Windows, we recommend building and running Findr on "Bash on Windows" [7], rather than building everything natively from scratch.
6 |
7 | A more detailed documentation of Findr can be found as doc.pdf.
8 |
9 | [1] Lingfei Wang and Tom Michoel (2017) Efficient and accurate causal inference with hidden confounders from genome-transcriptome variation data. PLOS Computational Biology 13(8): e1005703. https://doi.org/10.1371/journal.pcbi.1005703
10 | [2] Lingfei Wang and Tom Michoel (2017) Whole-transcriptome causal network inference with genomic and transcriptomic data. bioRxiv 213371. https://doi.org/10.1101/213371
11 | [3] https://github.com/lingfeiwang/findr
12 | [4] https://github.com/lingfeiwang/findr-bin
13 | [5] https://github.com/lingfeiwang/findr-python
14 | [6] https://github.com/lingfeiwang/findr-R
15 | [7] https://msdn.microsoft.com/commandline/wsl/about
16 |
--------------------------------------------------------------------------------
/UPDATES:
--------------------------------------------------------------------------------
1 | 1.0.8:
2 | Reverted the compatibility warning because version 2 has no schedule yet.
3 | 1.0.7:
4 | Added compatibility warning for interface changes in future versions.
5 | 1.0.6:
6 | Corrected a bug that may produce biased output in pij_gassist, pij_gassist_trad, and pijs_gassist when nodiag is set or when memlimit is small so computation is split into chunks.
7 | Now setting histogram bounds based on the maximum of all LLRs (as opposed to the maximum of the chunk when memlimit is small) in pij_gassist, pij_gassist_trad, and pijs_gassist. This ensures the output is independent of memlimit (related to question from sritchie73@github).
8 | Added sanity checks for agreement between input data and nodiag flag for pij functions excluding _pv (suggested by sritchie73@github).
9 | Lots of internal function renaming and code restructuring.
10 | Removed some unneeded files and functions.
11 | Now support pkg-config setup.
12 | 1.0.5:
13 | Updated Makefiles to account for different make environments (reported by sritchie73@github).
14 | 1.0.1:
15 | Bug correction:
16 | Updated LDFLAGS for R interface (reported by audreyqyfu@github).
17 | 1.0.0:
18 | New functions:
19 | Included P-value computation for 4 tests in pijs_gassist_pv and correlation test in pij_rank_pv.
20 | Included using continuous anchors (instead of discrete genotypes) for causal inference in pij(s)_cassist*.
21 | Revisions:
22 | Minor adjustments on adding fluctuations for supernormalized data when the number of samples is small (<30).
23 | Updated logging.
24 | Minor adjustments in conversion from LLR to local FDR.
25 | Minor bug corrections on defensive programming.
26 | 0.5.0:
27 | New function: reconstruction of directed acyclic graph from prior information of edge significance. For details, see library function netr_one_greedy, doc.pdf, or UPDATES in any interface.
28 | 0.4.1:
29 | Extreme situation behavior:
30 | Added error checking for few samples (<4).
31 | Added special consideration for extremely skewed LLR during local FDR estimation.
32 | Updated copyright notice.
33 | 0.4.0:
34 | Interface changes:
35 | Removed trailing _a's for all interface function names.
36 | Removed _tot functions. (See Obsoletes.)
37 | Binary interface has an extra parameter specifying memory usage limit.
38 | Python interface has an optional parameter specifying memory usage limit.
39 | Improvements:
40 | We have introduced the function pij_gassist_trad for traditional inference test.
41 | We have parallelized the remaining part of major computation and obtained even faster speed.
42 | We have modified functions to perform inplace operations as much as possible to save memory.
43 | We have introduced memory usage limit as a parameter (except R interface). Large datasets will be automatically split before calculation to keep memory usage under limit.
44 | We have included p1 calculation for nodiag=1 case.
45 | Better input validity checks for interfaces.
46 | Bug corrections.
47 | Obsoletes:
48 | In order to reduce effort in improving and maintaining the package, we decided to obsolete several non-essential functions. They include:
49 | pij(s)_*_tot functions: they will hardly be needed as more genes can be measured by advancing technologies.
50 | Windows native support: The latest version of Windows has provided support for Ubuntu Bash. All interfaces should integrate better with Bash on Windows. (https://msdn.microsoft.com/commandline/wsl)
51 |
52 | 0.3.0:
53 | Function pijs_gassist_*: changed function definition to provide latest test statistics.
54 | Function pij_gassist_*: added new function for recommended combination of tests.
55 |
--------------------------------------------------------------------------------
/base/config.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | //General configuration header: pulls in the generated config_auto.h and selects the logging back end
19 |
20 | #ifndef _HEADER_LIB_CONFIG_H_
21 | #define _HEADER_LIB_CONFIG_H_
22 | #include "config_auto.h"
23 | #ifdef LIBEXTENSION_R //R extension build: log through REprintf/REvprintf
24 | #include //NOTE(review): the header name is missing on this line - presumably an R header, confirm
25 | #define logprintf REprintf
26 | #define logvprintf REvprintf
27 | #ifdef NDEBUG //drop any earlier definitions so the three macros can be set to 1 below
28 | #undef NDEBUG
29 | #endif
30 | #ifdef GSL_RANGE_CHECK_OFF
31 | #undef GSL_RANGE_CHECK_OFF
32 | #endif
33 | #ifdef HAVE_INLINE
34 | #undef HAVE_INLINE
35 | #endif
36 | #define NDEBUG 1
37 | #define GSL_RANGE_CHECK_OFF 1
38 | #define HAVE_INLINE 1
39 | #else //any other build: log to stderr
40 | #define logprintf(...) fprintf(stderr,__VA_ARGS__)
41 | #define logvprintf(...) vfprintf(stderr,__VA_ARGS__)
42 | #endif //LIBEXTENSION_R
43 | #endif //_HEADER_LIB_CONFIG_H_
44 |
--------------------------------------------------------------------------------
/base/const.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the constants defined.
19 | */
20 |
21 | #ifndef _HEADER_LIB_CONST_H_
22 | #define _HEADER_LIB_CONST_H_
23 |
24 | //Maximum number of distinct values a genotype may take
25 | #define CONST_NV_MAX 128
26 | //Minimum number of distinct values a genotype may take
27 | #define CONST_NV_MIN 2
28 | #endif //_HEADER_LIB_CONST_H_
29 |
--------------------------------------------------------------------------------
/base/data_struct.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This lib contains definitions of data structures
19 | */
20 |
21 | #ifndef _HEADER_LIB_DATA_STRUCT_H_
22 | #define _HEADER_LIB_DATA_STRUCT_H_
23 | #include "config.h"
24 | #include "data_struct_ll.h"
25 | #include "data_struct_heap.h"
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/data_struct_heap.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | #include "config.h"
19 | #include
20 | #include "logger.h"
21 | #include "macros.h"
22 | #include "data_struct_heap.h"
23 |
24 | int data_heap_init(struct data_heap* h,size_t nmax)
25 | {
26 | h->n=0; //the heap starts empty
27 | h->nmax=nmax; //capacity requested by the caller
28 | MALLOCSIZE(h->d,nmax); //MALLOCSIZE is defined elsewhere; presumably allocates nmax elements - confirm
29 | return h->d?0:1; //nonzero when h->d is null after the allocation
30 | }
31 |
32 | void data_heap_free(struct data_heap* h)
33 | {
34 | //free() accepts a null pointer, so no guard is needed
35 | free(h->d);
36 | h->d=0;
37 | h->nmax=0;
38 | }
39 |
40 | void data_heap_empty(struct data_heap* h)
41 | {
42 | h->n=0; //forget all elements; capacity and storage are left untouched
43 | }
44 |
45 | int data_heap_push(struct data_heap* h, HTYPE d)
46 | {
47 | size_t cur,par;
48 | //Returns 0 on success, 1 when the heap is already full
49 | if(h->n==h->nmax)
50 | {
51 | LOG(10,"Heap push failed: heap full.")
52 | return 1;
53 | }
54 | //Put d in the first free slot, then move it up until it is larger than its parent
55 | cur=h->n++;
56 | h->d[cur]=d;
57 | for(;cur;cur=par)
58 | {
59 | par=(cur-1)/2;
60 | if(d>h->d[par])
61 | break;
62 | h->d[cur]=h->d[par];
63 | h->d[par]=d;
64 | }
65 | return 0;
66 | }
67 |
68 | HTYPE data_heap_pop(struct data_heap* h)
69 | {
70 | size_t p,c2,cm,pn;
71 | HTYPE v,ret;
72 | //Removes and returns the root d[0], the smallest element; the heap must not be empty
73 | assert(h->n);
74 | ret=h->d[0];
75 | h->d[0]=h->d[--(h->n)]; //the last element takes the root's place
76 | if(!h->n)
77 | return ret;
78 | p=0;
79 | pn=h->n/2; //nodes 0..pn-1 have at least one child
80 | while(p+1<pn) //every node visited here has two children
81 | {
82 | cm=2*p+1;
83 | c2=cm+1;
84 | if(h->d[c2]<h->d[cm])
85 | cm=c2; //cm is the smaller child
86 | if(h->d[p]<h->d[cm])
87 | return ret;
88 | v=h->d[p];
89 | h->d[p]=h->d[cm];
90 | h->d[cm]=v;
91 | p=cm;
92 | }
93 | if(p+1==pn) //last node with children: its second child may not exist
94 | {
95 | cm=2*p+1;
96 | c2=cm+1;
97 | if((c2<h->n)&&(h->d[c2]<h->d[cm]))
98 | cm=c2;
99 | if(h->d[p]<h->d[cm])
100 | return ret;
101 | v=h->d[p];
102 | h->d[p]=h->d[cm];
103 | h->d[cm]=v;
104 | }
105 | return ret;
106 | }
107 |
108 | int data_heapdec_push(struct data_heapdec* h, HTYPE d)
109 | {
110 | size_t c,p;
111 | //Returns 0 on success, 1 when the heap is already full
112 | if(h->nmax==h->n)
113 | {
114 | LOG(10,"Heap push failed: heap full.")
115 | return 1;
116 | }
117 | h->d[h->n]=d;
118 | c=h->n++;
119 | while(c) //move d up until it is smaller than its parent
120 | {
121 | p=(c-1)/2;
122 | if(d<h->d[p])
123 | return 0;
124 | h->d[c]=h->d[p];
125 | h->d[p]=d;
126 | c=p;
127 | }
128 | return 0;
129 | }
130 |
131 | HTYPE data_heapdec_pop(struct data_heapdec* h)
132 | {
133 | size_t p,c2,cm,pn;
134 | HTYPE v,ret;
135 | //Removes and returns the root d[0], the largest element; the heap must not be empty
136 | assert(h->n);
137 | ret=h->d[0];
138 | h->d[0]=h->d[--(h->n)]; //the last element takes the root's place
139 | if(!h->n)
140 | return ret;
141 | p=0;
142 | pn=h->n/2; //nodes 0..pn-1 have at least one child
143 | while(p+1<pn) //every node visited here has two children
144 | {
145 | cm=2*p+1;
146 | c2=cm+1;
147 | if(h->d[c2]>h->d[cm])
148 | cm=c2; //cm is the larger child
149 | if(h->d[p]>h->d[cm])
150 | return ret;
151 | v=h->d[p];
152 | h->d[p]=h->d[cm];
153 | h->d[cm]=v;
154 | p=cm;
155 | }
156 | if(p+1==pn) //last node with children: its second child may not exist
157 | {
158 | cm=2*p+1;
159 | c2=cm+1;
160 | if((c2<h->n)&&(h->d[c2]>h->d[cm]))
161 | cm=c2;
162 | if(h->d[p]>h->d[cm])
163 | return ret;
164 | v=h->d[p];
165 | h->d[p]=h->d[cm];
166 | h->d[cm]=v;
167 | }
168 | return ret;
169 | }
170 |
171 |
172 |
173 |
--------------------------------------------------------------------------------
/base/data_struct_heap.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This lib contains definition of heap data structure
19 | */
20 |
21 | #ifndef _HEADER_LIB_DATA_STRUCT_HEAP_H_
22 | #define _HEADER_LIB_DATA_STRUCT_HEAP_H_
23 | #include "config.h"
24 | #include
25 | #include
26 |
27 | #ifdef __cplusplus
28 | extern "C"
29 | {
30 | #endif
31 |
32 |
33 | #define HTYPE size_t
34 |
35 | //Incremental heap: data_heap_push/data_heap_pop keep the smallest element at d[0]
36 | struct data_heap
37 | {
38 | size_t nmax; //capacity: push fails once n reaches nmax
39 | size_t n; //number of elements currently stored
40 | HTYPE* restrict d; //element storage, used at indices 0..n-1
41 | };
42 |
43 | int data_heap_init(struct data_heap* h,size_t nmax); //returns nonzero when h->d is null after allocation
44 | void data_heap_free(struct data_heap* h); //frees h->d and resets nmax to 0
45 | void data_heap_empty(struct data_heap* h); //sets n to 0, keeps the storage
46 | int data_heap_push(struct data_heap* h, HTYPE d); //returns 0 on success, 1 when the heap is full
47 | HTYPE data_heap_pop(struct data_heap* h); //removes and returns d[0]; asserts the heap is not empty
48 | // int data_heap_popto(struct data_heap* h, HTYPE* d);
49 | static inline HTYPE data_heap_get(const struct data_heap* h,size_t n); //element at index n; asserts n is below h->n
50 | static inline HTYPE data_heap_top(const struct data_heap* h); //element at index 0
51 |
52 | //Decremental heap: same struct and helpers, but its push/pop keep the largest element at d[0]
53 | #define data_heapdec data_heap
54 | #define data_heapdec_init data_heap_init
55 | #define data_heapdec_free data_heap_free
56 | #define data_heapdec_empty data_heap_empty
57 | int data_heapdec_push(struct data_heapdec* h, HTYPE d);
58 | HTYPE data_heapdec_pop(struct data_heapdec* h);
59 | #define data_heapdec_get data_heap_get
60 | #define data_heapdec_top data_heap_top
61 |
62 |
63 | static inline HTYPE data_heap_get(const struct data_heap* h,size_t n)
64 | {
65 | assert(h->n>n); //n must index a stored element
66 | return h->d[n]; //raw array position, not sorted order
67 | }
68 |
69 | static inline HTYPE data_heap_top(const struct data_heap* h)
70 | {
71 | return data_heap_get(h,0); //the root; the assertion in data_heap_get rejects an empty heap
72 | }
73 |
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | #endif
78 |
--------------------------------------------------------------------------------
/base/data_struct_ll.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | #include "config.h"
19 | #include
20 | #include "macros.h"
21 | #include "data_struct_ll.h"
22 |
23 | int data_ll_init(struct data_ll* ll,size_t nmax)
24 | {
25 | assert(ll);
26 | ll->n=0;
27 | ll->nmax=nmax;
28 | MALLOCSIZE(ll->d,2*nmax); //two slots per item: link and value
29 | if(ll->d)
30 | {
31 | memset(ll->d,-1,2*nmax*sizeof(*ll->d)); //all bytes 0xFF, so every slot reads as (size_t)-1
32 | return 0;
33 | }
34 | LOG(1,"Not enough memory.")
35 | return 1;
36 | }
37 |
38 | void data_ll_free(struct data_ll* ll)
39 | {
40 | assert(ll);
41 | //Release the storage; free() accepts a null pointer,
42 | //so a list whose d is already null needs no guard
43 | free(ll->d);
44 | ll->d=0;
45 | //Capacity drops to zero; ll->n is left as it was
46 | ll->nmax=0;
47 | }
48 |
49 | void data_ll_empty(struct data_ll* ll)
50 | {
51 | assert(ll);
52 | memset(ll->d,-1,2*ll->nmax*sizeof(*ll->d)); //reset every link and value slot to (size_t)-1
53 | ll->n=0; //no items remain; capacity is unchanged
54 | }
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/base/data_struct_ll.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This lib contains definition of linked list data structure
19 | */
20 |
21 | #ifndef _HEADER_LIB_DATA_STRUCT_LL_H_
22 | #define _HEADER_LIB_DATA_STRUCT_LL_H_
23 | #include "config.h"
24 | #include
25 | #include
26 | #include "logger.h"
27 |
28 | #ifdef __cplusplus
29 | extern "C"
30 | {
31 | #endif
32 |
33 |
34 | //Linked list of size_t values, stored as link/value pairs in a single array
35 | struct data_ll
36 | {
37 | //Max number of items
38 | size_t nmax;
39 | //Current number of items
40 | size_t n;
41 | /* Data and links
42 | * d[2*i] is the child and d[2*i+1] is data.
43 | * Item i has child j (at d[2*j] and d[2*j+1]) if d[2*i]=j.
44 | * d[2*i]=(size_t)-1 means item i has no child.
45 | */
46 | size_t* restrict d;
47 | };
48 |
49 | int data_ll_init(struct data_ll* ll,size_t nmax); //returns 0 on success, 1 when the allocation fails
50 | void data_ll_free(struct data_ll* ll); //frees d and resets nmax to 0
51 | void data_ll_empty(struct data_ll* ll); //sets n to 0 and resets every slot to (size_t)-1
52 | //Insert entry with value val with no parent. Returns id, or (size_t)-1 if the list is full.
53 | static inline size_t data_ll_insert(struct data_ll* ll,size_t val);
54 | //Insert entry with value val with parent id. Returns self id.
55 | static inline size_t data_ll_insert_after(struct data_ll* ll,size_t id,size_t val);
56 | /* Insert entry with value val with child id. Returns self id.
57 | * NOTE: Does not fix child of father of id.
58 | */
59 | static inline size_t data_ll_insert_before(struct data_ll* ll,size_t id,size_t val);
60 | //Return child id
61 | static inline size_t data_ll_child(const struct data_ll* ll,size_t id);
62 | //Return value
63 | static inline size_t data_ll_val(const struct data_ll* ll,size_t id);
64 |
65 | static inline size_t data_ll_insert(struct data_ll* ll,size_t val)
66 | {
67 | size_t id;
68 | //Returns the new id, or (size_t)-1 when the list is full; the link slot d[2*id] is not written here
69 | if(ll->n==ll->nmax)
70 | {
71 | LOG(5,"Linked list insertion failed: linked list full.")
72 | return (size_t)-1;
73 | }
74 | id=ll->n++;
75 | ll->d[2*id+1]=val;
76 | return id;
77 | }
78 |
79 | static inline size_t data_ll_insert_after(struct data_ll* ll,size_t id,size_t val)
80 | {
81 | const size_t fresh=data_ll_insert(ll,val);
82 | if(fresh!=(size_t)-1)
83 | {
84 | //Splice in: the new item inherits id's child, then becomes id's child
85 | ll->d[2*fresh]=ll->d[2*id];
86 | ll->d[2*id]=fresh;
87 | }
88 | return fresh;
89 | }
90 |
91 | static inline size_t data_ll_insert_before(struct data_ll* ll,size_t id,size_t val)
92 | {
93 | const size_t fresh=data_ll_insert(ll,val);
94 |
95 | //Point the new item at id. The item that previously led to id
96 | //is not touched, so the caller has to relink it if needed.
97 | if(fresh!=(size_t)-1)
98 | ll->d[2*fresh]=id;
99 | return fresh;
100 | }
101 |
102 | static inline size_t data_ll_child(const struct data_ll* ll,size_t id)
103 | {
104 | assert(id<ll->n); //id must refer to an inserted item
105 | return ll->d[2*id]; //link slot of item id; (size_t)-1 means no child
106 | }
107 |
108 | static inline size_t data_ll_val(const struct data_ll* ll,size_t id)
109 | {
110 | assert(id<ll->n); //id must refer to an inserted item
111 | return ll->d[2*id+1]; //value slot of item id
112 | }
113 |
114 |
115 | #ifdef __cplusplus
116 | }
117 | #endif
118 | #endif
119 |
--------------------------------------------------------------------------------
/base/general_alg.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This lib contains general algorithms helpful in other c programs,
19 | * such as breaking strings, counting instances, and binary search.
20 | */
21 |
22 | #ifndef _HEADER_LIB_GENERAL_ALG_H_
23 | #define _HEADER_LIB_GENERAL_ALG_H_
24 | #include "config.h"
25 | #include
26 | #ifdef __cplusplus
27 | extern "C"
28 | {
29 | #endif
30 |
31 | /* Categorize data according to categorical information into separate arrays.
32 | * s: Source of data.
33 | * c: Categorical information. Each element contains a category of the corresponding element of s.
34 | * d: Destination of categorization. Element i with c[i]=j is put into d[j].
35 | * Modified value after this function indicates size of outcome arrays.
36 | * n: Size of s and c.
37 | */
38 | static inline void general_alg_categorize(const size_t* restrict s,const unsigned char* restrict c,size_t* restrict* restrict d,size_t n);
39 |
40 | /* Categorize data according to embedded categorical information into separate arrays.
41 | * s: Source of data.
42 | * c: Categorical information. Each element contains a category of the corresponding element of s.
43 | * d: Destination of categorization. Element i with c[s[i]]=j is put into d[j].
44 | * Modified value after this function indicates size of outcome arrays.
45 | * n: Size of s. c is indexed by the values stored in s, so it must cover every such index.
46 | */
47 | static inline void general_alg_categorize_embed(const size_t* restrict s,const unsigned char* restrict c,size_t* restrict* restrict d,size_t n);
48 |
49 | /* Removes duplicates in a sorted array of double, and shifts unique values to
50 | * the front of the array.
51 | * a: array
52 | * n: size of array
53 | * Return: Size of new array
54 | */
55 | static inline size_t remove_sorted_duplicates(double* restrict a,size_t n);
56 |
57 |
58 |
59 |
60 |
61 |
62 | static inline void general_alg_categorize(const size_t* restrict s,const unsigned char* restrict c,size_t* restrict* restrict d,size_t n)
63 | {
64 | size_t i;
65 | for(i=0;i.
17 | */
18 | #ifndef __HEADER_LIB_GSL_BLAS_H__
19 | #define __HEADER_LIB_GSL_BLAS_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/blas/gsl_blas.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/cdf.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_CDF_H__
19 | #define __HEADER_LIB_GSL_CDF_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/cdf/gsl_cdf.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/errno.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_ERRNO_H__
19 | #define __HEADER_LIB_GSL_ERRNO_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/err/gsl_errno.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/histogram.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_HISTOGRAM_H__
19 | #define __HEADER_LIB_GSL_HISTOGRAM_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/histogram/gsl_histogram.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/math.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_MATH_H__
19 | #define __HEADER_LIB_GSL_MATH_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/gsl_math.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/matrix.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_MATRIX_H__
19 | #define __HEADER_LIB_GSL_MATRIX_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/matrix/gsl_matrix.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/permutation.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_PERMUTATION_H__
19 | #define __HEADER_LIB_GSL_PERMUTATION_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/permutation/gsl_permutation.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/randist.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_RANDIST_H__
19 | #define __HEADER_LIB_GSL_RANDIST_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/randist/gsl_randist.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/rng.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_RNG_H__
19 | #define __HEADER_LIB_GSL_RNG_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/rng/gsl_rng.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/sf.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_SF_H__
19 | #define __HEADER_LIB_GSL_SF_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/specfunc/gsl_sf.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/sort.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_SORT_H__
19 | #define __HEADER_LIB_GSL_SORT_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #include
24 | #else
25 | #include
26 | #include
27 | //#include "../../../gsl/sort/gsl_sort.h"
28 | //#include "../../../gsl/sort/gsl_sort_vector.h"
29 | #endif
30 | #endif
31 |
--------------------------------------------------------------------------------
/base/gsl/statistics.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_STATISTICS_H__
19 | #define __HEADER_LIB_GSL_STATISTICS_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/statistics/gsl_statistics.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/gsl/vector.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #ifndef __HEADER_LIB_GSL_VECTOR_H__
19 | #define __HEADER_LIB_GSL_VECTOR_H__
20 | #include "../config.h"
21 | #ifndef LIBGSL_LOCAL
22 | #include
23 | #else
24 | #include
25 | //#include "../../../gsl/vector/gsl_vector.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/base/lib.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "config.h"
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include "gsl/errno.h"
24 | #include "random.h"
25 | #include "logger.h"
26 | #include "lib.h"
27 |
28 | #define MACROSTR(X) #X
29 | #define STR(X) MACROSTR(X)
30 | #define VERSION1_S STR(VERSION1)
31 | #define VERSION2_S STR(VERSION2)
32 | #define VERSION3_S STR(VERSION3)
33 | #define LIBVERSION VERSION1_S "." VERSION2_S "." VERSION3_S
34 | #define LIBNAME STR(LIB_NAME)
35 |
36 | #ifndef LIBINFO
37 | #define LIBINFONAME(X) X
38 | #else
39 | #define LIBINFONAME(X) LIBINFO##X
40 | #endif
41 |
42 |
43 | void LIBINFONAME(lib_init)(unsigned char loglv,unsigned long rs0,size_t nthread)
44 | {
45 | 	unsigned long seed=rs0;	// requested seed; 0 means "pick one from the clock"
46 | 	size_t max_threads;
47 | 	LOGLV(loglv);	// set the global log level first so the start-up message below is filtered by it
48 | 	random_init();
49 | 	if(!seed) seed=(unsigned long)time(NULL);
50 | 	random_seed(seed);
51 | 	if(nthread>0)	// nthread==0 leaves the OpenMP thread count untouched
52 | 		omp_set_num_threads((int)nthread);
53 | 	omp_set_nested(0);
54 | 	max_threads=(size_t)omp_get_max_threads();
55 | 	gsl_set_error_handler_off();	// turn off GSL's installed error handler
56 | 	LOG(7,"Library started with log level %u, initial random seed %lu, and max thread count "PRINTFSIZET".",loglv,seed,max_threads)
57 | }
58 |
59 | const char* LIBINFONAME(lib_name)() // Returns the library name as a string literal; the caller must not free or modify it.
60 | {
61 | return LIBNAME; // LIBNAME is the stringified LIB_NAME macro
62 | }
63 |
64 | size_t LIBINFONAME(lib_version1)() // Returns the first component (a) of the a.b.c library version.
65 | {
66 | return VERSION1; // VERSION1 is defined outside this file
67 | }
68 |
69 | size_t LIBINFONAME(lib_version2)() // Returns the second component (b) of the a.b.c library version.
70 | {
71 | return VERSION2; // VERSION2 is defined outside this file
72 | }
73 |
74 | size_t LIBINFONAME(lib_version3)() // Returns the third component (c) of the a.b.c library version.
75 | {
76 | return VERSION3; // VERSION3 is defined outside this file
77 | }
78 |
79 | const char* LIBINFONAME(lib_version)() // Returns the full library version as an "a.b.c" string literal; the caller must not free or modify it.
80 | {
81 | return LIBVERSION; // LIBVERSION joins the stringified VERSION1, VERSION2 and VERSION3 with dots at compile time
82 | }
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
--------------------------------------------------------------------------------
/base/lib.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | //This file contains library related functions
19 | #ifndef _HEADER_LIB_LIB_H_
20 | #define _HEADER_LIB_LIB_H_
21 | #ifdef __cplusplus
22 | extern "C"
23 | {
24 | #endif
25 |
26 |
27 | /* The library needs to be initialized before any other function is called,
28 | * to perform correctly with desired log level and random seed.
29 | * loglv: Logging level, see logger.h.
30 | * rs: Initial random seed. If rs=0, use current time as random seed.
31 | * nthread: Maximum number of threads. If nthread=0, use default setting.
32 | */
33 | void lib_init(unsigned char loglv,unsigned long rs,size_t nthread);
34 |
35 | /* Returns library name
36 | */
37 | const char* lib_name();
38 | /* Returns library version in a.b.c format, or a, b, or c, for subfunctions ending with 1, 2, or 3 respectively.
39 | */
40 | const char* lib_version();
41 | size_t lib_version1();
42 | size_t lib_version2();
43 | size_t lib_version3();
44 |
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 | #endif
49 |
--------------------------------------------------------------------------------
/base/logger.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "config.h"
19 | #include
20 | #include
21 | #include "os.h"
22 | #include "macros.h"
23 | #include "logger.h"
24 |
25 | struct logger LOGGER_VARIABLE;
26 |
27 | const char* logger_mname(size_t lv)
28 | {
29 | 	// Printable label of every log level, indexed by the level number.
30 | 	static const char labels[13][15]={"CRITICAL(0)","ERROR(1)","ERROR(2)","ERROR(3)","WARNING(4)","WARNING(5)","WARNING(6)","INFO(7)","INFO(8)","INFO(9)","DEBUG(10)","DEBUG(11)","DEBUG(12)"};
31 | 	// Levels above 12 have no label: report that with a null pointer.
32 | 	return lv<13?labels[lv]:0;
33 | }
34 |
35 | void logger_voutput(size_t lv,const char* file,size_t line,const char* fmt,va_list args)	// Writes one record "LEVEL:timestamp:file:line: message" plus a newline; no level filtering is applied here.
36 | {
37 | 	char timing[100];
38 | 	struct tm *str_time;
39 | 	time_t rawtime;
40 | 	const char* name=logger_mname(lv);	// null when lv is not one of the 13 known levels
41 | 	time(&rawtime);
42 | 	str_time=localtime(&rawtime);
43 | 	// localtime can fail and return null; fall back to an empty timestamp instead of passing null to strftime
44 | 	if(!str_time||!strftime(timing,sizeof(timing),"%Y-%m-%d %H:%M:%S",str_time)) timing[0]=0;
45 | 	logprintf("%s:%s:%s:"PRINTFSIZET": ",name?name:"UNKNOWN",timing,file,line);	// a null pointer must never reach %s
46 | 	logvprintf(fmt,args);
47 | 	logprintf("%s",_NEWLINE_);
48 | }
49 |
50 | void logger_output(size_t lv,const char* file,size_t line,const char* fmt,...)	// Formats and writes one log record unconditionally: printf-style fmt plus arguments, tagged with level lv and source position file:line.
51 | {
52 | 	va_list args;
53 | 	va_start(args,fmt);
54 | 	logger_voutput(lv,file,line,fmt,args);
55 | 	va_end(args);	// every va_start must be matched by va_end before the function returns
56 | }
57 | int logger_log(const struct logger* l,size_t lv,const char* file,size_t line,const char* fmt,...)	// Writes a printf-style record through logger l. Returns 1 if lv exceeds l->lv (nothing is written), 0 once the record is written.
58 | {
59 | 	va_list args;
60 | 	if(lv>l->lv)	// filter before touching the argument list, so the early return needs no cleanup
61 | 		return 1;
62 | 	va_start(args,fmt);
63 | 	logger_voutput(lv,file,line,fmt,args);
64 | 	va_end(args);	// every va_start must be matched by va_end before the function returns
65 | 	return 0;
66 | }
67 | int logger_init(struct logger* l,size_t lv) // Sets the output level of logger l to lv. Returns 0 on success, 1 if l is NULL.
68 | {
69 | if(!l)
70 | {
71 | logger_output(1,__FILE__,__LINE__,"NULL logger."); // written via logger_output, which applies no level filtering
72 | return 1;
73 | }
74 | l->lv=lv;
75 | return 0;
76 | }
77 |
78 | int logger_default_init(size_t lv) // Sets the level of the global logger LOGGER_VARIABLE to lv; returns the result of logger_init.
79 | {
80 | return logger_init(&LOGGER_VARIABLE,lv);
81 | }
82 |
83 | struct logger* logger_new(size_t lv) // Allocates a logger and sets its level to lv. Returns the new logger, or 0 if allocation or initialization fails.
84 | {
85 | struct logger* l;
86 | CALLOCSIZE(l,1); // NOTE(review): CALLOCSIZE is defined outside this listing; presumably it zero-allocates one struct logger into l - confirm
87 | if(!l)
88 | {
89 | logger_output(1,__FILE__,__LINE__,"Logger allocation failed."); // written via logger_output, which applies no level filtering
90 | return 0;
91 | }
92 | if(logger_init(l,lv))
93 | {
94 | free(l); // initialization failed: release the block so it does not leak
95 | return 0;
96 | }
97 | return l;
98 | }
99 |
--------------------------------------------------------------------------------
/base/logger.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | // This file contains the logger and error functions of different levels
19 | #ifndef _HEADER_LIB_LOGGER_H_
20 | #define _HEADER_LIB_LOGGER_H_
21 | #include "config.h"
22 | #include
23 | #include
24 | #include "os.h"
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 | // global variable name for struct logger
31 | #define LOGGER_VARIABLE logger_variable
32 | // Logging macro. logs with significance level LV, and the rest are in printf format.
33 | #define LOGS(LOGGERX,LV,...) logger_log(LOGGERX,LV,__FILE__,__LINE__,__VA_ARGS__);
34 | #define LOG(LV,...) LOGS(&LOGGER_VARIABLE,LV,__VA_ARGS__)
35 | #define LOGLV(LV) LOGGER_VARIABLE.lv=LV
36 | /* Logging levels:
37 | * CRITICAL(0),ERROR(1),ERROR(2),ERROR(3),WARNING(4),WARNING(5),WARNING(6),INFO(7),INFO(8),INFO(9),DEBUG(10),DEBUG(11),DEBUG(12)
38 | */
39 |
40 | struct logger{
41 | // logger output level. Only message levels.
17 | */
18 | // This file contains the macro definitions, such as cleanup macros
19 |
20 | #ifndef _HEADER_LIB_MACROS_H_
21 | #define _HEADER_LIB_MACROS_H_
22 | #include "config.h"
23 | #include
24 | #include "types.h"
25 | #include "logger.h"
26 |
27 | #define ERRRETV(V,...) {LOG(1,__VA_ARGS__) CLEANUP return V;}
28 | #define ERRRET(...) ERRRETV(1,__VA_ARGS__)
29 |
30 | #define AUTOALLOCHEADER _autoalloc_
31 | #ifndef __STDC_NO_VLA__
32 | /* Automatically allocate memory depending on size. For count<=countmax,
33 | * allocation is through stack. For count>countmax, allocation is through
34 | * heap.
35 | */
36 |
37 | #define AUTOALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \
38 | TYPE AUTOALLOCHEADER##NAME[(COUNT)<=(COUNTMAX)?(COUNT):0];\
39 | TYPE * SUFFIX NAME;\
40 | if((COUNT)<=(COUNTMAX))NAME=AUTOALLOCHEADER##NAME;\
41 | else{if(COUNT) NAME=(TYPE*)malloc((COUNT)*(sizeof(TYPE)));\
42 | else NAME=0;}
43 | #define AUTOCALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \
44 | TYPE AUTOALLOCHEADER##NAME[(COUNT)<=(COUNTMAX)?(COUNT):0];\
45 | TYPE * SUFFIX NAME;\
46 | if((COUNT)<=(COUNTMAX)){\
47 | NAME=AUTOALLOCHEADER##NAME;\
48 | memset(NAME,0,(COUNT)*sizeof(TYPE));}\
49 | else{if(COUNT) NAME=(TYPE*)calloc(COUNT,sizeof(TYPE));\
50 | else NAME=0;}
51 |
52 | /* Automatically free memory depending on size. Does nothing if memory is on stack,
53 | * frees memory if is on heap.
54 | */
55 | #define AUTOFREE(NAME) if(sizeof(AUTOALLOCHEADER##NAME)==0)CLEANMEM(NAME)
56 | #else
57 | #define AUTOALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \
58 | TYPE * SUFFIX NAME=(TYPE*)malloc((COUNT)*sizeof(TYPE));
59 | #define AUTOCALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,SUFFIX) \
60 | TYPE * SUFFIX NAME=(TYPE*)calloc(COUNT,sizeof(TYPE)); /* no-VLA fallback: NAME always points to a zero-filled heap block of COUNT elements; COUNTMAX is unused here */
61 | #define AUTOFREE(NAME) CLEANMEM(NAME)
62 | #endif
63 | #define AUTOALLOC(TYPE,NAME,COUNT,COUNTMAX) AUTOALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,)
64 | #define AUTOCALLOC(TYPE,NAME,COUNT,COUNTMAX) AUTOCALLOCSUF(TYPE,NAME,COUNT,COUNTMAX,)
65 |
66 |
67 | #define AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,BASETYPE,VECTYPE) \
68 | AUTOALLOC(BASETYPE,_vec_##NAME,COUNT,COUNTMAX)\
69 | CONCATENATE2(VECTYPE,_view) _vecview_##NAME;\
70 | VECTYPE* NAME=0;\
71 | if(_vec_##NAME)\
72 | {\
73 | _vecview_##NAME=CONCATENATE2(VECTYPE,_view_array)(_vec_##NAME,COUNT);\
74 | NAME=&_vecview_##NAME.vector;\
75 | }
76 | #define AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,BASETYPE,MATTYPE) \
77 | AUTOALLOC(BASETYPE,_mat_##NAME,(COUNT1)*(COUNT2),COUNTMAX) /* backing buffer of COUNT1*COUNT2 elements */\
78 | CONCATENATE2(MATTYPE,_view) _matview_##NAME; /* view type is derived from MATTYPE, this macro's own parameter */\
79 | MATTYPE* NAME=0; /* stays 0 when the backing buffer could not be obtained */\
80 | if(_mat_##NAME)\
81 | {\
82 | _matview_##NAME=CONCATENATE2(MATTYPE,_view_array)(_mat_##NAME,COUNT1,COUNT2);\
83 | NAME=&_matview_##NAME.matrix;\
84 | }
85 |
86 | #define AUTOFREEVEC(NAME) if(NAME){AUTOFREE(_vec_##NAME)NAME=0;}
87 | #define AUTOFREEMAT(NAME) if(NAME){AUTOFREE(_mat_##NAME)NAME=0;}
88 |
89 | #define AUTOALLOCVECO(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,float, VECTORO)
90 | #define AUTOALLOCVECD(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,double, VECTORD)
91 | #define AUTOALLOCVECC(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,char, VECTORC)
92 | #define AUTOALLOCVECUC(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,unsigned char, VECTORUC)
93 | #define AUTOALLOCVECI(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,int, VECTORI)
94 | #define AUTOALLOCVECL(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,long, VECTORL)
95 | #define AUTOALLOCVECUL(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,unsigned long, VECTORUL)
96 | #define AUTOALLOCVECF(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,FTYPE, VECTORF)
97 | #define AUTOALLOCVECG(NAME,COUNT,COUNTMAX) AUTOALLOCVECTOR(NAME,COUNT,COUNTMAX,GTYPE, VECTORG)
98 | #define AUTOALLOCMATO(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,float, MATRIXO)
99 | #define AUTOALLOCMATD(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,double, MATRIXD)
100 | #define AUTOALLOCMATC(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,char, MATRIXC)
101 | #define AUTOALLOCMATUC(NAME,COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,unsigned char, MATRIXUC)
102 | #define AUTOALLOCMATI(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,int, MATRIXI)
103 | #define AUTOALLOCMATL(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,long, MATRIXL)
104 | #define AUTOALLOCMATUL(NAME,COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,unsigned long, MATRIXUL)
105 | #define AUTOALLOCMATF(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,FTYPE, MATRIXF)
106 | #define AUTOALLOCMATG(NAME, COUNT1,COUNT2,COUNTMAX) AUTOALLOCMATRIX(NAME,COUNT1,COUNT2,COUNTMAX,GTYPE, MATRIXG)
107 |
108 | // Cleanup macros
109 | #define CLEANANY(X,F) if(X){F(X);X=0;}
110 | #define CLEANMEM(X) CLEANANY(X,free)
111 | #define CLEANVECO(X) CLEANANY(X,VECTOROF(free))
112 | #define CLEANVECD(X) CLEANANY(X,VECTORDF(free))
113 | #define CLEANVECC(X) CLEANANY(X,VECTORCF(free))
114 | #define CLEANVECUC(X) CLEANANY(X,VECTORUCF(free))
115 | #define CLEANVECI(X) CLEANANY(X,VECTORIF(free))
116 | #define CLEANVECL(X) CLEANANY(X,VECTORLF(free))
117 | #define CLEANVECUL(X) CLEANANY(X,VECTORULF(free))
118 | #define CLEANVECF(X) CLEANANY(X,VECTORFF(free))
119 | #define CLEANVECG(X) CLEANANY(X,VECTORGF(free))
120 |
121 | #define CLEANMATO(X) CLEANANY(X,MATRIXOF(free))
122 | #define CLEANMATD(X) CLEANANY(X,MATRIXDF(free))
123 | #define CLEANMATC(X) CLEANANY(X,MATRIXCF(free))
124 | #define CLEANMATUC(X) CLEANANY(X,MATRIXUCF(free))
125 | #define CLEANMATI(X) CLEANANY(X,MATRIXIF(free))
126 | #define CLEANMATL(X) CLEANANY(X,MATRIXLF(free))
127 | #define CLEANMATUL(X) CLEANANY(X,MATRIXULF(free))
128 | #define CLEANMATF(X) CLEANANY(X,MATRIXFF(free))
129 | #define CLEANMATG(X) CLEANANY(X,MATRIXGF(free))
130 |
131 | #define CLEANPERM(X) CLEANANY(X,gsl_permutation_free)
132 | #define CLEANHIST(X) CLEANANY(X,gsl_histogram_free)
133 | #define CLEANFILE(X) CLEANANY(X,fclose)
134 | #define CLEANMMATF(X,N) if(X){for(i=0;i.
17 | */
18 | #include "config.h"
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include "gsl/sf.h"
24 | #include "gsl/math.h"
25 | #include "types.h"
26 | #include "logger.h"
27 | #include "math.h"
28 |
29 |
30 | void math_cdf_quantile_calc(double start,double step,size_t n,double left,double right,double (*func)(double,const void*),const void* param,double eps,double* ans)
31 | {
32 | size_t i,nleft,nright;
33 | double mid,midv,t1;
34 |
35 | while(n)
36 | {
37 | if(right-left0?(size_t)ceil(t1/step):0;
49 | if(nleft>n)
50 | nleft=n;
51 | nright=n-nleft;
52 | //Small side first
53 | if(nleft<=nright)
54 | {
55 | if(nleft)
56 | {
57 | math_cdf_quantile_calc(start,step,nleft,left,mid,func,param,eps,ans);
58 | start+=step*(double)nleft;
59 | n=nright;
60 | ans+=nleft;
61 | }
62 | left=mid;
63 | }
64 | else
65 | {
66 | if(nright)
67 | {
68 | math_cdf_quantile_calc(start+(double)nleft*step,step,nright,mid,right,func,param,eps,ans+nleft);
69 | n=nleft;
70 | }
71 | right=mid;
72 | }
73 | }
74 | }
75 |
76 |
77 | /* This function is modified from GNU Scientific Library (GSL) version 1.16.
78 | * See https://www.gnu.org/software/gsl/.
79 | */
80 | int math_sf_2F1_m1(const double a, const double b, const double c,const double x, gsl_sf_result * result)
81 | {
82 | double sum_pos = 0.0;
83 | double sum_neg = 0.0;
84 | double del_pos = 0.0;
85 | double del_neg = 0.0;
86 | double del = 0.0;
87 | double k = 0.0;
88 | int i = 0;
89 |
90 | if(fabs(c) < GSL_DBL_EPSILON) {
91 | result->val = 0.0; /* FIXME: ?? */
92 | result->err = 1.0;
93 | return 1;
94 | }
95 |
96 | do {
97 | if(++i > 30000) {
98 | result->val = sum_pos - sum_neg;
99 | result->err = del_pos + del_neg;
100 | result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
101 | result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k)+1.0) * fabs(result->val);
102 | return 1;
103 | }
104 | del *= (a+k)*(b+k) * x / ((c+k) * (k+1.0)); /* Gauss series */
105 |
106 | if(del > 0.0) {
107 | del_pos = del;
108 | sum_pos += del;
109 | }
110 | else if(del == 0.0) {
111 | /* Exact termination (a or b was a negative integer).
112 | */
113 | del_pos = 0.0;
114 | del_neg = 0.0;
115 | break;
116 | }
117 | else {
118 | del_neg = -del;
119 | sum_neg -= del;
120 | }
121 |
122 | k += 1.0;
123 | } while(fabs((del_pos + del_neg)/(sum_pos-sum_neg)) > GSL_DBL_EPSILON);
124 |
125 | result->val = sum_pos - sum_neg;
126 | result->err = del_pos + del_neg;
127 | result->err += 2.0 * GSL_DBL_EPSILON * (sum_pos + sum_neg);
128 | result->err += 2.0 * GSL_DBL_EPSILON * (2.0*sqrt(k) + 1.0) * fabs(result->val);
129 |
130 | return 0;
131 | }
132 |
--------------------------------------------------------------------------------
/base/math.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | /* This lib contains mathematical functions:
19 | * 1: Special functions
20 | * 2: Cumulative distribution function related
21 | */
22 |
23 | #ifndef _HEADER_LIB_MATH_H_
24 | #define _HEADER_LIB_MATH_H_
25 | #include "config.h"
26 | #include
27 | #include
28 | #include "gsl/math.h"
29 | #include "gsl/sf.h"
30 |
31 | #ifdef __cplusplus
32 | extern "C"
33 | {
34 | #endif
35 |
36 | /**************************************************
37 | * Special functions
38 | **************************************************/
39 | // Calculates ln(Gamma(n/2))
40 | static inline double math_sf_lngammahalf(size_t n);
41 |
42 | // Calculates exp(x)-1, where x can be close to 0.
43 | static inline double math_sf_expminusone(double x);
44 |
45 | // Calculates log(x+1), where x can be close to 0.
46 | static inline double math_sf_logplusone(double x);
47 |
48 | // Calculates hypergeometric function minus 1, i.e. 2F1(a,b,c;x)-1
49 | int math_sf_2F1_m1(const double a, const double b, const double c,const double x, gsl_sf_result * result);
50 |
51 | /**************************************************
52 | * CDF related functions
53 | **************************************************/
54 |
55 | /* Locate quantiles of CDF with binary search.
56 | * start: Start quantile location to be calculated
57 | * step: Step of quantile location
58 | * n: Number of quantiles to calculate
59 | * left: All quantiles are known >left.
60 | * right: All quantiles are known left.
71 | * right: All quantiles are known 1E-4?exp(x)-1:x*(1+(x/2)*(1+(x/3)*(1+x/4)));
110 | }
111 |
112 | // Calculates log(1+x). For |x|<=1E-4 it evaluates the Taylor series x-x^2/2+x^3/3-x^4/4 in nested form, which avoids the rounding loss of forming x+1 first.
113 | static inline double math_sf_logplusone(double x)
114 | {
115 |     return fabs(x)>1E-4?log(x+1):x*(1-(x/2)*(1-((x*2)/3)*(1-(x*3)/4)));
116 | }
117 |
118 | static inline void math_cdf_quantile(size_t n,double left,double right,double (*func)(double,const void*),const void* param,double eps,double* ans)
119 | {
120 | double step=1./(double)n;
121 | assert(n>1);
122 | assert((func(left,param)1-step));
123 | math_cdf_quantile_calc(step,step,n-1,left,right,func,param,eps,ans);
124 | }
125 |
126 | #ifdef __cplusplus
127 | }
128 | #endif
129 | #endif
130 |
--------------------------------------------------------------------------------
/base/os.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | // This file contains OS specific routines
19 | #ifndef _HEADER_LIB_OS_H_
20 | #define _HEADER_LIB_OS_H_
21 |
22 | #ifdef _NEWLINE_
23 | #undef _NEWLINE_
24 | #endif
25 | // Any earlier _NEWLINE_ is dropped above so the definitions below always apply.
26 | // OS dependent new line (_NEWLINE_) and printf conversion for size_t (PRINTFSIZET)
27 | #if defined(unix) || defined(__unix__) || defined(__unix) || defined(__APPLE__) || defined(__MACH__) || defined(__linux__)
28 | #define _NEWLINE_ "\n"
29 | #define PRINTFSIZET "%zu"
30 | #endif
31 | #if defined(_WIN32) || defined(_WIN64)
32 | #define _NEWLINE_ "\r\n"
33 | #define PRINTFSIZET "%Iu"
34 | #endif
35 | #ifndef _NEWLINE_
36 | #error Unsupported OS
37 | #endif
38 |
39 | #endif
40 |
--------------------------------------------------------------------------------
/base/random.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | #include "random.h"
19 |
20 | gsl_rng* random_gen; // Global generator used by the random_*() macros in random.h that take no generator argument; allocated by random_init()
21 |
--------------------------------------------------------------------------------
/base/random.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | /* This file contains the low level randomization routines.
19 | */
20 |
21 | #ifndef _HEADER_LIB_RANDOM_H_
22 | #define _HEADER_LIB_RANDOM_H_
23 | #include "config.h"
24 | #include
25 | #include "gsl/rng.h"
26 | #include "gsl/randist.h"
27 | #include "logger.h"
28 | #include "types.h"
29 | #ifdef __cplusplus
30 | extern "C"
31 | {
32 | #endif
33 |
34 | extern gsl_rng* random_gen;
35 | // Allocates a new GSL generator of type gsl_rng_taus2; callers in this library check the result for null.
36 | #define random_new() gsl_rng_alloc(gsl_rng_taus2)
37 | // Allocates a generator with random_new() and stores it in *rng. If allocation fails, *rng holds the null result and a message is emitted through LOG(1,...).
38 | static inline void random_init_any(gsl_rng** rng)
39 | {
40 |     gsl_rng* fresh=random_new();
41 |     *rng=fresh;
42 |     if(!fresh) LOG(1,"Can't allocate random number generator.")
43 | }
44 | #define random_init() random_init_any(&random_gen)
45 | // Naming: *_any(r,...) macros act on the generator r; the short forms act on the global random_gen.
46 | #define random_seed_any(r,s) gsl_rng_set(r,s)
47 | #define random_seed(s) random_seed_any(random_gen,s)
48 | // Free a generator. gsl_rng_free takes the gsl_rng* itself, so the global is passed by value, not by address.
49 | #define random_free_any(r) gsl_rng_free(r)
50 | #define random_free() random_free_any(random_gen)
51 | // Seed with the current value of time(NULL)
52 | #define random_seed_now_any(r) random_seed_any(r,(unsigned long int)time(NULL))
53 | #define random_seed_now() random_seed_now_any(random_gen)
54 |
55 | // Generate uniformly distributed random number
56 | #define random_uniform_any(r) gsl_rng_uniform(r)
57 | #define random_uniform() random_uniform_any(random_gen)
58 | #define random_uniformi_any(r,n) gsl_rng_uniform_int(r,n)
59 | #define random_uniformi(n) random_uniformi_any(random_gen,n)
60 | // Generate gaussian distributed random number
61 | #define random_gaussian_any(r,sigma) gsl_ran_gaussian(r,sigma)
62 | #define random_gaussian(sigma) random_gaussian_any(random_gen,sigma)
63 |
64 | // Randomly shuffle items of a VECTORF in place; the element step is stride*sizeof(FTYPE)
65 | #define random_shufflevf_any(r,f) gsl_ran_shuffle(r,(f)->data,(f)->size,(f)->stride*sizeof(FTYPE))
66 | #define random_shufflevf(f) random_shufflevf_any(random_gen,f)
67 |
68 | //Random shuffle gsl_permutation
69 | #define random_shuffle_any(r,f) gsl_ran_shuffle(r,(f)->data,(f)->size,sizeof(size_t))
70 | #define random_shuffle(f) random_shuffle_any(random_gen,f)
71 |
72 |
73 | #ifdef __cplusplus
74 | }
75 | #endif
76 | #endif
77 |
--------------------------------------------------------------------------------
/base/supernormalize.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | #include "config.h"
19 | #include
20 | #include
21 | #include "gsl/sort.h"
22 | #include "logger.h"
23 | #include "macros.h"
24 | #include "threading.h"
25 | #include "data_process.h"
26 | #include "supernormalize.h"
27 | // Replaces every row of m by Pinv values assigned according to the rank of each entry within its row, then normalizes the rows. p1 is scratch space for the ranking and must hold m->size2 entries; Pinv must hold m->size2 values.
28 | void supernormalize_byrow_single_buffed(MATRIXF* m,gsl_permutation *p1,const FTYPE* restrict Pinv)
29 | {
30 |     size_t i,j;
31 |
32 |     for(j=0;j<m->size1;j++)
33 |     {
34 |         VECTORFF(view) vvs=MATRIXFF(row)(m,j);
35 |
36 |         //Rank: indirect sort of row j, p1 receives the resulting permutation
37 |         CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(p1,&(vvs.vector));
38 |         //Distribution: the entry at sorted position i receives Pinv[i]
39 |         for(i=0;i<m->size2;i++)
40 |             MATRIXFF(set)(m,j,gsl_permutation_get(p1,i),Pinv[i]);
41 |     }
42 |     //Normalize again for unit variance
43 |     MATRIXFF(normalize_row)(m);
44 | }
45 | // Single-threaded supernormalization of every row of m with a temporary ranking buffer. Returns 0 on success, 1 if the buffer cannot be allocated (m is then left untouched).
46 | int supernormalize_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv)
47 | {
48 |     //Ranking buffer with one entry per column
49 |     gsl_permutation *rank_buf=gsl_permutation_alloc(m->size2);
50 |
51 |     if(rank_buf)
52 |     {
53 |         supernormalize_byrow_single_buffed(m,rank_buf,Pinv);
54 |         gsl_permutation_free(rank_buf);
55 |         return 0;
56 |     }
57 |     LOG(1,"Can't allocate permutations.")
58 |     return 1;
59 | }
60 | // Multi-threaded supernormalization of every row of m. Fills Pinv via supernormalize_Pinv, then each OpenMP thread processes its own row range using its own permutation buffer p[thread id].
61 | void supernormalize_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv)
62 | {
63 |     const size_t nth=(size_t)omp_get_max_threads();
64 |     LOG(10,"Supernormalization started for matrix size ("PRINTFSIZET"*"PRINTFSIZET") on "PRINTFSIZET" threads.",m->size1,m->size2,nth)
65 |     supernormalize_Pinv(m->size2,Pinv);
66 |
67 |     #pragma omp parallel
68 |     {
69 |         size_t row_from,row_to;
70 |
71 |         threading_get_startend(m->size1,&row_from,&row_to);
72 |         //Threads that received an empty row range have nothing to do
73 |         if(row_to>row_from)
74 |         {
75 |             const size_t tid=(size_t)omp_get_thread_num();
76 |             MATRIXFF(view) rows=MATRIXFF(submatrix)(m,row_from,0,row_to-row_from,m->size2);
77 |
78 |             supernormalize_byrow_single_buffed(&rows.matrix,p[tid],Pinv);
79 |         }
80 |     }
81 |     LOG(10,"Supernormalization completed.")
82 | }
83 |
84 | int supernormalize_byrow(MATRIXF* m)
85 | {
86 | #define CLEANUP for(i=0;isize2);
96 | ret=!!Pinv;
97 | for(i=0;isize2);
100 | ret=ret&&p[i];
101 | }
102 |
103 | if(!ret)
104 | ERRRET("Not enough memory.")
105 | supernormalize_byrow_buffed(m,p,Pinv);
106 | CLEANUP
107 | return 0;
108 | #undef CLEANUP
109 | }
110 | // Supernormalizes every row of m (single thread), applies MATRIXFF(fluc) with strength fluc, and normalizes the rows again. Returns the status of the supernormalization step (0 on success).
111 | int supernormalizef_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv,FTYPE fluc)
112 | {
113 |     //Use the plain variant: supernormalizea_byrow_single dispatches back to this function when m->size2<30, which would recurse without end
114 |     const int ret=supernormalize_byrow_single(m,Pinv);
115 |     MATRIXFF(fluc)(m,fluc);
116 |     MATRIXFF(normalize_row)(m);
117 |     return ret;
118 | }
119 | // Buffered multi-threaded supernormalization followed by a fluctuation of strength fluc and a second row normalization. Buffers p and Pinv are passed through to supernormalize_byrow_buffed.
120 | void supernormalizef_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv,FTYPE fluc)
121 | {
122 | supernormalize_byrow_buffed(m,p,Pinv);
123 | MATRIXFF(fluc)(m,fluc);
124 | MATRIXFF(normalize_row)(m);
125 | }
126 | // Supernormalizes every row of m with supernormalize_byrow, then fluctuates with strength fluc and normalizes the rows again. Returns the status reported by supernormalize_byrow.
127 | int supernormalizef_byrow(MATRIXF* m,FTYPE fluc)
128 | {
129 |     //The fluctuation and renormalization run regardless of the status
130 |     const int status=supernormalize_byrow(m);
131 |     MATRIXFF(fluc)(m,fluc);
132 |     MATRIXFF(normalize_row)(m);
133 |     return status;
134 | }
135 | // Random supernormalization: for every row of m, draws m->size2 standard gaussian samples into vb, sorts them, and assigns them to the row entries by rank; finally normalizes the rows. p1 and vb are scratch buffers with m->size2 entries, r is the generator to draw from.
136 | void supernormalizer_byrow_single_buffed(MATRIXF* m,gsl_permutation *p1,VECTORF* vb,const gsl_rng* r)
137 | {
138 |     size_t i,j;
139 |     VECTORFF(view) vvs;
140 |
141 |     for(j=0;j<m->size1;j++)
142 |     {
143 |         //Random data, sorted ascending
144 |         for(i=0;i<m->size2;i++)
145 |             VECTORFF(set)(vb,i,(FTYPE)random_gaussian_any(r,1));
146 |         CONCATENATE2(gsl_sort_vector,FTYPE_SUF)(vb);
147 |
148 |         //Rank: indirect sort of row j, p1 receives the resulting permutation
149 |         vvs=MATRIXFF(row)(m,j);
150 |         CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(p1,&(vvs.vector));
151 |         //Distribution: the entry at sorted position i receives the i-th smallest sample
152 |         for(i=0;i<m->size2;i++)
153 |             MATRIXFF(set)(m,j,gsl_permutation_get(p1,i),VECTORFF(get)(vb,i));
154 |     }
155 |     //Normalize again for unit variance
156 |     MATRIXFF(normalize_row)(m);
157 | }
158 | // Multi-threaded random supernormalization of every row of m. Each OpenMP thread handles its own row range with its own scratch row of mb, permutation p[thread id] and generator rng[thread id].
159 | void supernormalizer_byrow_buffed(MATRIXF* m,MATRIXF* mb,gsl_permutation * const *p,gsl_rng * const* rng)
160 | {
161 |     const size_t nth=(size_t)omp_get_max_threads();
162 |     LOG(10,"Randomized normalization started for matrix size ("PRINTFSIZET"*"PRINTFSIZET") on "PRINTFSIZET" threads.",m->size1,m->size2,nth)
163 |
164 |     #pragma omp parallel
165 |     {
166 |         size_t row_from,row_to;
167 |
168 |         threading_get_startend(m->size1,&row_from,&row_to);
169 |         //Threads that received an empty row range have nothing to do
170 |         if(row_to>row_from)
171 |         {
172 |             const size_t tid=(size_t)omp_get_thread_num();
173 |             MATRIXFF(view) rows=MATRIXFF(submatrix)(m,row_from,0,row_to-row_from,m->size2);
174 |             VECTORFF(view) scratch=MATRIXFF(row)(mb,tid);
175 |
176 |             supernormalizer_byrow_single_buffed(&rows.matrix,p[tid],&scratch.vector,rng[tid]);
177 |         }
178 |     }
179 |
180 |     LOG(10,"Randomized normalization completed.")
181 | }
182 |
183 | int supernormalizer_byrow(MATRIXF* m)
184 | {
185 | #define CLEANUP for(i=0;isize2);
197 | ret=!!mb;
198 | for(i=0;isize2);
201 | r[i]=random_new();
202 | ret=ret&&p[i]&&r[i];
203 | }
204 | if(!ret)
205 | ERRRET("Not enough memory.")
206 | random_seed_any(r[0],(size_t)time(NULL));
207 | for(i=1;i.
17 | */
18 | /* This is the header file for supernormalization, i.e. transforming
19 | * samples of a variable to normal distribution N(0,1). Two methods
20 | * are provided: deterministic and random.
21 | */
22 |
23 | #ifndef _HEADER_LIB_SUPERNORMALIZE_H_
24 | #define _HEADER_LIB_SUPERNORMALIZE_H_
25 | #include "config.h"
26 | #include "gsl/permutation.h"
27 | #include "gsl/cdf.h"
28 | #include "gsl/math.h"
29 | #include "random.h"
30 | #include "types.h"
31 | #ifdef __cplusplus
32 | extern "C"
33 | {
34 | #endif
35 |
36 | /**********************************************************************
37 | * Deterministic supernormalization
38 | **********************************************************************/
39 |
40 | /* Supernormalize matrix per row with single thread and buffer provided.
41 | * Supernormalization converts the existing data into a normal distribution
42 | * with 0 mean and 1 variance. Due to numerical errors, the resulting values may be inexact. This is performed
43 | * by first converting data into their rankings, then assigning new values according to the inverse cumulative
44 | * distribution function at the respective fraction. After that, a normalization is performed to scale
45 | * the new data to 0 mean and 1 variance.
46 | * m: Matrix to be supernormalized. Overwrites data.
47 | * p1: Permutation objects for ranking conversion
48 | * Pinv: Inverse transformation from ranking to normal distribution
49 | * (precalculated CDF values of normal distribution of the respective ranking)
50 | */
51 | void supernormalize_byrow_single_buffed(MATRIXF* m,gsl_permutation *p1,const FTYPE* restrict Pinv);
52 |
53 | /* Supernormalize matrix per row with single thread; the permutation buffer is allocated internally.
54 | * See supernormalize_byrow_single_buffed for detail.
55 | * m: Matrix to be supernormalized. Overwrites data.
56 | * Pinv: Inverse transformation from ranking to normal distribution
57 | * (precalculated CDF values of normal distribution of the respective ranking)
58 | * Return: 0 on success.
59 | */
60 | int supernormalize_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv);
61 |
62 | /* Obtain Inverse CDF for normal distribution of n fractiles.
63 | * n: n
64 | * Pinv: Inverse CDF of normal distribution. Return[i]=CDF^(-1)((i+1)/(n+1)).
65 | */
66 | static inline void supernormalize_Pinv(size_t n,FTYPE*restrict Pinv);
67 |
68 |
69 | /* Supernormalizes and overwrites each row of matrix m.
70 | * Supernormalize into 0 mean and 1 variance, and fulfills normal distribution
71 | * Therefore numbers are assigned purely according to the rankings.
72 | * Uses multiple threads
73 | * Ties are ordered sequentially by GSL (potential increased correlation between rows)
74 | * With or without buffer included:
75 | * m: (n1,n2) Matrix to be supernormalized
76 | * p: (nth) permutation buffer
77 | * Pinv:Buffer to calculate and place inverse CDF
78 | * nth: Number of threads.
79 | * Return: 0 if success.
80 | */
81 | void supernormalize_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv);
82 | int supernormalize_byrow(MATRIXF* m);
83 |
84 | /**********************************************************************
85 | * Fluctuations after deterministic supernormalization
86 | **********************************************************************/
87 |
88 | /* Same with supernormalize_byrow_single,
89 | * supernormalize_byrow_buffed, and supernormalize_byrow,
90 | * but with an extra parameter fluc:
91 | * After supernormalization, every element x is fluctuated randomly,
92 | * being replaced by x*(1+y*fluc), where y is uniformly distributed in [-1,1).
93 | * The new matrix is then normalized to 0 mean and unit variance.
94 | * Return: 0 if success.
95 | */
96 | int supernormalizef_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv,FTYPE fluc);
97 | void supernormalizef_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv,FTYPE fluc);
98 | int supernormalizef_byrow(MATRIXF* m,FTYPE fluc);
99 |
100 | /**********************************************************************
101 | * Auto fluctuations after deterministic supernormalization
102 | **********************************************************************/
103 |
104 | /* Only fluctuates when m->size2<30, with fluc=2*m->size2^(-2).
105 | */
106 | static inline int supernormalizea_byrow_single(MATRIXF* m,const FTYPE* restrict Pinv);
107 | static inline void supernormalizea_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv);
108 | static inline int supernormalizea_byrow(MATRIXF* m);
109 |
110 | /**********************************************************************
111 | * Random supernormalization
112 | **********************************************************************/
113 |
114 | //Check their supernormalize counterparts for definition.
115 | void supernormalizer_byrow_buffed(MATRIXF* m,MATRIXF* mb,gsl_permutation * const *p,gsl_rng * const* rng);
116 |
117 | int supernormalizer_byrow(MATRIXF* m);
118 |
119 |
120 | /**********************************************************************
121 | * Inline functions
122 | **********************************************************************/
123 |
124 | static inline void supernormalize_Pinv(size_t n,FTYPE* restrict Pinv)
125 | {
126 | size_t i;
127 |
128 | for(i=0;isize2<30)
135 | return supernormalizef_byrow_single(m,Pinv,(FTYPE)(2./gsl_pow_2((FTYPE)m->size2)));
136 | else
137 | return supernormalize_byrow_single(m,Pinv);
138 | }
139 | // Buffered supernormalization with automatic fluctuation: rows with fewer than 30 columns are additionally fluctuated with strength 2/size2^2.
140 | static inline void supernormalizea_byrow_buffed(MATRIXF* m,gsl_permutation * const *p,FTYPE* Pinv)
141 | {
142 |     if(m->size2>=30)
143 |         supernormalize_byrow_buffed(m,p,Pinv);
144 |     else
145 |         supernormalizef_byrow_buffed(m,p,Pinv,(FTYPE)(2./gsl_pow_2((FTYPE)m->size2)));
146 | }
147 | // Supernormalization with automatic fluctuation: rows with fewer than 30 columns are additionally fluctuated with strength 2/size2^2. Returns the status of the variant that was run.
148 | static inline int supernormalizea_byrow(MATRIXF* m)
149 | {
150 |     //Enough columns: plain deterministic supernormalization
151 |     if(m->size2>=30)
152 |         return supernormalize_byrow(m);
153 |     return supernormalizef_byrow(m,(FTYPE)(2./gsl_pow_2((FTYPE)m->size2)));
154 | }
155 |
156 | #ifdef __cplusplus
157 | }
158 | #endif
159 | #endif
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
--------------------------------------------------------------------------------
/base/threading.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | // This file contains the functions for multi-threading
19 | #ifndef _HEADER_LIB_THREADING_H_
20 | #define _HEADER_LIB_THREADING_H_
21 | #include "config.h"
22 | #include
23 | #include
24 |
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 |
31 | /* Calculate the split position of the big problem into smaller ones.
32 | * ntotal: Total size of the problem
33 | * nthread: Total number of threads
34 | * x: ID of current thread
35 | * Return: The start position of problem id by thread x
36 | */
37 | static inline size_t threading_get_start_bare(size_t ntotal,size_t nthread,size_t x);
38 |
39 | /* Calculate the start and end position of the big problem for any thread.
40 | * ntotal: Total size of the problem
41 | * start,
42 | * end: Return location of start and end positions for any thread
43 | * id: ID of current thread
44 | * ida: Total number of threads
45 | */
46 | static inline void threading_get_startend_from(size_t ntotal,size_t *start,size_t *end,size_t id,size_t ida);
47 |
48 | /* Calculate the start and end position of the big problem for current thread (with openMP)
49 | * ntotal: Total size of the problem
50 | * start,
51 | * end: Return location of start and end positions for current thread
52 | */
53 | static inline void threading_get_startend(size_t ntotal,size_t *start,size_t *end);
54 |
55 |
56 | static inline size_t threading_get_start_bare(size_t ntotal,size_t nthread,size_t x)
57 | {
58 |     //Every thread gets `share` items; the first `spare` threads get one more
59 |     const size_t share=ntotal/nthread;
60 |     const size_t spare=ntotal%nthread;
61 |     //Threads before x contribute their share plus one extra item each,
62 |     //for at most `spare` of them
63 |     const size_t begin=share*x+(spare>x?x:spare);
64 |
65 |     //An id at or beyond nthread maps to the end of the problem
66 |     return begin>ntotal?ntotal:begin;
67 | }
68 | // Stores in *start and *end the half-open item range [start,end) of thread id out of ida threads: the start of thread id and the start of thread id+1.
69 | static inline void threading_get_startend_from(size_t ntotal,size_t *start,size_t *end,size_t id,size_t ida)
70 | {
71 |     const size_t range_begin=threading_get_start_bare(ntotal,ida,id),range_end=threading_get_start_bare(ntotal,ida,id+1);
72 |     *start=range_begin; *end=range_end;
73 | }
74 | // Same as threading_get_startend_from, with thread id and thread count taken from OpenMP (omp_get_thread_num / omp_get_num_threads).
75 | static inline void threading_get_startend(size_t ntotal,size_t *start,size_t *end)
76 | {
77 |     //Both OpenMP queries only read state, so they are passed inline
78 |     threading_get_startend_from(ntotal,start,end,
79 |         (size_t)omp_get_thread_num(),(size_t)omp_get_num_threads());
80 | }
81 |
82 | #ifdef __cplusplus
83 | }
84 | #endif
85 |
86 | #endif
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/base/types.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | // This file contains the numerical type definitions, especially vectors and matrices
19 |
20 | #ifndef _HEADER_LIB_TYPES_H_
21 | #define _HEADER_LIB_TYPES_H_
22 | #include "config.h"
23 | #include
24 | #include "gsl/vector.h"
25 | #include "gsl/matrix.h"
26 | #include "gsl/blas.h"
27 |
28 | // Selects the floating point type (FTYPE) from FTYPEBITS and the genotype type (GTYPE) from GTYPEBITS. Both are expected to be defined before this point; presumably they come from config.h or the build system (the Makefile sets FTYPEBITS=32 and GTYPEBITS=8) - TODO confirm.
29 | #if FTYPEBITS == 32
30 | // Type definition
31 | #define FTYPE float
32 | // Type suffix definition, for gsl vector and matrix functions
33 | #define FTYPE_SUF _float
34 | // BLAS function macro
35 | #define BLASF(X) BLASFO(X)
36 | // Minimal value
37 | #define FTYPE_MIN FLT_MIN
38 | #define FTYPE_MAX FLT_MAX
39 | #elif FTYPEBITS == 64
40 | #define FTYPE double
41 | #define FTYPE_SUF
42 | #define BLASF(X) BLASFD(X)
43 | #define FTYPE_MIN DBL_MIN
44 | #define FTYPE_MAX DBL_MAX
45 | #else
46 | #error Unknown float type bit count.
47 | #endif
48 | #if GTYPEBITS == 8
49 | #define GTYPE unsigned char
50 | #define GTYPE_SUF _uchar
51 | #else
52 | #error Unknown genotype type bit count.
53 | #endif
54 | #define BLASFO(X) gsl_blas_s ## X
55 | #define BLASFD(X) gsl_blas_d ## X
56 | // Token pasting helpers. The two-level form lets macro arguments such as FTYPE_SUF be expanded before ## is applied; the underscore variants paste their arguments as written.
57 | #define CONCATENATE2_(X,Y) X ## Y
58 | #define CONCATENATE2(X,Y) CONCATENATE2_(X,Y)
59 | #define CONCATENATE3_(X,Y,Z) X ## Y ## Z
60 | #define CONCATENATE3(X,Y,Z) CONCATENATE3_(X,Y,Z)
61 | #define CONCATENATE4_(X,Y,Z,W) X ## Y ## Z ## W
62 | #define CONCATENATE4(X,Y,Z,W) CONCATENATE4_(X,Y,Z,W)
63 | // Suffix letters: O=float, D=double, C=char, UC=unsigned char, I=int, L=long, UL=unsigned long, F=FTYPE, G=GTYPE
64 | // vector type macro
65 | #define VECTORO gsl_vector_float
66 | #define VECTORD gsl_vector
67 | #define VECTORC gsl_vector_char
68 | #define VECTORUC gsl_vector_uchar
69 | #define VECTORI gsl_vector_int
70 | #define VECTORL gsl_vector_long
71 | #define VECTORUL gsl_vector_ulong
72 | #define VECTORF CONCATENATE2(gsl_vector,FTYPE_SUF)
73 | #define VECTORG CONCATENATE2(gsl_vector,GTYPE_SUF)
74 | // vector function type macro: VECTORxF(X) names the function or type gsl_vector[_type]_X
75 | #define VECTOROF(X) gsl_vector_float_ ## X
76 | #define VECTORDF(X) gsl_vector_ ## X
77 | #define VECTORCF(X) gsl_vector_char_ ## X
78 | #define VECTORUCF(X) gsl_vector_uchar_ ## X
79 | #define VECTORIF(X) gsl_vector_int_ ## X
80 | #define VECTORLF(X) gsl_vector_long_ ## X
81 | #define VECTORULF(X) gsl_vector_ulong_ ## X
82 | #define VECTORFF(X) CONCATENATE2(VECTORF,_ ## X)
83 | #define VECTORGF(X) CONCATENATE2(VECTORG,_ ## X)
84 | // matrix type macro
85 | #define MATRIXO gsl_matrix_float
86 | #define MATRIXD gsl_matrix
87 | #define MATRIXC gsl_matrix_char
88 | #define MATRIXUC gsl_matrix_uchar
89 | #define MATRIXI gsl_matrix_int
90 | #define MATRIXL gsl_matrix_long
91 | #define MATRIXUL gsl_matrix_ulong
92 | #define MATRIXF CONCATENATE2(gsl_matrix,FTYPE_SUF)
93 | #define MATRIXG CONCATENATE2(gsl_matrix,GTYPE_SUF)
94 | // matrix function type macro: MATRIXxF(X) names the function or type gsl_matrix[_type]_X
95 | #define MATRIXOF(X) gsl_matrix_float_ ## X
96 | #define MATRIXDF(X) gsl_matrix_ ## X
97 | #define MATRIXCF(X) gsl_matrix_char_ ## X
98 | #define MATRIXUCF(X) gsl_matrix_uchar_ ## X
99 | #define MATRIXIF(X) gsl_matrix_int_ ## X
100 | #define MATRIXLF(X) gsl_matrix_long_ ## X
101 | #define MATRIXULF(X) gsl_matrix_ulong_ ## X
102 | #define MATRIXFF(X) CONCATENATE2(MATRIXF,_ ## X)
103 | #define MATRIXGF(X) CONCATENATE2(MATRIXG,_ ## X)
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 | #endif
157 |
--------------------------------------------------------------------------------
/cycle/cycle.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | /* This lib contains the general definitions of cycle detection routines.
19 | */
20 |
21 | #ifndef _HEADER_LIB_CYCLE_H_
22 | #define _HEADER_LIB_CYCLE_H_
23 | #include "../base/config.h"
24 | #include
25 | #include "vg.h"
26 |
27 | #ifdef __cplusplus
28 | extern "C"
29 | {
30 | #endif
31 | // Maps a generic cycle detection routine name X onto the cycle_vg_X implementation declared in vg.h
32 | #define CYCLEF(X) cycle_vg_ ## X
33 |
34 | #ifdef __cplusplus
35 | }
36 | #endif
37 | #endif
38 |
--------------------------------------------------------------------------------
/cycle/vg.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | #include "../base/config.h"
19 | #include
20 | #include
21 | #include "../base/general_alg.h"
22 | #include "../base/macros.h"
23 | #include "vg.h"
24 | /* Allocate every member of vg for dim vertices and at most amax arcs, then reset it to the empty graph. Returns 0 on success; on failure vg is released with cycle_vg_free() and 1 is returned. */
25 | int cycle_vg_init(struct cycle_vg_system* restrict vg,size_t dim,size_t amax)
26 | {
27 |     int ret;
28 |     assert(vg);
29 |     //Zero the whole struct first so that cycle_vg_free() never meets an indeterminate member on a failure path.
30 |     memset(vg,0,sizeof(*vg));
31 |     vg->n=dim;
32 |     vg->nam=amax;
33 |     vg->nim=vg->nom=(size_t)-1; //Per-vertex limits start at the maximum size_t value; callers may lower them afterwards.
34 |     MALLOCSIZE(vg->lvf,dim);
35 |     MALLOCSIZE(vg->lvb,dim);
36 |     MALLOCSIZE(vg->go,dim);
37 |     MALLOCSIZE(vg->goi,dim);
38 |     ret=(data_ll_init(&vg->gao,amax)!=0)|(data_ll_init(&vg->gai,amax)!=0); //Bitwise |, not ||: both lists must be initialized even when the first one fails, because both are freed below.
39 |     MALLOCSIZE(vg->gaof,dim);
40 |     MALLOCSIZE(vg->gaif,dim);
41 |     MALLOCSIZE(vg->gni,dim);
42 |     MALLOCSIZE(vg->gno,dim);
43 |     MALLOCSIZE(vg->lao,dim);
44 |     MALLOCSIZE(vg->lai,dim);
45 |     MALLOCSIZE(vg->buff,dim);
46 |     MALLOCSIZE(vg->buff2,dim);
47 |     ret|=(data_heap_init(&vg->lvfl,dim)!=0)|(data_heapdec_init(&vg->lvbl,dim)!=0); //Same reasoning: never skip a heap initializer.
48 |     if(ret||!(vg->lvf&&vg->lvb&&vg->go&&vg->goi&&vg->gaof&&vg->gaif&&vg->gni&&vg->gno&&vg->lao&&vg->lai&&vg->buff&&vg->buff2))
49 |     {
50 |         cycle_vg_free(vg);
51 |         LOG(1,"Not enough memory.")
52 |         return 1;
53 |     }
54 |     //Initialize for empty graph.
55 |     if(cycle_vg_empty(vg))
56 |     {
57 |         cycle_vg_free(vg);
58 |         return 1;
59 |     }
60 |     return 0;
61 | }
62 | /* Heap-allocate a cycle detection system and initialize it with cycle_vg_init(). Returns the new system, or 0 when the allocation or the initialization fails. */
63 | struct cycle_vg_system* cycle_vg_new(size_t dim,size_t amax)
64 | {
65 |     struct cycle_vg_system* sys;
66 |     MALLOCSIZE(sys,1);
67 |     if(sys)
68 |     {
69 |         if(!cycle_vg_init(sys,dim,amax))
70 |             return sys;
71 |         //cycle_vg_init() released the members on its failure paths; only the shell is left to free.
72 |         free(sys);
73 |         return 0;
74 |     }
75 |     //The shell itself could not be allocated.
76 |     LOG(1,"Not enough memory.")
77 |     return 0;
78 | }
79 | /* Release every buffer owned by vg (vg itself is not freed) and null the plain array pointers. Always returns 0. */
80 | int cycle_vg_free(struct cycle_vg_system* restrict vg)
81 | {
82 | #define FREEMEM(X) if(X){free(X);X=0;}
83 |     FREEMEM(vg->lvf)
84 |     FREEMEM(vg->lvb)
85 |     FREEMEM(vg->go)
86 |     FREEMEM(vg->goi)
87 |     FREEMEM(vg->gaof)
88 |     FREEMEM(vg->gaif)
89 |     FREEMEM(vg->gni)
90 |     FREEMEM(vg->gno)
91 |     FREEMEM(vg->lao)
92 |     FREEMEM(vg->lai)
93 |     FREEMEM(vg->buff)
94 |     FREEMEM(vg->buff2)
95 |     data_ll_free(&vg->gao);
96 |     data_ll_free(&vg->gai);
97 |     data_heap_free(&vg->lvfl); //lvfl is the struct data_heap created by data_heap_init() in cycle_vg_init().
98 |     data_heapdec_free(&vg->lvbl); //lvbl is the struct data_heapdec created by data_heapdec_init().
99 |     return 0;
100 | #undef FREEMEM
101 | }
102 | /* Reset vg to an arc-free graph of the same size with the identity vertex order. Always returns 0. */
103 | int cycle_vg_empty(struct cycle_vg_system* restrict vg)
104 | {
105 |     size_t i;
106 |     vg->na=0;
107 |     memset(vg->gaof,-1,vg->n*sizeof(*vg->gaof)); //All-ones bytes, i.e. (size_t)-1: no first out-arc.
108 |     memset(vg->gaif,-1,vg->n*sizeof(*vg->gaif)); //Likewise: no first in-arc.
109 |     memset(vg->gni,0,vg->n*sizeof(*vg->gni));
110 |     memset(vg->gno,0,vg->n*sizeof(*vg->gno));
111 |     data_ll_empty(&vg->gao);
112 |     data_ll_empty(&vg->gai);
113 |     for(i=0;i<vg->n;i++)
114 |     {
115 |         vg->go[i]=i;
116 |         vg->goi[i]=i;
117 |     }
118 |     return 0;
119 | }
120 | /* Rebuild the vertex order (goi, then go) after a backward arc from vv was added. NOTE(review): source lines 135-141 are missing from this copy and line 134 is truncated, so the branch structure shown here is incomplete; recover them from the upstream file before changing this function. */
121 | void cycle_vg_restore_order(struct cycle_vg_system* restrict vg,size_t vv)
122 | {
123 | size_t t;
124 | char cond;
125 | size_t* p[2];
126 | size_t* ps;
127 |
128 | t=vg->go[vv]; //Position of vv in the current order.
129 | cond=!!vg->lvfl.n; //Nonzero when the forward heap left by cycle_vg_add() is not empty.
130 | if(cond)
131 | {
132 | size_t t1;
133 | t1=data_heap_top(&vg->lvfl);
134 | if(t1buff; //NOTE(review): truncated line, see the header comment.
142 | p[1]=vg->buff2;
143 | general_alg_categorize_embed(vg->goi,vg->lvf,p,t); //Presumably splits the first t entries of goi into p[0]/p[1] by their lvf flag; helper is defined elsewhere, confirm.
144 | *(p[0]++)=vg->goi[t];
145 | memcpy(p[0],vg->buff2,(size_t)(p[1]-vg->buff2)*sizeof(*p[0])); //Append what was collected in buff2.
146 | memcpy(vg->buff+t+1,vg->goi+t+1,(vg->n-t-1)*sizeof(*vg->buff)); //Entries after position t are copied unchanged.
147 |
148 | ps=vg->buff; //Swap buffers: the rebuilt sequence becomes goi, the old goi becomes scratch.
149 | vg->buff=vg->goi;
150 | vg->goi=ps;
151 | cycle_vg_fix_go(vg); //Recompute go from the new goi.
152 | return;
153 | }
154 |
155 | p[0]=vg->buff;
156 | p[1]=vg->buff2;
157 | general_alg_categorize_embed(vg->goi,vg->lvf,p,t);
158 | *(p[1]++)=vg->goi[t];
159 | ps=p[0]; //Exchange the two output cursors before the second pass.
160 | p[0]=p[1];
161 | p[1]=ps;
162 | general_alg_categorize_embed(vg->goi+t+1,vg->lvb,p,vg->n-t-1); //Second pass over the entries after position t, keyed on the lvb flags.
163 | memcpy(p[1],vg->buff2,(size_t)(p[0]-vg->buff2)*sizeof(*p[1])); //Append what was collected in buff2.
164 |
165 | ps=vg->buff; //Swap buffers as in the branch above.
166 | vg->buff=vg->goi;
167 | vg->goi=ps;
168 | cycle_vg_fix_go(vg);
169 | return;
170 | }
171 | /* Try to add arc v1->v2 while keeping the graph acyclic. Returns 0 on success, or 1 when the arc limit is reached, the arc would close a cycle, or cycle_vg_add_arc() refuses it. */
172 | int cycle_vg_add(struct cycle_vg_system* restrict vg,size_t v1,size_t v2)
173 | {
174 |
175 |     //Validity check
176 |     assert(v1!=v2);
177 |     if(vg->na>=vg->nam)
178 |         return 1;
179 |     if(vg->go[v1]<vg->go[v2])
180 |         return cycle_vg_add_arc(vg,v1,v2); //v1 already precedes v2 in the order: no search or reordering is performed.
181 |
182 |     //Initialize
183 |     data_heap_empty(&vg->lvfl);
184 |     data_heapdec_empty(&vg->lvbl);
185 |     memset(vg->lvf,0,vg->n*sizeof(*vg->lvf));
186 |     memset(vg->lvb,0,vg->n*sizeof(*vg->lvb));
187 |     memset(vg->lao,-1,vg->n*sizeof(*vg->lao));
188 |     memset(vg->lai,-1,vg->n*sizeof(*vg->lai));
189 |
190 |     //Test loop
191 |     //Enter function, line 1
192 |     vg->lvf[v2]=1;
193 |     vg->lvb[v1]=1;
194 |     vg->lao[v2]=vg->gaof[v2];
195 |     vg->lai[v1]=vg->gaif[v1];
196 |     //line 2
197 |     if(vg->gaof[v2]!=(size_t)-1)
198 |         data_heap_push(&vg->lvfl,vg->go[v2]);
199 |     //line 3
200 |     if(vg->gaif[v1]!=(size_t)-1)
201 |         data_heapdec_push(&vg->lvbl,vg->go[v1]);
202 |     //line 4&5 (while)
203 |     while((vg->lvfl.n>0)&&(vg->lvbl.n>0))
204 |     {
205 |         size_t vu,vx,vy,vz;
206 |
207 |         vu=data_heap_top(&vg->lvfl);
208 |         vz=data_heapdec_top(&vg->lvbl);
209 |         if(vu>=vz)
210 |             break;
211 |         vu=vg->goi[vu]; //The heaps store order positions; convert back to vertex ids.
212 |         vz=vg->goi[vz];
213 |         //Enter macro, line 1
214 |         vx=data_ll_val(&vg->gao,vg->lao[vu]);
215 |         vy=data_ll_val(&vg->gai,vg->lai[vz]);
216 |         //line 2
217 |         vg->lao[vu]=data_ll_child(&vg->gao,vg->lao[vu]);
218 |         vg->lai[vz]=data_ll_child(&vg->gai,vg->lai[vz]);
219 |         //line 3
220 |         if(vg->lao[vu]==(size_t)-1)
221 |             data_heap_pop(&vg->lvfl);
222 |         if(vg->lai[vz]==(size_t)-1)
223 |             data_heapdec_pop(&vg->lvbl);
224 |         //line 4, first half
225 |         if(vg->lvb[vx])
226 |             return 1;
227 |         //line 5-8 (if)
228 |         if(!vg->lvf[vx])
229 |         {
230 |             //line 6,7
231 |             vg->lvf[vx]=1;
232 |             if(vg->gaof[vx]!=(size_t)-1)
233 |             {
234 |                 vg->lao[vx]=vg->gaof[vx];
235 |                 data_heap_push(&vg->lvfl,vg->go[vx]);
236 |             }
237 |         }
238 |         //line 4, second half
239 |         if(vg->lvf[vy])
240 |             return 1;
241 |         //line 9-12 (if)
242 |         if(!vg->lvb[vy])
243 |         {
244 |             //line 10,11
245 |             vg->lvb[vy]=1;
246 |             if(vg->gaif[vy]!=(size_t)-1)
247 |             {
248 |                 vg->lai[vy]=vg->gaif[vy];
249 |                 data_heapdec_push(&vg->lvbl,vg->go[vy]);
250 |             }
251 |         }
252 |     }
253 |
254 |     //Add arc
255 |     if(cycle_vg_add_arc(vg,v1,v2))
256 |         return 1;
257 |
258 |     //Recover ordering
259 |     cycle_vg_restore_order(vg,v1);
260 |     return 0;
261 | }
262 | /* Write the current arcs into the (n,n) matrix g: g[i,j]=1 if arc (i,j) exists, 0 otherwise. */
263 | void cycle_vg_extract_graph(const struct cycle_vg_system* restrict vg,MATRIXUC* g)
264 | {
265 |     size_t i;
266 |     size_t t1;
267 |
268 |     assert((vg->n==g->size1)&&(vg->n==g->size2));
269 |     MATRIXUCF(set_zero)(g);
270 |     for(i=0;i<vg->n;i++)
271 |     {
272 |         t1=vg->gaof[i]; //Head of the out-arc list of vertex i, or (size_t)-1 when i has no out-arcs.
273 |         while(t1!=(size_t)-1)
274 |         {
275 |             MATRIXUCF(set)(g,i,data_ll_val(&vg->gao,t1),1);
276 |             t1=data_ll_child(&vg->gao,t1);
277 |         }
278 |     }
279 | }
280 |
--------------------------------------------------------------------------------
/cycle/vg.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This lib contains the definitions of Vertex Guided Search and topological order maintenance.
19 | */
20 |
21 | #ifndef _HEADER_LIB_CYCLE_VG_H_
22 | #define _HEADER_LIB_CYCLE_VG_H_
23 | #include "../base/config.h"
24 | #include
25 | #include "../base/data_struct.h"
26 | #include "../base/logger.h"
27 | #include "../base/types.h"
28 | // #include "cycle_general.h"
29 | #ifdef __cplusplus
30 | extern "C"
31 | {
32 | #endif
33 |
34 | /* State of one cycle detection system: the graph as two sets of linked arc lists, its maintained vertex order, and scratch space for the search in cycle_vg_add(). */
35 | struct cycle_vg_system
36 | {
37 | //Constant parameters:
38 | //Number of vertices of the system
39 | size_t n;
40 | //Maximum number of arcs of the system
41 | size_t nam;
42 |
43 | //Constant parameters that must be set manually after initialization (cycle_vg_init sets both to (size_t)-1):
44 | //Maximum number of incoming arcs for each vertex
45 | size_t nim;
46 | //Maximum number of outgoing arcs for each vertex
47 | size_t nom;
48 |
49 | //Graph construction variables:
50 | //Current number of arcs
51 | size_t na;
52 | //Order of vertices: go[v] is the position of vertex v in the current order
53 | size_t* restrict go;
54 | //Inverse of go: goi[k] is the vertex at position k
55 | size_t* restrict goi;
56 | /* Graph representation for arcs out with linked list.
57 | * Each value j in linked list i corresponds to arc (i,j).
58 | * First item of each linked list i is specified in gaof.
59 | */
60 | struct data_ll gao;
61 | //First item of each linked list i of gao, or (size_t)-1 if list i is empty.
62 | size_t* restrict gaof;
63 | //Same as gao/gaof but for arcs in: each value j in linked list i corresponds to arc (j,i).
64 | struct data_ll gai;
65 | size_t* restrict gaif;
66 | //Number of incoming arcs for each vertex
67 | size_t* restrict gni;
68 | //Number of outgoing arcs for each vertex
69 | size_t* restrict gno;
70 |
71 |
72 |
73 | //Loop detection temporary variables:
74 | //Whether each vertex has been visited forward/backward, i.e. membership of F,B
75 | unsigned char* restrict lvf;
76 | unsigned char* restrict lvb;
77 | //Vertices to be visited forward/backward, i.e. membership of FL,BL; entries are order positions (go values)
78 | struct data_heap lvfl;
79 | struct data_heapdec lvbl;
80 | /* Current arc id of those from/to a specific vertex.
81 | * lao[i] is the gao item of the next out arc from i to examine during the search, or (size_t)-1 if none is left.
82 | * lai[i] is the gai item of the next in arc to i to examine during the search, or (size_t)-1 if none is left.
83 | */
84 | size_t* restrict lao;
85 | size_t* restrict lai;
86 | //Buffers for calculation during loop detection and order maintenance (n entries each).
87 | size_t* buff;
88 | size_t* buff2;
89 | };
90 |
91 | /* Initialize cycle detection system with vertex count and max number of arc count
92 | * vg: Cycle detection system.
93 | * dim: Number of vertices.
94 | * amax: Max number of arcs.
95 | * Return: 0 on success.
96 | */
97 | int cycle_vg_init(struct cycle_vg_system* restrict vg,size_t dim,size_t amax);
98 | struct cycle_vg_system* cycle_vg_new(size_t dim,size_t amax);
99 | int cycle_vg_free(struct cycle_vg_system* restrict vg);
100 |
101 | /* Re-initialize existing cycle detection system to the same size.
102 | * vg: Cycle detection system.
103 | * Return: 0 on success.
104 | */
105 | int cycle_vg_empty(struct cycle_vg_system* restrict vg);
106 |
107 | /* Obtain the number of vertices of the system
108 | * vg: Cycle detection system.
109 | * Return: Number of vertices on success.
110 | */
111 | static inline size_t cycle_vg_dim(const struct cycle_vg_system* restrict vg);
112 |
113 | /* Add arc v1->v2 to current graph in vg without loop checks.
114 | * vg: Cycle detection system.
115 | * v1: Source of arc
116 | * v2: Destination of arc
117 | * Return: 1 if v1 or v2 has reached its outgoing/incoming arc limit or the arc storage is full, otherwise 0 for success.
118 | */
119 | static inline int cycle_vg_add_arc(struct cycle_vg_system* restrict vg,size_t v1,size_t v2);
120 |
121 | // Rebuild the vertex order array (go) from its inverse (goi), and vice versa.
122 | static inline void cycle_vg_fix_go(struct cycle_vg_system* restrict vg);
123 | static inline void cycle_vg_fix_goi(struct cycle_vg_system* restrict vg);
124 |
125 | /* Restore order of vertices after adding a backward arc.
126 | * vg: Cycle detection system.
127 | * vv: Source of newly added arc
128 | */
129 | void cycle_vg_restore_order(struct cycle_vg_system* restrict vg,size_t vv);
130 |
131 | /* Try to add arc v1->v2 to current graph in vg.
132 | * vg: Cycle detection system.
133 | * v1: Source of arc
134 | * v2: Destination of arc
135 | * Return: 0 if success, or 1 if failed because of loop or full arc.
136 | */
137 | int cycle_vg_add(struct cycle_vg_system* restrict vg,size_t v1,size_t v2);
138 |
139 | /* Extracts graph representation into matrix form.
140 | * vg: Cycle detection system.
141 | * g: (n,n) destination matrix. g[i,j]=1 if arc (i,j) exists, and 0 if not.
142 | */
143 | void cycle_vg_extract_graph(const struct cycle_vg_system* restrict vg,MATRIXUC* g);
144 |
145 |
146 |
147 |
148 |
149 | static inline size_t cycle_vg_dim(const struct cycle_vg_system* restrict vg)
150 | {
151 | return vg->n; //Vertex count stored by cycle_vg_init().
152 | }
153 | /* Record arc v1->v2 in both arc lists without any cycle check. Returns 1 when a per-vertex limit is reached or the out-arc insertion fails, otherwise 0. NOTE(review): the result of the in-arc insertion is stored without a failure check. */
154 | static inline int cycle_vg_add_arc(struct cycle_vg_system* restrict vg,size_t v1,size_t v2)
155 | {
156 |     size_t node;
157 |     if(vg->gno[v1]>=vg->nom)
158 |         return 1;
159 |     if(vg->gni[v2]>=vg->nim)
160 |         return 1;
161 |     node=data_ll_insert_before(&vg->gao,vg->gaof[v1],v2);
162 |     if(node==(size_t)-1)
163 |         return 1;
164 |     vg->gaof[v1]=node; //The new item becomes the head of the out-list of v1.
165 |     vg->gaif[v2]=data_ll_insert_before(&vg->gai,vg->gaif[v2],v1); //Likewise for the in-list of v2.
166 |     vg->gno[v1]++;
167 |     vg->gni[v2]++;
168 |     vg->na++;
169 |     return 0;
170 | }
171 | /* Recompute go from goi so that go[goi[i]]==i for every position i. */
172 | static inline void cycle_vg_fix_go(struct cycle_vg_system* restrict vg)
173 | {
174 |     size_t i;
175 |     for(i=0;i<vg->n;i++)
176 |         vg->go[vg->goi[i]]=i;
177 | }
178 | /* Recompute goi from go so that goi[go[i]]==i for every vertex i. */
179 | static inline void cycle_vg_fix_goi(struct cycle_vg_system* restrict vg)
180 | {
181 |     size_t i;
182 |     for(i=0;i<vg->n;i++)
183 |         vg->goi[vg->go[i]]=i;
184 | }
185 |
186 |
187 | #ifdef __cplusplus
188 | }
189 | #endif
190 | #endif
191 |
--------------------------------------------------------------------------------
/doc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lingfeiwang/findr/8aed971a7c0ade736eb764809f82728fb2fc72a8/doc.pdf
--------------------------------------------------------------------------------
/netr/one.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | #include
19 | #include
20 | #include "../base/gsl/math.h"
21 | #include "../base/gsl/permutation.h"
22 | #include "../base/gsl/sort.h"
23 | #include "../base/logger.h"
24 | #include "../base/macros.h"
25 | #include "../base/data_process.h"
26 | #include "../cycle/cycle.h"
27 | #include "one.h"
28 |
29 |
30 | size_t netr_one_greedy(const MATRIXF* p,MATRIXUC* net,size_t nam,size_t nimax,size_t nomax)
31 | {
32 | #define CLEANUP CLEANVECF(v)CYCLEF(free)(&cs);CLEANPERM(perm)
33 | #define TOID(N,V1,V2) V1=(N)/(n-1);V2=(N)%(n-1);if((V2)>=(V1))V2++;
34 |
35 | struct CYCLEF(system) cs;
36 | VECTORF* v=0;
37 | gsl_permutation* perm=0;
38 | int ret;
39 | size_t n,na,i,ntot;
40 |
41 |
42 |
43 | //Initialize
44 | n=p->size1;
45 | assert((n==p->size2)&&(n==net->size1)&&(n==net->size2));
46 | assert(nimax&&nomax&&nam);
47 | ntot=n*(n-1);
48 | nam=GSL_MIN(ntot/2,nam);
49 | {
50 | size_t t1;
51 | t1=GSL_MIN(nimax,nomax);
52 | if(nam/n>=t1)
53 | nam=t1*n;
54 | }
55 | ret=CYCLEF(init)(&cs,n,nam);
56 | if(ret)
57 | ERRRETV(0,"Failed to initialize cycle detection.")
58 | cs.nim=nimax;
59 | cs.nom=nomax;
60 | v=VECTORFF(alloc)(ntot);
61 | perm=gsl_permutation_alloc(ntot);
62 | if(!(v&&perm))
63 | ERRRETV(0,"Not enough memory.")
64 |
65 | //Obtain edge order
66 | MATRIXFF(flatten_nodiag)(p,v);
67 | ret=CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(perm,v);
68 | if(ret)
69 | ERRRETV(0,"Failed to sort vector.")
70 | CLEANVECF(v)
71 |
72 | //Add edges
73 | for(i=0,na=0;(i=(V1))V2++;
93 |
94 | struct CYCLEF(system) cs;
95 | VECTORF* v=0;
96 | gsl_permutation* perm=0;
97 | int ret;
98 | size_t n,na,i,ntot;
99 | int sign[2]={1,-1};
100 | clock_t cstart,cnow;
101 |
102 | //Initialize
103 | n=p->size1;
104 | assert((n==p->size2)&&(n==net->size1)&&(n==net->size2));
105 | assert(nimax&&nomax);
106 | ntot=n*(n-1);
107 | nam=GSL_MIN(ntot/2,nam);
108 | ret=CYCLEF(init)(&cs,n,nam);
109 | if(ret)
110 | ERRRETV(0,"Failed to initialize cycle detection.")
111 | cs.nim=nimax;
112 | cs.nom=nomax;
113 | v=VECTORFF(alloc)(ntot);
114 | perm=gsl_permutation_alloc(ntot);
115 | if(!(v&&perm))
116 | ERRRETV(0,"Not enough memory.")
117 |
118 | //Obtain edge order
119 | MATRIXFF(flatten_nodiag)(p,v);
120 | ret=CONCATENATE3(gsl_sort_vector,FTYPE_SUF,_index)(perm,v);
121 | if(ret)
122 | ERRRETV(0,"Failed to sort vector.")
123 | CLEANVECF(v)
124 |
125 | //Add edges
126 | MATRIXLF(set_zero)(net);
127 | cstart=clock();
128 | MATRIXDF(set_all)(time,(double)cstart);
129 | for(i=0,na=0;(i.
17 | */
18 | /* This lib contains the implementation of network reconstruction
19 | * algorithms for a single network.
20 | */
21 |
22 | #ifndef _HEADER_LIB_NETR_ONE_H_
23 | #define _HEADER_LIB_NETR_ONE_H_
24 | #include "../base/config.h"
25 | #include
26 | #include "../base/types.h"
27 |
28 | #ifdef __cplusplus
29 | extern "C"
30 | {
31 | #endif
32 |
33 | /* Construct a deterministic single best Directed Acyclic Graph from prior pij information,
34 | * and stop when the number of edges reaches threshold or no edge can be added.
35 | * This method sorts pij values and attempt to add edges from the most likely one,
36 | * therefore named 'greedy'.
37 | * p: (n,n) for pij matrix
38 | * net: (n,n) for constructed network. net[i,j]=1 if edge (i,j) exists, 0 if not.
39 | * nam: Maximum number of edges. Set to (size_t)-1 for unlimited.
40 | * nimax: Maximum number of incoming edges for each node. Set to (size_t)-1 for unlimited.
41 | * nomax: Maximum number of outgoing edges for each node. Set to (size_t)-1 for unlimited.
42 | * Return: Number of edges, or 0 if failed.
43 | */
44 | size_t netr_one_greedy(const MATRIXF* p,MATRIXUC* net,size_t nam,size_t nimax,size_t nomax);
45 |
46 | /* Construct a deterministic single best Directed Acyclic Graph from prior pij information,
47 | * and stop when the number of edges reaches threshold or no edge can be added.
48 | * This method sorts pij values and attempt to add edges from the most likely one.
49 | * Additional information is obtained in the output network variable.
50 | * p: (n,n) for pij matrix
51 | * net: (n,n) for constructed network. net[i,j]=0 indicates the edge is never tried.
52 | net[i,j]!=0 indicates the edge has been tried. Its absolute values(=x) indicates
53 | the edge is tried at the x-th edge addition attempt. net[i,j]>0 indicates successful
54 | edge addition and <0 indicates failure.
55 | * time:(n,n) for CPU time passed from starting to add edges to finish trying
56 | * to add this edge in CPU seconds.
57 | * nam: Maximum number of edges.
58 | * nimax: Maximum number of incoming edges for each node. Set to (size_t)-1 for unlimited.
59 | * nomax: Maximum number of outgoing edges for each node. Set to (size_t)-1 for unlimited.
60 | * Return: Number of edges, or 0 if failed.
61 | */
62 | size_t netr_one_greedy_info(const MATRIXF* p,MATRIXL* net,MATRIXD* time,size_t nam,size_t nimax,size_t nomax);
63 |
64 | #ifdef __cplusplus
65 | }
66 | #endif
67 | #endif
68 |
--------------------------------------------------------------------------------
/pij/cassist/cassist.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | #include "../../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../../base/logger.h"
23 | #include "../../base/macros.h"
24 | #include "../../base/const.h"
25 | #include "../../base/supernormalize.h"
26 | #include "../../base/threading.h"
27 | #include "../../base/data_process.h"
28 | #include "llr.h"
29 | #include "llrtopij.h"
30 | #include "llrtopv.h"
31 | #include "cassist.h"
32 |
33 | /* Compute the p-values of the five tests into p1..p5 (layout documented in cassist.h): supernormalize copies of g, t and t2 by row, compute log likelihood ratios, then convert them to p-values. Returns 0 on the normal path; every failure leaves through ERRRET (macro defined elsewhere, presumably cleanup plus a nonzero return). */
34 | int pijs_cassist_pv(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t memlimit)
35 | {
36 | #define CLEANUP CLEANMATF(gnew)CLEANMATF(tnew)CLEANMATF(tnew2)
37 | MATRIXF *gnew; //(ng,ns) Supernormalized copy of g
38 | MATRIXF *tnew,*tnew2; //(ng,ns) and (nt,ns) Supernormalized copies of t and t2
39 | int ret;
40 | size_t ns=g->size2;
41 | #ifndef NDEBUG
42 | size_t nt;
43 | size_t ng=g->size1;
44 |
45 | nt=t2->size1;
46 | ns=g->size2;
47 | #endif
48 | //ng and nt exist only in debug builds; they are used solely by the assert below.
49 | gnew=tnew=tnew2=0;
50 |
51 | //Validation
52 | assert(!((t->size1!=ng)||(t->size2!=ns)||(t2->size2!=ns)
53 | ||(p1&&(p1->size!=ng))
54 | ||(p2&&((p2->size1!=ng)||(p2->size2!=nt)))
55 | ||(p3&&((p3->size1!=ng)||(p3->size2!=nt)))
56 | ||(p4&&((p4->size1!=ng)||(p4->size2!=nt)))
57 | ||(p5&&((p5->size1!=ng)||(p5->size2!=nt)))));
58 | assert(memlimit);
59 |
60 | if(ns<4)
61 | ERRRET("Cannot compute p-values with fewer than 4 samples.")
62 | //NOTE(review): the assert above tolerates NULL p1..p5, but the estimate below dereferences p1 and p2 unconditionally; confirm callers always pass them.
63 | {
64 | size_t mem1;
65 | mem1=(4*t->size1*t->size2+2*t2->size1*t2->size2+p1->size+p2->size1*p2->size2*4)*sizeof(FTYPE);
66 | if(memlimit<=mem1)
67 | ERRRET("Memory limit lower than minimum memory needed. Try increasing your memory usage limit.")
68 | LOG(10,"Memory limit: %lu bytes.",memlimit) //NOTE(review): %lu is paired with a size_t argument; confirm LOG is printf-style and whether a cast is needed on platforms where the two differ.
69 | }
70 |
71 | gnew=MATRIXFF(alloc)(g->size1,g->size2);
72 | tnew=MATRIXFF(alloc)(t->size1,t->size2);
73 | tnew2=MATRIXFF(alloc)(t2->size1,t2->size2);
74 | if(!(gnew&&tnew&&tnew2))
75 | ERRRET("Not enough memory.")
76 |
77 | //Step 1: Supernormalization (on copies, the const inputs are left untouched)
78 | LOG(9,"Supernormalizing...")
79 | MATRIXFF(memcpy)(gnew,g);
80 | ret=supernormalizea_byrow(gnew);
81 | MATRIXFF(memcpy)(tnew,t);
82 | ret=ret||supernormalizea_byrow(tnew); //Short-circuit: later normalizations are skipped once one has failed.
83 | MATRIXFF(memcpy)(tnew2,t2);
84 | ret=ret||supernormalizea_byrow(tnew2);
85 | if(ret)
86 | ERRRET("Supernormalization failed.")
87 |
88 | //Step 2: Log likelihood ratios from nonpermuted data
89 | LOG(9,"Calculating real log likelihood ratios...")
90 | pij_cassist_llr(gnew,tnew,tnew2,p1,p2,p3,p4,p5);
91 | //Step 3: Convert log likelihood ratios to p-values
92 | LOG(9,"Converting log likelihood ratios into p-values...")
93 | pij_cassist_llrtopvs(p1,p2,p3,p4,p5,ns);
94 | //Cleanup
95 | CLEANUP
96 | return ret; //ret is 0 here: a nonzero value already left through ERRRET above.
97 | #undef CLEANUP
98 | }
99 | /* Compute the probabilities of the five tests into p1..p5 (layout documented in cassist.h): compare rows of t and t2, supernormalize copies of g, t and t2 by row, compute log likelihood ratios, convert them to probabilities, and zero the diagonals of p2..p5 when nodiag is set. Returns the result of pij_cassist_llrtopijs(); earlier failures leave through ERRRET (macro defined elsewhere). */
100 | int pijs_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,char nodiag,size_t memlimit)
101 | {
102 | #define CLEANUP CLEANMATF(gnew)CLEANMATF(tnew)CLEANMATF(tnew2)
103 | MATRIXF *gnew; //(ng,ns) Supernormalized copy of g
104 | MATRIXF *tnew,*tnew2; //(ng,ns) and (nt,ns) Supernormalized copies of t and t2
105 | VECTORFF(view) vv;
106 | int ret;
107 | size_t ns=g->size2;
108 | #ifndef NDEBUG
109 | size_t nt;
110 | size_t ng=g->size1;
111 |
112 | nt=t2->size1;
113 | ns=g->size2;
114 | #endif
115 | //ng and nt exist only in debug builds; they are used solely by the assert below.
116 | gnew=tnew=tnew2=0;
117 |
118 | //Validation
119 | assert(!((t->size1!=ng)||(t->size2!=ns)||(t2->size2!=ns)
120 | ||(p1&&(p1->size!=ng))
121 | ||(p2&&((p2->size1!=ng)||(p2->size2!=nt)))
122 | ||(p3&&((p3->size1!=ng)||(p3->size2!=nt)))
123 | ||(p4&&((p4->size1!=ng)||(p4->size2!=nt)))
124 | ||(p5&&((p5->size1!=ng)||(p5->size2!=nt)))));
125 | assert(memlimit);
126 |
127 | if(ns<4)
128 | ERRRET("Cannot compute probabilities with fewer than 4 samples.")
129 | //Reject memory limits that do not exceed the estimated minimum. NOTE(review): p1 and p2 are dereferenced here although the assert above tolerates NULL.
130 | {
131 | size_t mem1;
132 | mem1=(4*t->size1*t->size2+2*t2->size1*t2->size2+p1->size+p2->size1*p2->size2*4)*sizeof(FTYPE);
133 | if(memlimit<=mem1)
134 | ERRRET("Memory limit lower than minimum memory needed. Try increasing your memory usage limit.")
135 | LOG(10,"Memory limit: %lu bytes.",memlimit) //NOTE(review): %lu is paired with a size_t argument; confirm LOG is printf-style and whether a cast is needed.
136 | }
137 |
138 | gnew=MATRIXFF(alloc)(g->size1,g->size2);
139 | tnew=MATRIXFF(alloc)(t->size1,t->size2);
140 | tnew2=MATRIXFF(alloc)(t2->size1,t2->size2);
141 | if(!(gnew&&tnew&&tnew2))
142 | ERRRET("Not enough memory.")
143 |
144 | //Check for identical rows in input data. tnew/tnew2 hold no data yet, so their first column/row are passed to cmprow, presumably as scratch buffers (cmprow is defined elsewhere, confirm).
145 | {
146 | VECTORFF(view) vbuff1=MATRIXFF(column)(tnew,0);
147 | VECTORFF(view) vbuff2=MATRIXFF(row)(tnew2,0);
148 | MATRIXFF(cmprow)(t,t2,&vbuff1.vector,&vbuff2.vector,nodiag,1);
149 | }
150 |
151 | //Step 1: Supernormalization (on copies, the const inputs are left untouched)
152 | LOG(9,"Supernormalizing...")
153 | MATRIXFF(memcpy)(gnew,g);
154 | ret=supernormalizea_byrow(gnew);
155 | MATRIXFF(memcpy)(tnew,t);
156 | ret=ret||supernormalizea_byrow(tnew); //Short-circuit: later normalizations are skipped once one has failed.
157 | MATRIXFF(memcpy)(tnew2,t2);
158 | ret=ret||supernormalizea_byrow(tnew2);
159 | if(ret)
160 | ERRRET("Supernormalization failed.")
161 |
162 | //Step 2: Log likelihood ratios from nonpermuted data
163 | LOG(9,"Calculating real log likelihood ratios...")
164 | pij_cassist_llr(gnew,tnew,tnew2,p1,p2,p3,p4,p5);
165 | //Step 3: Convert log likelihood ratios to probabilities; a failure is logged and returned, but the diagonal handling and cleanup below still run.
166 | if((ret=pij_cassist_llrtopijs(p1,p2,p3,p4,p5,ns,nodiag)))
167 | LOG(4,"Failed to convert all log likelihood ratios to probabilities.")
168 | if(nodiag)
169 | {
170 | vv=MATRIXFF(diagonal)(p2); //Diagonal entries pair a gene with itself and are zeroed in every output matrix.
171 | VECTORFF(set_zero)(&vv.vector);
172 | vv=MATRIXFF(diagonal)(p3);
173 | VECTORFF(set_zero)(&vv.vector);
174 | vv=MATRIXFF(diagonal)(p4);
175 | VECTORFF(set_zero)(&vv.vector);
176 | vv=MATRIXFF(diagonal)(p5);
177 | VECTORFF(set_zero)(&vv.vector);
178 | }
179 |
180 | //Cleanup
181 | CLEANUP
182 | return ret;
183 | #undef CLEANUP
184 | }
185 | /* Estimate the probability of A->B with the default test combination: ans=(p2*p5+p4)/2 element-wise. pijs_cassist() writes p5 directly into ans; p1 and p3 are computed but not used. Returns 0 on success; failures leave through ERRRET (macro defined elsewhere). */
186 | int pij_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit)
187 | {
188 | #define CLEANUP CLEANVECF(p1)CLEANMATF(p2)CLEANMATF(p3)CLEANMATF(p4)
189 |     VECTORF *p1;
190 |     MATRIXF *p2,*p3,*p4;
191 |     size_t ng=g->size1;
192 |     size_t nt=t2->size1;
193 |
194 |     assert(g&&t&&t2&&ans); //The former "&&pijs" operand named an identifier that does not exist in this function.
195 |     assert((g->size2==t->size2)&&(g->size2==t2->size2));
196 |     assert((t->size1==ng)&&(ans->size1==ng)&&(ans->size2==nt));
197 |     p1=VECTORFF(alloc)(ng);
198 |     p2=MATRIXFF(alloc)(ng,nt);
199 |     p3=MATRIXFF(alloc)(ng,nt);
200 |     p4=MATRIXFF(alloc)(ng,nt);
201 |     if(!(p1&&p2&&p3&&p4))
202 |         ERRRET("Not enough memory.")
203 |     if(pijs_cassist(g,t,t2,p1,p2,p3,p4,ans,nodiag,memlimit))
204 |         ERRRET("pij_cassist_pijs failed.")
205 |
206 |     //Combine tests: each thread handles its own band of rows [ng1,ng2).
207 |     #pragma omp parallel
208 |     {
209 |         size_t ng1,ng2;
210 |         MATRIXFF(view) mva,mv2,mv4;
211 |         threading_get_startend(g->size1,&ng1,&ng2);
212 |         if(ng1<ng2)
213 |         {
214 |             mva=MATRIXFF(submatrix)(ans,ng1,0,ng2-ng1,ans->size2);
215 |             mv2=MATRIXFF(submatrix)(p2,ng1,0,ng2-ng1,p2->size2);
216 |             mv4=MATRIXFF(submatrix)(p4,ng1,0,ng2-ng1,p4->size2);
217 |             MATRIXFF(mul_elements)(&mva.matrix,&mv2.matrix);
218 |             MATRIXFF(add)(&mva.matrix,&mv4.matrix);
219 |             MATRIXFF(scale)(&mva.matrix,0.5);
220 |         }
221 |     }
222 |     //Cleanup
223 |     CLEANUP
224 |     return 0;
225 | #undef CLEANUP
226 | }
227 | /* Estimate the probability of A->B with the traditional combination: ans=p2*p3 element-wise. pijs_cassist() writes p3 directly into ans. Returns 0 on success; failures leave through ERRRET (macro defined elsewhere). */
228 | int pij_cassist_trad(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit)
229 | {
230 | #define CLEANUP CLEANVECF(step1)CLEANMATF(step2)CLEANMATF(step4)CLEANMATF(step5)
231 |     const size_t nrow=g->size1,ncol=t2->size1;
232 |     VECTORF *step1;
233 |     MATRIXF *step2,*step4,*step5;
234 |     int failed;
235 |
236 |     assert(g&&t&&t2&&ans);
237 |     assert((g->size2==t->size2)&&(g->size2==t2->size2));
238 |     assert((t->size1==nrow)&&(ans->size1==nrow)&&(ans->size2==ncol));
239 |     step1=VECTORFF(alloc)(nrow);
240 |     step2=MATRIXFF(alloc)(nrow,ncol);
241 |     step4=MATRIXFF(alloc)(nrow,ncol);
242 |     step5=MATRIXFF(alloc)(nrow,ncol);
243 |     if(!step1||!step2||!step4||!step5)
244 |         ERRRET("Not enough memory.")
245 |     //ans occupies the p3 slot, so the step 3 result lands in the output matrix.
246 |     failed=pijs_cassist(g,t,t2,step1,step2,ans,step4,step5,nodiag,memlimit);
247 |     if(failed)
248 |         ERRRET("pij_cassist_pijs failed.")
249 |     //Combine tests
250 |     MATRIXFF(mul_elements)(ans,step2);
251 |     //Cleanup
252 |     CLEANUP
253 |     return 0;
254 | #undef CLEANUP
255 | }
256 |
--------------------------------------------------------------------------------
/pij/cassist/cassist.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <https://www.gnu.org/licenses/>.
17 | */
18 | /* This part contains the main interface function of genotype assisted pij inference.
19 | */
20 |
21 | #ifndef _HEADER_LIB_PIJ_CASSIST_H_
22 | #define _HEADER_LIB_PIJ_CASSIST_H_
23 | #include "../../base/config.h"
24 | #include "../../base/types.h"
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 | /* Estimates the p-value of A B against A->B from genotype and expression data with 5 tests.
31 | * E is always the best eQTL of A. Full data is required.
32 | * g: (ng,ns) Genotype data for E, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t.
33 | * t: (ng,ns) Expression data of A.
34 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A.
35 | * p1: (ng) P-values of step 1. Tests E->A v.s. E A.
36 | * p2: (ng,nt) P-values of step 2. Tests E->B v.s. E B.
37 | * p3: (ng,nt) P-values of step 3. Tests E->A->B v.s. E->A->B with E->B.
38 | * p4: (ng,nt) P-values of step 4. Tests E->A->B with E->B v.s. E->A B.
39 | * p5: (ng,nt) P-values of step 5. Tests E->A->B with E->B v.s. A<-E->B.
40 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1.
41 | * Return: 0 on success
42 | * Appendix:
43 | * ng: Number of genes with best eQTL.
44 | * nt: Number of genes with expression data for B
45 | * ns: Number of samples.
46 | */
47 | int pijs_cassist_pv(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t memlimit);
48 |
49 | /* Estimates the probability of A->B from genotype and expression data with 5 tests.
50 | * E is always the best eQTL of A. Full data is required.
51 | * g: (ng,ns) Genotype data for E, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t.
52 | * t: (ng,ns) Expression data of A.
53 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A.
54 | * p1: (ng) Probabilities of step 1. Tests E->A v.s. E A. For nodiag=0, because the function expects significant eQTLs, p1 always return 1. For nodiag=1, uses diagonal elements of p2. Consider replacing p1 with your own (1-FDR) from eQTL discovery.
55 | * p2: (ng,nt) Probabilities of step 2. Tests E->B v.s. E B.
56 | * p3: (ng,nt) Probabilities of step 3. Tests E->A->B v.s. E->A->B with E->B.
57 | * p4: (ng,nt) Probabilities of step 4. Tests E->A->B with E->B v.s. E->A B.
58 | * p5: (ng,nt) Probabilities of step 5. Tests E->A->B with E->B v.s. A<-E->B.
59 | * nv: Number of possible values each genotype entry may take, =number of alleles+1.
60 | * nodiag: When the top ng rows of t2 is exactly t, diagonals of p2 and p3 are meaningless. In this case, set nodiag to 1 to avoid inclusion of NANs. For nodiag=0, t and t2 should not have any identical genes.
61 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1.
62 | * Return: 0 on success
63 | * Appendix:
64 | * ng: Number of genes with best eQTL.
65 | * nt: Number of genes with expression data for B
66 | * ns: Number of samples.
67 | */
68 | int pijs_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,char nodiag,size_t memlimit);
69 |
70 | /* Estimates the probability of A->B from genotype and expression data with default combination of tests. Uses results from pijs_cassist. Variables have the same definitions except:
71 | * ans: (ng,nt) Predicted probability of A->B based on default combination of 5 tests. The default combination is (p2*p5+p4)/2. Note: this combination does not include p1.
72 | * Return: 0 on success
73 | */
74 | int pij_cassist(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit);
75 |
76 | /* Estimates the probability of A->B from genotype and expression data with traditional causal inference method.
77 | * NOTE: This is not and is not intended as a loyal reimplementation of the Trigger R package. Instead, it aims at reusing methods and tests of Findr to produce inferences that mimic the three tests performed by Trigger. Many implementational details are different between this function and Trigger, although a significant (but not full) overlap has been observed in existing studies. This method does not include p1.
78 | * Inputs and outputs are the same as function pij_cassist.
79 | */
80 | int pij_cassist_trad(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,char nodiag,size_t memlimit);
81 |
82 | #ifdef __cplusplus
83 | }
84 | #endif
85 | #endif
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
--------------------------------------------------------------------------------
/pij/cassist/llr.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include "../../base/gsl/blas.h"
24 | #include "../../base/random.h"
25 | #include "../../base/const.h"
26 | #include "../../base/logger.h"
27 | #include "../../base/macros.h"
28 | #include "../../base/data_process.h"
29 | #include "../../base/threading.h"
30 | #include "llr.h"
31 |
32 |
33 | /* Calculates the 5 log likelihood ratios of Trigger with nonpermuted data in block form:
34 | * 1. E->A v.s. E no relation with A
35 | * 2. A<-E->B with A--B v.s. E->A<-B
36 | * 3. E->A->B v.s. A<-E->B with A--B
37 | * 4. A<-E->B with A->B v.s. E->A
38 | * 5. A<-E->B with A->B v.s. A<-E->B
39 | * Uses GSL BLAS.
40 | * Note: for each row, g must be the best eQTL of t of the same row.
41 | */
42 | static void pij_cassist_llr_block(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* llr1,MATRIXF* llr2,MATRIXF* llr3,MATRIXF* llr4,MATRIXF* llr5)
43 | {
44 | size_t i,j;
45 | VECTORFF(view) vv;
46 | size_t ng=g->size1;
47 | size_t nt=t2->size1;
48 |
49 | assert(ng&&(t->size1==ng)&&(llr1->size==ng)&&(llr2->size1==ng)&&(llr3->size1==ng)
50 | &&(llr4->size1==ng)&&(llr5->size1==ng));
51 | assert(nt&&(llr2->size2==nt)&&(llr3->size2==nt)&&(llr4->size2==nt)&&(llr5->size2==nt));
52 | assert(g->size2&&(t->size2==g->size2)&&(t2->size2==g->size2));
53 |
54 | //llr1=rho_EA
55 | MATRIXFF(cov2_1v1_bounded)(g,t,llr1);
56 | //llr2=rho_EB
57 | MATRIXFF(cov2_bounded)(g,t2,llr2);
58 | //llr5=rho_AB
59 | MATRIXFF(cov2_bounded)(t,t2,llr5);
60 |
61 | //llr4=rho_EA*rho_EB
62 | MATRIXFF(memcpy)(llr4,llr2);
63 | for(i=0;isize1;
141 | nt=t2->size1;
142 | ns=t->size2;
143 | #endif
144 |
145 | //Validation
146 | assert(!((g->size2!=ns)||(t2->size2!=ns)||(t->size1!=ng)||(llr2->size1!=ng)||(llr2->size2!=nt)||(llr3->size1!=ng)||(llr3->size2!=nt)||(llr4->size1!=ng)||(llr4->size2!=nt)||(llr5->size1!=ng)||(llr5->size2!=nt)));
147 | assert(!(llr1->size!=ng));
148 |
149 | #pragma omp parallel
150 | {
151 | size_t n1,n2;
152 | threading_get_startend(t->size1,&n1,&n2);
153 | if(n2>n1)
154 | {
155 | MATRIXFF(const_view) mvg=MATRIXFF(const_submatrix)(g,n1,0,n2-n1,g->size2);
156 | MATRIXFF(const_view) mvt=MATRIXFF(const_submatrix)(t,n1,0,n2-n1,t->size2);
157 | VECTORFF(view) vvllr1;
158 | MATRIXFF(view) mvllr2,mvllr3,mvllr4,mvllr5;
159 | vvllr1=VECTORFF(subvector)(llr1,n1,n2-n1);
160 | mvllr2=MATRIXFF(submatrix)(llr2,n1,0,n2-n1,llr2->size2);
161 | mvllr3=MATRIXFF(submatrix)(llr3,n1,0,n2-n1,llr3->size2);
162 | mvllr4=MATRIXFF(submatrix)(llr4,n1,0,n2-n1,llr4->size2);
163 | mvllr5=MATRIXFF(submatrix)(llr5,n1,0,n2-n1,llr5->size2);
164 | pij_cassist_llr_block(&mvg.matrix,&mvt.matrix,t2,&vvllr1.vector,&mvllr2.matrix,&mvllr3.matrix,&mvllr4.matrix,&mvllr5.matrix);
165 | }
166 | }
167 | }
168 |
169 |
--------------------------------------------------------------------------------
/pij/cassist/llr.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This part contains the log likelihood ratio calculations.
19 | */
20 |
21 | #ifndef _HEADER_LIB_PIJ_CASSIST_LLR_H_
22 | #define _HEADER_LIB_PIJ_CASSIST_LLR_H_
23 | #include "../../base/config.h"
24 | #include "../../base/types.h"
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 | /* Multithread calculation of log likelihood ratios for 5 tests.
31 | * g: MATRIXF (ng,ns) Full genotype data matrix
32 | * t: MATRIXF (ng,ns) Supernormalized transcript data matrix of A
33 | * t2: MATRIXF (nt,ns) Supernormalized transcript data matrix of B
34 | * llr1: VECTORF (ng). Log likelihood ratios for test 1. Tests E->A v.s. E A.
35 | * llr2: MATRIXF (ng,nt). Log likelihood ratios for test 2. Tests E->B v.s. E B.
36 | * llr3: MATRIXF (ng,nt). Log likelihood ratios for test 3. Tests E->A->B v.s. E->A->B with E->B.
37 | * llr4: MATRIXF (ng,nt). Log likelihood ratios for test 4. Tests E->A->B with E->B v.s. E->A B.
38 | * llr5: MATRIXF (ng,nt). Log likelihood ratios for test 5. Tests E->A->B with E->B v.s. A<-E->B.
39 | */
40 | void pij_cassist_llr(const MATRIXF* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* llr1,MATRIXF* llr2,MATRIXF* llr3,MATRIXF* llr4,MATRIXF* llr5);
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 | #ifdef __cplusplus
69 | }
70 | #endif
71 | #endif
72 |
--------------------------------------------------------------------------------
/pij/cassist/llrtopij.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../../base/logger.h"
23 | #include "llrtopij.h"
24 | #include "../llrtopij.h"
25 |
26 |
27 |
28 |
29 |
30 |
31 | /* Step 1 shortcut: assigns probability 1 to every element of p1.
32 |  * Useful when the best eQTLs have already been selected in advance.
33 |  * p1: Vector to be filled with 1.
34 |  * Return: always 0.
35 |  */
36 | static inline int pij_cassist_llrtopij1_1(VECTORF* p1)
37 | {
38 |     const int status=0;    //Nothing in this step reports failure
39 |
40 |     LOG(9,"Converting LLR to probabilities for step 1. Filling with 1.")
41 |     VECTORFF(set_all)(p1,1);
42 |     return status;
43 | }
39 |
40 | /* Functions to convert LLR of specific steps into probabilities.
41 | * Uses pij_llrtopij_convert_single_self with different settings of n1d and n2d.
42 | * Function name suffixes indicate which LLR to convert.
43 | */
44 | //Step 1 wrapper: logs, then forwards d to pij_cassist_llrtopij1_1 and returns its status.
45 | static inline int pij_cassist_llrtopij1(VECTORF* d)
46 | {
47 |     int status;
48 |
49 |     LOG(9,"Converting LLR to probabilities for step 1 on per A basis.")
50 |     status=pij_cassist_llrtopij1_1(d);
51 |     return status;
52 | }
49 |
50 | //Step 2: converts the LLRs in d in place by passing (1,ns-2) to pij_llrtopij_convert_single_self; returns its status.
51 | static inline int pij_cassist_llrtopij2(MATRIXF* d,size_t ns,char nodiag)
52 | {
53 |     size_t ns_less2;
54 |
55 |     LOG(9,"Converting LLR to probabilities for step 2 on per A basis.")
56 |     assert(ns>2);
57 |     ns_less2=ns-2;
58 |     return pij_llrtopij_convert_single_self(d,1,ns_less2,nodiag,0);
59 | }
56 |
57 | /* Step 3: converts the LLRs in d in place by passing (1,ns-3) to pij_llrtopij_convert_single_self,
58 |  * then replaces every entry with 1 minus its value.
59 |  * Return: 0 on success, 1 if the conversion failed (d is then left as the converter left it).
60 |  */
61 | static inline int pij_cassist_llrtopij3(MATRIXF* d,size_t ns,char nodiag)
62 | {
63 |     int failed;
64 |
65 |     LOG(9,"Converting LLR to probabilities for step 3 on per A basis.")
66 |     assert(ns>3);
67 |     failed=pij_llrtopij_convert_single_self(d,1,ns-3,nodiag,0);
68 |     if(!failed)
69 |     {
70 |         //Complement: d <- 1-d
71 |         MATRIXFF(scale)(d,-1);
72 |         MATRIXFF(add_constant)(d,1);
73 |     }
74 |     return failed?1:0;
75 | }
67 |
68 | //Step 4: converts the LLRs in d in place by passing (2,ns-3) to pij_llrtopij_convert_single_self; returns its status.
69 | static inline int pij_cassist_llrtopij4(MATRIXF* d,size_t ns,char nodiag)
70 | {
71 |     size_t ns_less3;
72 |
73 |     LOG(9,"Converting LLR to probabilities for step 4 on per A basis.")
74 |     assert(ns>3);
75 |     ns_less3=ns-3;
76 |     return pij_llrtopij_convert_single_self(d,2,ns_less3,nodiag,0);
77 | }
74 |
75 | //Step 5: converts the LLRs in d in place by passing (1,ns-3) to pij_llrtopij_convert_single_self; returns its status.
76 | static inline int pij_cassist_llrtopij5(MATRIXF* d,size_t ns,char nodiag)
77 | {
78 |     size_t ns_less3;
79 |
80 |     LOG(9,"Converting LLR to probabilities for step 5 on per A basis.")
81 |     assert(ns>3);
82 |     ns_less3=ns-3;
83 |     return pij_llrtopij_convert_single_self(d,1,ns_less3,nodiag,0);
84 | }
81 |
82 |
83 | /* Converts the log likelihood ratios of all five tests into probabilities, in place.
84 |  * p1: (ng) Output of step 1. Filled with 1 when nodiag is 0; otherwise copied from the diagonal of the converted p2.
85 |  * p2, p3, p4, p5: (ng,nt) LLRs on input, probabilities on output.
86 |  * ns: Number of samples. At least 4 are needed.
87 |  * nodiag: Forwarded to every per-step conversion; also selects how p1 is obtained.
88 |  * Return: 0 on success, 1 on failure. Conversion stops at the first failing step.
89 |  */
90 | int pij_cassist_llrtopijs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t ns,char nodiag)
91 | {
92 |     int failed;
93 |
94 |     if(ns<4)
95 |     {
96 |         LOG(0,"Cannot convert log likelihood ratios to probabilities. Needs at least 4 samples.")
97 |         return 1;
98 |     }
99 |     //Return at the first failure so that only the step that really failed is logged.
100 |     //A status flag shared across steps would make every skipped later step look failed in the log.
101 |     if(pij_cassist_llrtopij2(p2,ns,nodiag))
102 |     {
103 |         LOG(1,"Failed to convert log likelihood ratios to probabilities in step 2.")
104 |         return 1;
105 |     }
106 |     //Step 1 comes after step 2 because, with nodiag, p1 is read from the converted p2.
107 |     if(nodiag)
108 |     {
109 |         VECTORFF(view) vv=MATRIXFF(diagonal)(p2);
110 |         failed=VECTORFF(memcpy)(p1,&vv.vector);
111 |     }
112 |     else
113 |         failed=pij_cassist_llrtopij1_1(p1);
114 |     if(failed)
115 |     {
116 |         LOG(1,"Failed to convert log likelihood ratios to probabilities in step 1.")
117 |         return 1;
118 |     }
119 |     if(pij_cassist_llrtopij3(p3,ns,nodiag))
120 |     {
121 |         LOG(1,"Failed to convert log likelihood ratios to probabilities in step 3.")
122 |         return 1;
123 |     }
124 |     if(pij_cassist_llrtopij4(p4,ns,nodiag))
125 |     {
126 |         LOG(1,"Failed to convert log likelihood ratios to probabilities in step 4.")
127 |         return 1;
128 |     }
129 |     if(pij_cassist_llrtopij5(p5,ns,nodiag))
130 |     {
131 |         LOG(1,"Failed to convert log likelihood ratios to probabilities in step 5.")
132 |         return 1;
133 |     }
134 |     return 0;
135 | }
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
--------------------------------------------------------------------------------
/pij/cassist/llrtopij.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the conversion from log likelihood ratio to probabilities
19 | *
20 | */
21 |
22 | #ifndef _HEADER_LIB_PIJ_CASSIST_LLRTOPIJ_H_
23 | #define _HEADER_LIB_PIJ_CASSIST_LLRTOPIJ_H_
24 | #include "../../base/config.h"
25 | #include "../../base/types.h"
26 | #ifdef __cplusplus
27 | extern "C"
28 | {
29 | #endif
30 |
31 | /* Converts four LLRs into probabilities together.
32 | * Uses pij_cassist_llrtopij1_1 and pij_cassist_llrtopij2 to pij_cassist_llrtopij5, defined in llrtopij.c.
33 | * See above functions for parameter definitions.
34 | * Return: 0 if all functions are successful.
35 | */
36 | int pij_cassist_llrtopijs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t ns,char nodiag);
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | #ifdef __cplusplus
52 | }
53 | #endif
54 | #endif
55 |
--------------------------------------------------------------------------------
/pij/cassist/llrtopv.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the conversion from log likelihood ratio to p-values for continuous anchors
19 | *
20 | */
21 |
22 | #ifndef _HEADER_LIB_PIJ_CASSIST_LLRTOPV_H_
23 | #define _HEADER_LIB_PIJ_CASSIST_LLRTOPV_H_
24 | #include "../../base/config.h"
25 | #include "../../base/types.h"
26 | #include "../llrtopv.h"
27 | #ifdef __cplusplus
28 | extern "C"
29 | {
30 | #endif
31 |
32 | /* Converts log likelihood ratios into p-values for continuous assisted causal inference test for each test separately.
33 | * d: VECTORF (test 1) or MATRIXF (tests 2 to 5) of any size, as input of LLRs and also output of corresponding p-values
34 | * ns: Number of samples, to be used to calculate the null distribution
35 | */
36 | //Test 1: converts the LLRs in vector d to p-values in place by passing (1,ns-2) to pij_llrtopv_block.
37 | static inline void pij_cassist_llrtopv1(VECTORF* d,size_t ns)
38 | {
39 |     size_t ns_less2;
40 |
41 |     assert(ns>3);
42 |     ns_less2=ns-2;
43 |     pij_llrtopv_block(d,1,ns_less2);
44 | }
41 |
42 | //Test 2: converts the LLRs in matrix d to p-values in place by passing (1,ns-2) to pij_llrtopvm.
43 | static inline void pij_cassist_llrtopv2(MATRIXF* d,size_t ns)
44 | {
45 |     size_t ns_less2;
46 |
47 |     assert(ns>3);
48 |     ns_less2=ns-2;
49 |     pij_llrtopvm(d,1,ns_less2);
50 | }
47 |
48 | //Test 3: converts the LLRs in matrix d to p-values in place by passing (1,ns-3) to pij_llrtopvm.
49 | static inline void pij_cassist_llrtopv3(MATRIXF* d,size_t ns)
50 | {
51 |     size_t ns_less3;
52 |
53 |     assert(ns>3);
54 |     ns_less3=ns-3;
55 |     pij_llrtopvm(d,1,ns_less3);
56 | }
53 |
54 | //Test 4: converts the LLRs in matrix d to p-values in place by passing (2,ns-3) to pij_llrtopvm.
55 | static inline void pij_cassist_llrtopv4(MATRIXF* d,size_t ns)
56 | {
57 |     size_t ns_less3;
58 |
59 |     assert(ns>3);
60 |     ns_less3=ns-3;
61 |     pij_llrtopvm(d,2,ns_less3);
62 | }
59 |
60 | //Test 5: converts the LLRs in matrix d to p-values in place by passing (1,ns-3) to pij_llrtopvm.
61 | static inline void pij_cassist_llrtopv5(MATRIXF* d,size_t ns)
62 | {
63 |     size_t ns_less3;
64 |
65 |     assert(ns>3);
66 |     ns_less3=ns-3;
67 |     pij_llrtopvm(d,1,ns_less3);
68 | }
65 |
66 | /* Converts log likelihood ratios into p-values for all 5 tests of the continuous assisted causal inference test, in place.
67 | * p1: (ng) Test 1. Converted by pij_cassist_llrtopv1.
68 | * p2: (ng,nt) Test 2. Converted by pij_cassist_llrtopv2.
69 | * p3: (ng,nt) Test 3. Converted by pij_cassist_llrtopv3.
70 | * p4: (ng,nt) Test 4. Converted by pij_cassist_llrtopv4.
71 | * p5: (ng,nt) Test 5. Converted by pij_cassist_llrtopv5.
72 | *     p1 to p5 hold log likelihood ratios on input and the converted p-values on output.
73 | * ns: Number of samples. Every per-test helper asserts ns>3.
74 | */
75 | static inline void pij_cassist_llrtopvs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t ns)
76 | {
77 | pij_cassist_llrtopv1(p1,ns);	//Vector of test 1
78 | pij_cassist_llrtopv2(p2,ns);	//Matrices of tests 2 to 5 follow, each with its own helper
79 | pij_cassist_llrtopv3(p3,ns);
80 | pij_cassist_llrtopv4(p4,ns);
81 | pij_cassist_llrtopv5(p5,ns);
82 | }
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 | #ifdef __cplusplus
122 | }
123 | #endif
124 | #endif
125 |
--------------------------------------------------------------------------------
/pij/gassist/gassist.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This part contains the main interface function of genotype assisted pij inference.
19 | */
20 |
21 | #ifndef _HEADER_LIB_PIJ_GASSIST_H_
22 | #define _HEADER_LIB_PIJ_GASSIST_H_
23 | #include "../../base/config.h"
24 | #include "../../base/types.h"
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 | /* Estimates the p-value of A B against A->B from genotype and expression data with 5 tests.
31 | * E is always the best eQTL of A. Full data is required.
32 | * g: (ng,ns) Genotype data, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t.
33 | * t: (ng,ns) Expression data of A.
34 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A.
35 | * p1: (ng) P-values of step 1. Tests E->A v.s. E A.
36 | * p2: (ng,nt) P-values of step 2. Tests E->B v.s. E B.
37 | * p3: (ng,nt) P-values of step 3. Tests E->A->B v.s. E->A->B with E->B.
38 | * p4: (ng,nt) P-values of step 4. Tests E->A->B with E->B v.s. E->A B.
39 | * p5: (ng,nt) P-values of step 5. Tests E->A->B with E->B v.s. A<-E->B.
40 | * nv: Number of possible values each genotype entry may take, =number of alleles+1.
41 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1.
42 | * Return: 0 on success
43 | * Appendix:
44 | * ng: Number of genes with best eQTL.
45 | * nt: Number of genes with expression data for B
46 | * ns: Number of samples.
47 | */
48 | int pijs_gassist_pv(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,size_t memlimit);
49 |
50 | /* Estimates the probability of A->B from genotype and expression data with 5 tests.
51 | * E is always the best eQTL of A. Full data is required.
52 | * g: (ng,ns) Genotype data, =0,1,...,nv-1. Each is the best eQTL of the corresponding gene in t.
53 | * t: (ng,ns) Expression data of A.
54 | * t2: (nt,ns) Expression data of B. Can be A or a superset of A.
55 | * p1: (ng) Probabilities of step 1. Tests E->A v.s. E A. For nodiag=0, because the function expects significant eQTLs, p1 always return 1. For nodiag=1, uses diagonal elements of p2. Consider replacing p1 with your own (1-FDR) from eQTL discovery.
56 | * p2: (ng,nt) Probabilities of step 2. Tests E->B v.s. E B.
57 | * p3: (ng,nt) Probabilities of step 3. Tests E->A->B v.s. E->A->B with E->B.
58 | * p4: (ng,nt) Probabilities of step 4. Tests E->A->B with E->B v.s. E->A B.
59 | * p5: (ng,nt) Probabilities of step 5. Tests E->A->B with E->B v.s. A<-E->B.
60 | * nv: Number of possible values each genotype entry may take, =number of alleles+1.
61 | * nodiag: When the top ng rows of t2 is exactly t, diagonals of p2 and p3 are meaningless. In this case, set nodiag to 1 to avoid inclusion of NANs. For nodiag=0, t and t2 should not have any identical genes.
62 | * memlimit: The function is able to split very large datasets (ng and nt) into smaller chunks for inference. This variable specifies the approximate memory usage limit. Note: For large datasets, a too small memory limit can fail the function. For unlimited memory, set memlimit=-1.
63 | * Return: 0 on success
64 | * Appendix:
65 | * ng: Number of genes with best eQTL.
66 | * nt: Number of genes with expression data for B
67 | * ns: Number of samples.
68 | */
69 | int pijs_gassist(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,char nodiag,size_t memlimit);
70 |
71 | /* Estimates the probability of A->B from genotype and expression data with default combination of tests. Uses results from pijs_gassist. Variables have the same definitions except:
72 | * ans: (ng,nt) Predicted probability of A->B based on default combination of 5 tests. The default combination is (p2*p5+p4)/2. Note: this combination does not include p1.
73 | * Return: 0 on success
74 | */
75 | int pij_gassist(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,size_t nv,char nodiag,size_t memlimit);
76 |
77 | /* Estimates the probability of A->B from genotype and expression data with traditional causal inference method.
78 | * NOTE: This is not and is not intended as a loyal reimplementation of the Trigger R package. Instead, it aims at reusing methods and tests of Findr to produce inferences that mimic the three tests performed by Trigger. Many implementational details are different between this function and Trigger, although a significant (but not full) overlap has been observed in existing studies. This method does not include p1.
79 | * Inputs and outputs are the same as function pij_gassist.
80 | */
81 | int pij_gassist_trad(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,MATRIXF* ans,size_t nv,char nodiag,size_t memlimit);
82 |
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 | #endif
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/pij/gassist/llr.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This part contains the log likelihood ratio calculations.
19 | */
20 |
21 | #ifndef _HEADER_LIB_PIJ_GASSIST_LLR_H_
22 | #define _HEADER_LIB_PIJ_GASSIST_LLR_H_
23 | #include "../../base/config.h"
24 | #include "../../base/types.h"
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 | /* Multithread calculation of log likelihood ratios for 5 tests.
31 | * g: MATRIXG (ng,ns) Full genotype data matrix
32 | * t: MATRIXF (ng,ns) Supernormalized transcript data matrix of A
33 | * t2: MATRIXF (nt,ns) Supernormalized transcript data matrix of B
34 | * llr1: VECTORF (ng). Log likelihood ratios for test 1. Tests E->A v.s. E A.
35 | * llr2: MATRIXF (ng,nt). Log likelihood ratios for test 2. Tests E->B v.s. E B.
36 | * llr3: MATRIXF (ng,nt). Log likelihood ratios for test 3. Tests E->A->B v.s. E->A->B with E->B.
37 | * llr4: MATRIXF (ng,nt). Log likelihood ratios for test 4. Tests E->A->B with E->B v.s. E->A B.
38 | * llr5: MATRIXF (ng,nt). Log likelihood ratios for test 5. Tests E->A->B with E->B v.s. A<-E->B.
39 | * nv: Number of possible values for each genotype
40 | * Return: 0 on success.
41 | */
42 | int pij_gassist_llr(const MATRIXG* g,const MATRIXF* t,const MATRIXF* t2,VECTORF* llr1,MATRIXF* llr2,MATRIXF* llr3,MATRIXF* llr4,MATRIXF* llr5,size_t nv);
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | #endif
78 |
--------------------------------------------------------------------------------
/pij/gassist/llrtopij.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../../base/gsl/math.h"
23 | #include "../../base/gsl/histogram.h"
24 | #include "../../base/gsl/blas.h"
25 | #include "../../base/logger.h"
26 | #include "../../base/threading.h"
27 | #include "../../base/macros.h"
28 | #include "../../base/data_process.h"
29 | #include "../llrtopij.h"
30 | #include "llrtopij.h"
31 |
32 |
33 |
34 | /* Step 1 shortcut: assigns probability 1 to every element of p1.
35 |  * Useful when the best eQTLs have already been selected in advance.
36 |  * p1: Vector to be filled with 1.
37 |  * Return: always 0.
38 |  */
39 | static inline int pij_gassist_llrtopij1_1(VECTORF* p1)
40 | {
41 |     const int status=0;    //Nothing in this step reports failure
42 |
43 |     LOG(9,"Converting LLR to probabilities for step 1. Filling with 1.")
44 |     VECTORFF(set_all)(p1,1);
45 |     return status;
46 | }
42 |
43 | /* Convert real log likelihood ratios into probability functions.
44 | * This function converts every A in hypothesis (E->A->B) separately.
45 | * Suppose there are ng (E,A) pairs and nt Bs, this function converts ng times,
46 | * each for one (E,A) pair but all Bs.
47 | * d: (ng,nt) Input log likelihood ratios for construction of
48 | * histograms and calculation of probability of true hypothesis.
49 | * g: (ng,ns) Original genotype matrix, used for analytical calculation
50 | * of null distribution. Every element=0,1,...,nv-1.
51 | * h: [nv-1]. Null histogram of the specific test.
52 | * Output of pij_nullhist.
53 | * nv: Maximum number of values each g may take.
54 | * nodiag: If diagonal elements of d should be removed in construction of real
55 | * histogram. This should be set to true (!=0) when t is identical with
56 | * the top rows of t2 (in calculation of llr).
57 | * Return: 0 if success.
58 | */
59 | static int pij_gassist_llrtopij_convert_self(MATRIXF* d,const MATRIXG* g,const gsl_histogram * const * h, size_t nv,char nodiag,long nodiagshift)
60 | {
61 | #define CLEANUP CLEANVECG(vcount)CLEANAMHIST(hreal,nth)CLEANAMHIST(hc,nth)\
62 | CLEANMATD(mb1)CLEANMATD(mb2)CLEANMATD(mnull)CLEANMATF(mb3)CLEANVECD(vwidth)
63 |
64 | VECTORG *vcount;
65 | size_t ng=g->size1;
66 | size_t i,nbin;
67 | //gsl_histogram **hreal,**hc;
68 | MATRIXD *mb1,*mb2,*mnull;
69 | MATRIXF *mb3;
70 | VECTORD *vwidth;
71 | VECTORDF(view) vv1;
72 | size_t nth;
73 |
74 | mb1=mb2=mnull=0;
75 | mb3=0;
76 | vwidth=0;
77 | vcount=0;
78 | //Validity checks
79 | {
80 | int nth0=omp_get_max_threads();
81 | assert(nth0>0);
82 | nth=(size_t)nth0;
83 | }
84 |
85 |
86 | AUTOCALLOC(gsl_histogram*,hreal,nth,64)
87 | AUTOCALLOC(gsl_histogram*,hc,nth,64)
88 | if(!(hreal&&hc))
89 | ERRRET("Not enough memory.");
90 |
91 | //Construct null density histograms
92 | nbin=h[0]->n;
93 | //Memory allocation
94 | {
95 | size_t n1,n2;
96 | pij_llrtopij_convert_histograms_get_buff_sizes(nbin,&n1,&n2);
97 | mb1=MATRIXDF(alloc)(nth,n1);
98 | mb2=MATRIXDF(alloc)(nth,n2);
99 | mnull=MATRIXDF(alloc)(nth,nbin);
100 | mb3=MATRIXFF(alloc)(nth,d->size2);
101 | vwidth=VECTORDF(alloc)(nbin);
102 | if(!(mb1&&mb2&&mnull&&mb3&&vwidth))
103 | ERRRET("Not enough memory.")
104 | }
105 |
106 | //Prepare for real histogram
107 | {
108 | int ret;
109 | for(i=0,ret=1;in+2);
113 | ret=ret&&hreal[i]&&hc[i];
114 | }
115 | vcount=VECTORGF(alloc)(ng);
116 | if(!(ret&&vcount))
117 | ERRRET("Not enough memory.");
118 | }
119 |
120 | {
121 | VECTORUC *vb4=VECTORUCF(alloc)(nv);
122 | if(!vb4)
123 | ERRRET("Not enough memory.");
124 | MATRIXGF(countv_byrow_buffed)(g,vcount,vb4);
125 | CLEANVECUC(vb4)
126 | }
127 |
128 | //Conversion
129 | for(i=2;i<=nv;i++)
130 | {
131 | vv1=VECTORDF(view_array)(h[i-2]->range+1,nbin);
132 | VECTORDF(memcpy)(vwidth,&vv1.vector);
133 | vv1=VECTORDF(view_array)(h[i-2]->range,nbin);
134 | VECTORDF(sub)(vwidth,&vv1.vector);
135 | vv1=VECTORDF(view_array)(h[i-2]->bin,nbin);
136 | #pragma omp parallel
137 | {
138 | size_t ng1,ng2,id;
139 | size_t j;
140 | long k;
141 | VECTORDF(view) vvreal,vvnull,vvb1,vvb2;
142 | VECTORFF(view) vvb3,vva;
143 |
144 | id=(size_t)omp_get_thread_num();
145 | vvreal=VECTORDF(view_array)(hreal[id]->bin,nbin);
146 | vvnull=MATRIXDF(row)(mnull,id);
147 | vvb1=MATRIXDF(row)(mb1,id);
148 | vvb2=MATRIXDF(row)(mb2,id);
149 | vvb3=MATRIXFF(row)(mb3,id);
150 | threading_get_startend(ng,&ng1,&ng2);
151 |
152 | for(j=ng1;jrange,h[i-2]->range,(nbin+1)*sizeof(*hreal[id]->range));
158 | memset(hreal[id]->bin,0,nbin*sizeof(*hreal[id]->bin));
159 | //Construct real histogram
160 | if(nodiag&&((long)j+nodiagshift>=0)&&((long)j+nodiagshift<(long)d->size2))
161 | {
162 | for(k=(long)j+nodiagshift-1;k>=0;k--)
163 | gsl_histogram_increment(hreal[id],MATRIXFF(get)(d,j,(size_t)k));
164 | for(k=(long)j+nodiagshift+1;k<(long)d->size2;k++)
165 | gsl_histogram_increment(hreal[id],MATRIXFF(get)(d,j,(size_t)k));
166 | VECTORDF(scale)(&vvreal.vector,1./(double)(d->size2-1));
167 | }
168 | else
169 | {
170 | for(k=0;k<(long)d->size2;k++)
171 | gsl_histogram_increment(hreal[id],MATRIXFF(get)(d,j,(size_t)k));
172 | VECTORDF(scale)(&vvreal.vector,1./(double)(d->size2));
173 | }
174 |
175 | //Convert to density histogram
176 | VECTORDF(div)(&vvreal.vector,vwidth);
177 | //Convert to probability central histogram
178 | pij_llrtopij_convert_histograms_buffed(hreal[id],&vvnull.vector,hc[id],&vvb1.vector,&vvb2.vector);
179 | //Convert likelihoods to probabilities
180 | vva=MATRIXFF(row)(d,j);
181 | pij_llrtopij_histogram_interpolate_linear(hc[id],&vvb3.vector,&vva.vector);
182 | }
183 | }
184 | }
185 | CLEANUP
186 | return 0;
187 | #undef CLEANUP
188 | }
189 |
190 | int pij_gassist_llrtopijs(const MATRIXG* g,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,const gsl_histogram * const * h[4],char nodiag,long nodiagshift)
191 | {
192 | int ret=0,ret2=0;
193 | if(g->size2<=3)
194 | {
195 | LOG(0,"Needs at least 4 samples to compute probabilities.")
196 | return 1;
197 | }
198 | ret=ret||(ret2=pij_gassist_llrtopij_convert_self(p2,g,h[0],nv,nodiag,nodiagshift));
199 | if(ret2)
200 | LOG(1,"Failed to log likelihood ratios to probabilities in step 2.")
201 | //For p1, if nodiag, copy p2 data, otherwise set all to 1.
202 | if(nodiag)
203 | {
204 | VECTORFF(view) vv;
205 | vv=MATRIXFF(superdiagonal)(p2,(size_t)nodiagshift);
206 | ret=(ret2=VECTORFF(memcpy)(p1,&vv.vector));
207 | }
208 | else
209 | ret=(ret2=pij_gassist_llrtopij1_1(p1));
210 | if(ret2)
211 | LOG(1,"Failed to log likelihood ratios to probabilities in step 1.")
212 | ret=ret||(ret2=pij_gassist_llrtopij_convert_self(p3,g,h[1],nv,nodiag,nodiagshift));
213 | if(ret2)
214 | LOG(1,"Failed to log likelihood ratios to probabilities in step 3.")
215 | MATRIXFF(scale)(p3,-1);
216 | MATRIXFF(add_constant)(p3,1);
217 | ret=ret||(ret2=pij_gassist_llrtopij_convert_self(p4,g,h[2],nv,nodiag,nodiagshift));
218 | if(ret2)
219 | LOG(1,"Failed to log likelihood ratios to probabilities in step 4.")
220 | ret=ret||(ret2=pij_gassist_llrtopij_convert_self(p5,g,h[3],nv,nodiag,nodiagshift));
221 | if(ret2)
222 | LOG(1,"Failed to log likelihood ratios to probabilities in step 5.")
223 | return ret;
224 | }
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
--------------------------------------------------------------------------------
/pij/gassist/llrtopij.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the conversion from log likelihood ratio to probabilities
19 | *
20 | */
21 |
22 | #ifndef _HEADER_LIB_PIJ_GASSIST_LLRTOPIJ_H_
23 | #define _HEADER_LIB_PIJ_GASSIST_LLRTOPIJ_H_
24 | #include "../../base/config.h"
25 | #include "../../base/gsl/histogram.h"
26 | #include "../../base/types.h"
27 | #ifdef __cplusplus
28 | extern "C"
29 | {
30 | #endif
31 |
32 |
33 |
34 | /* Converts four LLRs into probabilities together.
35 | * Uses pij_gassist_llrtopij1 to pij_gassist_llrtopij5.
36 | * See above functions for parameter definitions.
37 | * h: Null histograms. 0 to 3 for tests 2 to 5.
38 | * Return: 0 if all functions are successful.
39 | */
40 | int pij_gassist_llrtopijs(const MATRIXG* g,VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,size_t nv,const gsl_histogram * const * h[4],char nodiag,long nodiagshift);
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 | #ifdef __cplusplus
53 | }
54 | #endif
55 | #endif
56 |
--------------------------------------------------------------------------------
/pij/gassist/llrtopv.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../../base/logger.h"
23 | #include "../../base/threading.h"
24 | #include "../../base/macros.h"
25 | #include "../llrtopv.h"
26 |
27 | /* Convert log likelihood ratios into p-values for matrix
28 | * d: (ng,nt) Data, as input for log likelihood ratios,
29 | and also as output for converted p-values.
30 | * g: (ng,ns) Original genotype matrix, used for analytical calculation
31 | * of null distribution. Every element=0,1,...,nv-1.
32 | Has matching rows with data d.
33 | * nv: Maximum number of values each g may take.
34 | * n1c,
35 | * n1d,
36 | * n2c,
37 | * n2d: Parameters to specify null distribution. See pij_nullhist
38 | * Return: 0 if success.
39 | */
40 | static int pij_gassist_llrtopv_block(MATRIXF* d,const MATRIXG* g,size_t nv,long n1c,size_t n1d,long n2c,size_t n2d)
41 | { //Converts each row of d in place via pij_llrtopv_block, with null parameters chosen per row
42 | #define CLEANUP AUTOFREE(nexist)
43 |
44 | size_t i,j,nvr;
45 | VECTORFF(view) vv;
46 | assert(d->size1==g->size1);
47 | assert(MATRIXGF(max)(g)size1;i++) //NOTE(review): this line is truncated in this listing (text between angle brackets was lost); restore from upstream before editing
54 | {
55 | //Count for number of genotypes
56 | memset(nexist,0,nv*sizeof(nexist[0])); //Reset the nv presence flags for this row
57 | for(j=0;jsize2;j++) //NOTE(review): loop header truncated in this listing
58 | nexist[MATRIXGF(get)(g,i,j)]=1; //Flag genotype value g[i,j] as present
59 | nvr=0;
60 | for(j=0;j1); //NOTE(review): truncated in this listing; presumably sums nexist into nvr and asserts on it, confirm against upstream
63 | nvr-=2; //nvr then enters the null parameters below as n1=nvr*n1c+n1d and n2=n2d-nvr*n2c
64 | assert(((long)nvr*n1c+(long)n1d>0)&&((long)n2d>(long)nvr*n2c)); //Both null parameters must be positive
65 | vv=MATRIXFF(row)(d,i);
66 | pij_llrtopv_block(&vv.vector,(size_t)((long)nvr*n1c+(long)n1d),(size_t)((long)n2d-(long)nvr*n2c)); //Convert row i of d in place
67 | }
68 |
69 | CLEANUP
70 | return 0;
71 | #undef CLEANUP
72 | }
73 |
74 | /* Convert log likelihood ratios into p-values for vector
75 | * d: (ng) Data, as input for log likelihood ratios,
76 | and also as output for converted p-values.
77 | * g: (ng,ns) Original genotype matrix, used for analytical calculation
78 | * of null distribution. Every element=0,1,...,nv-1.
79 | Has matching rows with data d.
80 | * nv: Maximum number of values each g may take.
81 | * n1c,
82 | * n1d,
83 | * n2c,
84 | * n2d: Parameters to specify null distribution. See pij_nullhist
85 | * Return: 0 if success.
86 | */
87 | static inline int pij_gassist_llrtopv_vec_block(VECTORF* d,const MATRIXG* g,size_t nv,long n1c,size_t n1d,long n2c,size_t n2d)
88 | {
89 | MATRIXFF(view) mv=MATRIXFF(view_vector)(d,d->size,1);
90 | assert(d->size==g->size1);
91 | assert((long)n2d>n2c*(long)nv);
92 | return pij_gassist_llrtopv_block(&mv.matrix,g,nv,n1c,n1d,n2c,n2d);
93 | }
94 |
95 | /* Convert log likelihood ratios into p-values for matrix in single thread
96 | * p1: (ng)
97 | * p2: (ng,nt)
98 | * p3: (ng,nt)
99 | * p4: (ng,nt)
100 | * p5: (ng,nt) Data for 5 tests from p1 to p5, as input for log likelihood ratios,
101 | and also as output for converted p-values.
102 | * g: (ng,ns) Original genotype matrix, used for analytical calculation
103 | * of null distribution. Every element=0,1,...,nv-1.
104 | Has matching rows with data p1 to p5..
105 | * nv: Maximum number of values each g may take.
106 | * Return: 0 if success.
107 | */
108 | static int pij_gassist_llrtopvs_block(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,const MATRIXG* g,size_t nv)
109 | {
110 | int ret=0,ret2=0;
111 | assert((p1->size==g->size1)&&(p2->size1==g->size1)&&(p3->size1==g->size1)&&(p4->size1==g->size1)&&(p5->size1==g->size1));
112 | assert((p2->size2==p3->size2)&&(p2->size2==p4->size2)&&(p2->size2==p5->size2));
113 |
114 | ret=ret||(ret2=pij_gassist_llrtopv_vec_block(p1,g,nv,1,1,1,g->size2-2));
115 | if(ret2)
116 | LOG(1,"Failed to log likelihood ratios to p-values in step 1.")
117 | ret=ret||(ret2=pij_gassist_llrtopv_block(p2,g,nv,1,1,1,g->size2-2));
118 | if(ret2)
119 | LOG(1,"Failed to log likelihood ratios to p-values in step 2.")
120 | ret=ret||(ret2=pij_gassist_llrtopv_block(p3,g,nv,1,1,1,g->size2-3));
121 | if(ret2)
122 | LOG(1,"Failed to log likelihood ratios to p-values in step 3.")
123 | ret=ret||(ret2=pij_gassist_llrtopv_block(p4,g,nv,1,2,1,g->size2-3));
124 | if(ret2)
125 | LOG(1,"Failed to log likelihood ratios to p-values in step 4.")
126 | ret=ret||(ret2=pij_gassist_llrtopv_block(p5,g,nv,0,1,1,g->size2-3));
127 | if(ret2)
128 | LOG(1,"Failed to log likelihood ratios to p-values in step 5.")
129 | return ret;
130 | }
131 |
132 | int pij_gassist_llrtopvs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,const MATRIXG* g,size_t nv)
133 | { //Splits the rows of p1..p5 and g into per-thread ranges and converts each range with pij_gassist_llrtopvs_block
134 | int ret=0;
135 | assert((p1->size==g->size1)&&(p2->size1==g->size1)&&(p3->size1==g->size1)&&(p4->size1==g->size1)&&(p5->size1==g->size1));
136 | assert((p2->size2==p3->size2)&&(p2->size2==p4->size2)&&(p2->size2==p5->size2));
137 |
138 | if(g->size2size,&ng1,&ng2); //NOTE(review): truncated in this listing (text between angle brackets was lost); the lost part presumably opens the parallel region and fills ng1,ng2, restore from upstream
150 | if(ng2>ng1) //Only a non-empty row range [ng1,ng2) is processed
151 | {
152 | size_t dn=ng2-ng1; //Number of rows in this range
153 | VECTORFF(view) vv1;
154 | MATRIXFF(view) mv2,mv3,mv4,mv5;
155 | MATRIXGF(const_view) mvg=MATRIXGF(const_submatrix)(g,ng1,0,dn,g->size2); //Rows ng1..ng2-1 of g, all columns
156 | vv1=VECTORFF(subvector)(p1,ng1,dn);
157 | mv2=MATRIXFF(submatrix)(p2,ng1,0,dn,p2->size2);
158 | mv3=MATRIXFF(submatrix)(p3,ng1,0,dn,p2->size2); //p2->size2 is used for p3..p5 as well; the assert above requires equal column counts
159 | mv4=MATRIXFF(submatrix)(p4,ng1,0,dn,p2->size2);
160 | mv5=MATRIXFF(submatrix)(p5,ng1,0,dn,p2->size2);
161 | ret2=pij_gassist_llrtopvs_block(&vv1.vector,&mv2.matrix,&mv3.matrix,&mv4.matrix,&mv5.matrix,&mvg.matrix,nv);
162 | }
163 | #pragma omp critical
164 | ret=ret||ret2; //Merge this range status into the shared result; nonzero if any range failed
165 | }
166 | return ret;
167 | }
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
--------------------------------------------------------------------------------
/pij/gassist/llrtopv.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the conversion from log likelihood ratio to p-values for discrete anchors, e.g. genotypes
19 | *
20 | */
21 | #ifndef _HEADER_LIB_PIJ_GASSIST_LLRTOPV_H_
22 | #define _HEADER_LIB_PIJ_GASSIST_LLRTOPV_H_
23 | #include "../../base/config.h"
24 | #include "../../base/types.h"
25 | #ifdef __cplusplus
26 | extern "C"
27 | {
28 | #endif
29 |
30 | /* Convert log likelihood ratios into p-values for matrix in multi thread
31 | * p1: (ng)
32 | * p2: (ng,nt)
33 | * p3: (ng,nt)
34 | * p4: (ng,nt)
35 | * p5: (ng,nt) Data for 5 tests from p1 to p5, as input for log likelihood ratios,
36 | and also as output for converted p-values of LLRs of each test.
37 | * g: (ng,ns) Original genotype matrix, used for analytical calculation
38 | * of null distribution. Every element=0,1,...,nv-1.
39 | Has matching rows with data p1 to p5.
40 | * nv: Maximum number of values each g may take.
41 | * Return: 0 if success.
42 | */
43 | int pij_gassist_llrtopvs(VECTORF* p1,MATRIXF* p2,MATRIXF* p3,MATRIXF* p4,MATRIXF* p5,const MATRIXG* g,size_t nv);
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 | #ifdef __cplusplus
83 | }
84 | #endif
85 | #endif
86 |
--------------------------------------------------------------------------------
/pij/gassist/nullhist.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../../base/logger.h"
23 | #include "../../base/gsl/histogram.h"
24 | #include "../nullhist.h"
25 | #include "nullhist.h"
26 |
27 | int pij_gassist_nullhists(gsl_histogram** h[4],size_t nt,size_t ns,size_t nv,const FTYPE dmax[4])
28 | {
29 | //Construct null density histograms
30 | h[0]=pij_nullhist((double)dmax[0],nv,nt,1,1,1,ns-2);
31 | h[1]=pij_nullhist((double)dmax[1],nv,nt,1,1,1,ns-3);
32 | h[2]=pij_nullhist((double)dmax[2],nv,nt,1,2,1,ns-3);
33 | h[3]=pij_nullhist((double)dmax[3],nv,nt,0,1,1,ns-3);
34 | if(h[0]&&h[1]&&h[2]&&h[3])
35 | return 0;
36 |
37 | LOG(1,"pij_nullhist failed.")
38 | return 1;
39 | }
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/pij/gassist/nullhist.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file provides the analytical method to construct histograms
19 | * for the null pdf of LLRs of genotype assisted pij inference.
20 | */
21 |
22 | #ifndef _HEADER_LIB_PIJ_GASSIST_NULLHIST_H_
23 | #define _HEADER_LIB_PIJ_GASSIST_NULLHIST_H_
24 | #include "../../base/config.h"
25 | #include "../../base/gsl/histogram.h"
26 | #include "../../base/types.h"
27 | #ifdef __cplusplus
28 | extern "C"
29 | {
30 | #endif
31 |
32 | /* Produce null histograms for all tests (2 to 5).
33 | * h: Output location of null histograms. 0 to 3 for tests 2 to 5.
34 | * nt: Number of targets
35 | * ns: Number of samples
36 | * nv: Number of values, = number of alleles + 1
37 | * dmax: Maximum value of all LLRs, for histogram construction.
38 | * It can be larger than the maximum of d, if memlimit is not infinite.
39 | * 0 to 3 for tests 2 to 5.
40 | * Return: 0 on success and 1 otherwise
41 | */
42 | int pij_gassist_nullhists(gsl_histogram** h[4],size_t nt,size_t ns,size_t nv,const FTYPE dmax[4]);
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 | #ifdef __cplusplus
65 | }
66 | #endif
67 | #endif
68 |
--------------------------------------------------------------------------------
/pij/llrtopij.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the conversion from log likelihood ratio to probabilities
19 | *
20 | */
21 |
22 | #ifndef _HEADER_LIB_PIJ_LLRTOPIJ_H_
23 | #define _HEADER_LIB_PIJ_LLRTOPIJ_H_
24 | #include "../base/config.h"
25 | #include "../base/gsl/histogram.h"
26 | #include "../base/types.h"
27 | #ifdef __cplusplus
28 | extern "C"
29 | {
30 | #endif
31 |
32 |
33 | /* Use central histogram to estimate distribution probabilities of any point
34 | * within the histogram range. Linear interpolation is used.
35 | * Points outside histogram range give boundary output
36 | * hc: central histogram for estimation
37 | * d: data (x coordinates of histogram) to be estimated their probabilities
38 | * ans: output of estimated probabilities
39 | */
40 | void pij_llrtopij_histogram_interpolate_linear(const gsl_histogram *hc,const VECTORF* d,VECTORF* ans);
41 |
42 | /* Calculate buffer sizes for histogram conversion in pij_llrtopij_convert_histograms_buffed.
43 | * n: Number of histogram bins. This must match pij_llrtopij_convert_histograms_buffed.
44 | * n1,
45 | * n2: Sizes of two buffers for VECTORD.
46 | */
47 | void pij_llrtopij_convert_histograms_get_buff_sizes(size_t n,size_t *n1,size_t *n2);
48 |
49 | /* Allocate buffer for histogram conversion in pij_llrtopij_convert_histograms_buffed.
50 | * n: Number of histogram bins. This must match pij_llrtopij_convert_histograms_buffed.
51 | * vb1,
52 | * vb2: Output locations of allocated buffers.
53 | * Return: 0 on success.
54 | */
55 | int pij_llrtopij_convert_histograms_make_buffs(size_t n,VECTORD** vb1,VECTORD** vb2);
56 |
57 | /* Convert density histograms of null and real distribution into probability central
58 | * histogram with buffer provided. Both histograms must be distributions
59 | * (sum to unity and nonnegative).
60 | * hreal: (n) Real density histogram to convert from. Also changed in calculation.
61 | * vnull: (n) Null density histogram in vector format. Also changed in calculation.
62 | * hc: (n+2) Central probability histogram as output.
63 | * vb1,
64 | * vb2: Buffers needed for conversion. To allocate buffers, use
65 | pij_llrtopij_convert_histograms_make_buffs.
66 | */
67 | void pij_llrtopij_convert_histograms_buffed(gsl_histogram* hreal,VECTORD* vnull,gsl_histogram* hc,VECTORD* vb1,VECTORD* vb2);
68 |
69 | /* Convert density histograms of null and real distribution into probability central histogram. Both histograms must be distributions (sum to unity and nonnegative).
70 | * hreal: (n) Real density histogram to convert from. Also changed in calculation.
71 | * vnull: (n) Null density histogram in vector format. Also changed in calculation.
72 | * hc: (n+2) Central probability histogram as output.
73 | * Return: 0 if success.
74 | */
75 | int pij_llrtopij_convert_histograms(gsl_histogram* hreal,VECTORD* vnull,gsl_histogram* hc);
76 |
77 |
78 | /* Obtains the maximum of matrix, possibly ignoring diagonal elements.
79 | * Fails in the presence of NAN, and warns and updates at INFs.
80 | * d: Matrix/Vector to get maximum, and update any INFs
81 | * nodiag: Whether to ignore diagonal values when searching for maximum.
82 | * Return: 0 if NAN is found, or the non-INF maximum otherwise.
83 | */
84 | FTYPE pij_llrtopij_llrmatmax(MATRIXF* d,char nodiag);
85 | FTYPE pij_llrtopij_llrvecmax(VECTORF* d);
86 |
87 |
88 | /* Convert LLR of real data to probabilities, when the distribution
89 | * of LLR of null distribution can be calculated analytically to follow
90 | * x=-0.5*log(1-z1/(z1+z2)), where z1~chi2(n1),z2~chi2(n2).
91 | * The conversion is performed for each gene A, i.e. per row of d and dconv.
92 | * This function is older than pij_llrtopij_convert_single so it is not parallel.
93 | * Make it parallel before using.
94 | * d: [nrow,nx] The data to use for calculation of conversion rule from LLR to pij.
95 | * dconv: [nrow,nd] The data of LLR to actually convert to pij. Can be same with d.
96 | * ans: [nrow,nd] The output location of converted pij from dconv.
97 | * n1,
98 | * n2: Parameters of null distribution.
99 | * nodiag: Whether diagonal elements of d should be ignored when converting
100 | * to probabilities.
101 | * nodiagshift: Diagonal column shift for nodiag==1.
102 | * For nodiagshift>0/<0, use upper/lower diagonal.
103 | * Return: 0 on success.
104 | */
105 | int pij_llrtopij_convert_single(const MATRIXF* d,const MATRIXF* dconv,MATRIXF* ans,size_t n1,size_t n2,char nodiag,long nodiagshift);
106 |
107 | // Same with pij_llrtopij_convert_single, for d=dconv=ans. Saves memory.
108 | int pij_llrtopij_convert_single_self(MATRIXF* d,size_t n1,size_t n2,char nodiag,long nodiagshift);
109 |
110 |
111 |
112 |
113 | #ifdef __cplusplus
114 | }
115 | #endif
116 | #endif
117 |
--------------------------------------------------------------------------------
/pij/llrtopv.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../base/macros.h"
23 | #include "../base/threading.h"
24 | #include "llrtopv.h"
25 |
26 | void pij_llrtopvm(MATRIXF* p,size_t n1,size_t n2)
27 | {
28 | #pragma omp parallel
29 | {
30 | size_t m1,m2;
31 |
32 | threading_get_startend(p->size1,&m1,&m2);
33 | if(m2>m1)
34 | {
35 | MATRIXFF(view) mvp=MATRIXFF(submatrix)(p,m1,0,m2-m1,p->size2);
36 | pij_llrtopvm_block(&mvp.matrix,n1,n2);
37 | }
38 | }
39 | }
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/pij/llrtopv.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | /* This file contains the conversion from log likelihood ratio to p-values
19 | *
20 | */
21 | #ifndef _HEADER_LIB_PIJ_LLRTOPV_H_
22 | #define _HEADER_LIB_PIJ_LLRTOPV_H_
23 | #include "../base/config.h"
24 | #include "../base/types.h"
25 | #include "nulldist.h"
26 | #ifdef __cplusplus
27 | extern "C"
28 | {
29 | #endif
30 |
31 |
32 | /* Converts a vector of log likelihood ratios into p-values with the same null distribution.
33 | * Single thread.
34 | * For null distribution, see pij_nulldist_cdfQ.
35 | * p: data as input for LLR and output for p-values
36 | * n1,
37 | * n2: Null distribution parameters.
38 | */
39 | static inline void pij_llrtopv_block(VECTORF* p,size_t n1,size_t n2);
40 | // Converts a matrix with the same null distribution in single thread
41 | static inline void pij_llrtopvm_block(MATRIXF* p,size_t n1,size_t n2);
42 | // Converts a matrix with the same null distribution in multi threads
43 | void pij_llrtopvm(MATRIXF* p,size_t n1,size_t n2);
44 |
45 |
46 |
47 | static inline void pij_llrtopv_block(VECTORF* p,size_t n1,size_t n2)
48 | { //Replaces every entry of p with pij_nulldist_cdfQ of that entry under parameters n1,n2
49 | size_t i;
50 | for(i=0;isize;i++) //NOTE(review): loop header truncated in this listing (text between angle brackets was lost); restore from upstream
51 | VECTORFF(set)(p,i,(FTYPE)pij_nulldist_cdfQ(VECTORFF(get)(p,i),n1,n2)); //Computed in double, stored back as FTYPE
52 | }
53 |
54 | static inline void pij_llrtopvm_block(MATRIXF* p,size_t n1,size_t n2)
55 | { //Matrix version: replaces every element of p with pij_nulldist_cdfQ of that element under parameters n1,n2
56 | size_t i,j;
57 | for(i=0;isize1;i++) //NOTE(review): loop header truncated in this listing (text between angle brackets was lost); restore from upstream
58 | for(j=0;jsize2;j++) //NOTE(review): loop header truncated in this listing
59 | MATRIXFF(set)(p,i,j,(FTYPE)pij_nulldist_cdfQ(MATRIXFF(get)(p,i,j),n1,n2)); //Computed in double, stored back as FTYPE
60 | }
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 | #endif
66 |
--------------------------------------------------------------------------------
/pij/nulldist.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see <http://www.gnu.org/licenses/>.
17 | */
18 | #include "../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include "../base/gsl/blas.h"
25 | #include "../base/gsl/math.h"
26 | #include "../base/logger.h"
27 | #include "../base/macros.h"
28 | #include "../base/histogram.h"
29 | #include "../base/data_process.h"
30 | #include "nulldist.h"
31 | #pragma GCC diagnostic ignored "-Wunused-parameter"
32 |
33 |
34 |
35 | /*************************************************************
36 | * Generic functions for any step
37 | *************************************************************/
38 |
39 | void pij_nulldist_pdfs(const VECTORD* loc,VECTORD* ans,const void* param)
40 | {
41 | const struct pij_nulldist_pdfs_param *p=param; //param carries the null distribution parameters n1,n2
42 | size_t nd=loc->size;
43 | size_t i;
44 |
45 | //Part 1: (1-exp(-2*x))^((n1-2)/2)
46 | for(i=0;in1/2-1); //NOTE(review): truncated in this listing (text between angle brackets was lost); restore from upstream
49 | //Part 2: exp(-n2*x), i.e. ans-=n2*loc in log space
50 | gsl_blas_daxpy(-(double)p->n2,loc,ans);
51 | //Part 3: 2*Gamma((n1+n2)/2)/(Gamma(n1/2)*Gamma(n2/2)), added as a log space constant
52 | VECTORDF(add_constant)(ans,M_LN2+math_sf_lngammahalf(p->n1+p->n2)-math_sf_lngammahalf(p->n1)-math_sf_lngammahalf(p->n2));
53 | //Final: all log
54 | for(i=0;isize; //NOTE(review): truncated in this listing; the end of pij_nulldist_pdfs and the head of pij_nulldist_calcpdf_buffed are missing between here and the next line
61 | size_t i,j;
62 |
63 | assert(n1d&&n2d);
64 | assert(nd&&(ans->size2==nd)&&(vb2->size==nd));
65 | //Calculate vb2=log(1-exp(-2x))
66 | VECTORDF(memcpy)(vb2,loc);
67 | VECTORDF(scale)(vb2,-2);
68 | for(i=0;isize1;i++) //NOTE(review): truncated in this listing; the computation of vb2 and of the first row of ans is missing here
86 | { //Row i of ans is row i-1 plus vb2, all still in log space
87 | VECTORDF(const_view) vv1=MATRIXDF(const_row)(ans,i-1);
88 | VECTORDF(view) vv2=MATRIXDF(row)(ans,i);
89 | VECTORDF(memcpy)(&vv2.vector,&vv1.vector);
90 | VECTORDF(add)(&vv2.vector,vb2);
91 | }
92 |
93 | //Include nv-dependent coefficients: per row i, add log(2) and the lngammahalf terms of its parameters
94 | for(i=0;isize1;i++) //NOTE(review): loop header truncated in this listing
95 | {
96 | VECTORDF(view) vv=MATRIXDF(row)(ans,i);
97 | VECTORDF(add_constant)(&vv.vector,(FTYPE)(M_LN2+math_sf_lngammahalf((size_t)((long)i*(n1c-n2c)+(long)(n1d+n2d)))-math_sf_lngammahalf((size_t)((long)i*n1c+(long)n1d))-math_sf_lngammahalf((size_t)(-(long)i*n2c+(long)n2d))));
98 | }
99 | //Convert log pdf to pdf
100 | for(i=0;isize1;i++) //NOTE(review): loop header truncated in this listing
101 | for(j=0;jsize2;j++) //NOTE(review): loop header truncated in this listing
102 | MATRIXDF(set)(ans,i,j,exp(MATRIXDF(get)(ans,i,j)));
103 | }
104 |
105 | static int pij_nulldist_calcpdf(long n1c,size_t n1d,long n2c,size_t n2d,const VECTORD* loc,MATRIXD* ans)
106 | { //Wrapper of pij_nulldist_calcpdf_buffed that provides the buffer itself. Returns 0 on success.
107 | #define CLEANUP AUTOFREEVEC(vb)
108 | AUTOALLOCVECD(vb,loc->size,30000) //Buffer vb with loc->size entries; NOTE(review): the meaning of 30000 is defined by the macro in macros.h, confirm there
109 | if(!vb)
110 | ERRRET("Not enough memory.")
111 | pij_nulldist_calcpdf_buffed(n1c,n1d,n2c,n2d,loc,ans,vb); //Fills ans; vb is only scratch space
112 | CLEANUP
113 | return 0;
114 | #undef CLEANUP
115 | }
116 |
117 | int pij_nulldist_hist_pdf(const double* restrict range,size_t nbin,double* restrict hist,size_t n1,size_t n2,size_t n)
118 | { //Evaluates the null pdf at nsp locations per bin and reduces them into hist. NOTE(review): the end of this function is truncated in this listing.
119 | #define CLEANUP CLEANVECD(loc)CLEANVECD(val)
120 | VECTORD *loc,*val;
121 | VECTORDF(view) vvh=VECTORDF(view_array)(hist,nbin); //hist viewed as a vector of nbin entries
122 | MATRIXDF(view) mvv;
123 | size_t i;
124 | size_t nsp;
125 |
126 | assert(n&&(n<10));
127 | nsp=(size_t)1<<(n-1); //nsp=2^(n-1)
128 | loc=VECTORDF(alloc)(nbin*nsp); //nsp locations for each of the nbin bins
129 | val=VECTORDF(alloc)(nbin*nsp); //pdf value for each location in loc
130 | if(!(loc&&val))
131 | ERRRET("Not enough memory.")
132 |
133 | //Construct bin ranges
134 | {
135 | VECTORDF(const_view) vvc=VECTORDF(const_view_array)(range,nbin+1); //The nbin+1 bin edges
136 | histogram_finer_central(&vvc.vector,loc,nsp); //presumably fills loc with nsp locations inside each bin; confirm in base/histogram.h
137 | }
138 |
139 | mvv=MATRIXDF(view_vector)(val,1,val->size); //val viewed as a matrix with a single row
140 | //Calculate bin values
141 | if(pij_nulldist_calcpdf(0,n1,0,n2,loc,&mvv.matrix)) //n1c=n2c=0, so only the fixed parameters n1,n2 are used
142 | {
143 | CLEANUP
144 | return 1;
145 | }
146 |
147 | //Shrink to output
148 | VECTORDF(set_zero)(&vvh.vector);
149 | for(i=0;i.
17 | */
18 | /* This part contains analytical calculation of the histogram of log likelihood ratio from null hypothesis.
19 | * Each function is applicable to one or more stages, which are stated in the function name as pij_nulldistX_..., where X is the applicable stage.
20 | * For each stage, different methods to calculate histogram can coexist. The method is declared in the function name as suffix:
21 | * _cdf: Calculate histogram as the difference of cdf.
22 | * This is applicable when distribution is single-variable integrable.
23 | * _pdf: Calculate histogram as the pdf mean of points evenly split within the bin. This is applicable when distribution is single-variable non-integrable.
24 | * _sim: Construct histogram by sampling. This is applicable when distribution is multi-variable non-integrable.
25 | */
26 |
27 | #ifndef _HEADER_LIB_PIJ_NULLDIST_H_
28 | #define _HEADER_LIB_PIJ_NULLDIST_H_
29 | #include "../base/gsl/cdf.h"
30 | #include "../base/config.h"
31 | #include "../base/types.h"
32 | #include "../base/math.h"
33 | #ifdef __cplusplus
34 | extern "C"
35 | {
36 | #endif
37 |
38 |
39 | /*************************************************************
40 | * Generic functions for any step
41 | *************************************************************/
42 |
43 | struct pij_nulldist_pdfs_param //Parameters passed to pij_nulldist_pdfs through its param argument
44 | {
45 | size_t n1; //n1 of the null distribution, z1~chi2(n1)
46 | size_t n2; //n2 of the null distribution, z2~chi2(n2)
47 | };
48 |
49 | /* Calculate the pdf p(x|n1,n2) for x=-0.5*log(1-z1/(z1+z2)),
50 | * where z1~chi2(n1), z2~chi2(n2).
51 | * p(x|n1,n2)=2*(1-exp(-2*x))^((n1-2)/2)*exp(-n2*x)*Gamma((n1+n2)/2)
52 | * /(Gamma(n1/2)*Gamma(n2/2)).
53 | * loc: (nd) Locations of x to calculate p(x|n1,n2).
54 | * ans: (nd) Calculated p(x|n1,n2).
55 | * param: Parameters. See struct pij_nulldist_pdfs_param.
56 | */
57 | void pij_nulldist_pdfs(const VECTORD* loc,VECTORD* ans,const void* param);
58 |
59 |
60 | /* Calculate the pdf p(x|i) for x=-0.5*log(1-z1_i/(z1_i+z2_i)),
61 | * where z1_i ~ chi2(i*n1c+n1d), z2_i~chi2(-i*n2c+n2d), i=0,...,nmax-1.
62 | * p(x|i)=2*(1-exp(-2*x))^((i*n1c+n1d-2)/2)*exp((i*n2c-n2d)*x)*Gamma((i*(n1c-n2c)+n1d+n2d)/2)/(Gamma((i*n1c+n1d)/2)*Gamma((-i*n2c+n2d)/2)).
63 | * The buffer vb2 is provided by the caller.
64 | * n1c,
65 | * n1d,
66 | * n2c,
67 | * n2d: As indicated in equation.
68 | * nmax: As indicated in equation. Not a parameter; presumably ans->size1 (TODO confirm against the implementation).
69 | * loc: (nd) Locations of x to calculate p(x|i).
70 | * ans: (nmax,nd) Calculated p(x|i). ans[j,k]=p(loc[k]|i=nsubmin+j). NOTE(review): nsubmin is not defined in this header; with i=0,...,nmax-1 it is presumably 0.
71 | * vb2: (nd) Buffer. =log(1-exp(-2x))*n1c/2+x*n2c
72 | * nd: loc->size (not a parameter).
73 | */
74 | void pij_nulldist_calcpdf_buffed(long n1c,size_t n1d,long n2c,size_t n2d,const VECTORD* loc,MATRIXD* ans,VECTORD* vb2);
75 |
76 | /* Calculate density histogram of null distribution based on pdf function.
77 | * This uses interpolation within each bin, similarly to pij_nulldist_nullhist_pdf.
78 | * Null distribution is for x=-0.5*log(1-z1/(z1+z2)), z1~chi2(n1), z2~chi2(n2)
79 | * range: (nbin+1) Histogram range
80 | * nbin: Number of bins for histogram
81 | * hist: (nbin) Output of histogram bins
82 | * n1,
83 | * n2: Parameters of null distribution
84 | * n: Log_2 of the number of points for interpolation within each bin.
85 | * Return: Callers treat a nonzero return value as failure. */
86 | int pij_nulldist_hist_pdf(const double* restrict range,size_t nbin,double* restrict hist,size_t n1,size_t n2,size_t n);
87 |
88 | /* Upper-tail probability (as computed by gsl_cdf_beta_Q) of the null distribution at x, where
89 | * x=-log(1-y)/2, y=z1/(z1+z2), z1~chi2(n1), z2~chi2(n2), i.e. y~Beta(n1/2,n2/2). */
90 | static inline double pij_nulldist_cdfQ(double x,const size_t n1,const size_t n2);
91 |
92 | /*****************************************************
93 | * Inline functions
94 | *****************************************************/
95 | static inline double pij_nulldist_cdfQ(double x,const size_t n1,const size_t n2)
96 | {
97 | 	//Map x back to y=1-exp(-2*x); assumes math_sf_expminusone(t) evaluates exp(t)-1
98 | 	const double q=gsl_cdf_beta_Q(-math_sf_expminusone(-2*x),(double)n1/2,(double)n2/2);
99 | 	assert((q>=0)&&(q<=1));
100 | 	return q;
101 | }
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | #ifdef __cplusplus
117 | }
118 | #endif
119 | #endif
120 |
--------------------------------------------------------------------------------
/pij/nullhist.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | #include "../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include "../base/gsl/histogram.h"
23 | #include "../base/logger.h"
24 | #include "../base/macros.h"
25 | #include "../base/histogram.h"
26 | #include "nulldist.h"
27 | #include "nullhist.h"
28 |
29 |
30 |
31 | gsl_histogram* pij_nullhist_single(double dmax,size_t nd,size_t n1,size_t n2)
32 | {
33 | #define CLEANUP CLEANHIST(h)
34 | 	//Null density histogram on [0,dmax) of x=-0.5*log(1-z1/(z1+z2)), z1~chi2(n1), z2~chi2(n2); failures exit via ERRRETV(0,...)
35 | 	struct pij_nulldist_pdfs_param param={n1,n2};
36 | 	size_t nbin;
37 | 	gsl_histogram *h=0;
38 | 	assert(n1&&n2);
39 | 	dmax*=(1+1E-6);	//Slightly widen the upper bound; presumably so that a value equal to dmax stays inside the range
40 | 	nbin=histogram_unequalbins_param_count(nd);	//Bin count is chosen from the real data count nd
41 | 	if(nbin<5)
42 | 		ERRRETV(0,"Determined "PRINTFSIZET" bins constructed. Bin count too small.",nbin)
43 | 	else if(nbin<10)
44 | 		LOG(5,"Determined "PRINTFSIZET" bins, smaller than recommended minimum bin count (10).",nbin)
45 | 	else
46 | 		LOG(10,"Determined "PRINTFSIZET" bins.",nbin)
47 | 	h=gsl_histogram_alloc(nbin);
48 | 	if(!h)
49 | 		ERRRETV(0,"Not enough memory.")
50 | 	//Start from uniform ranges over [0,dmax)
51 | 	gsl_histogram_set_ranges_uniform(h,0,dmax);
52 | 	//Set null histogram ranges from the null pdf; param carries n1 and n2 to pij_nulldist_pdfs
53 | 	if(histogram_unequalbins_fromnullpdfs(nbin,h->range,pij_nulldist_pdfs,&param))
54 | 		ERRRETV(0,"histogram_unequalbins_fromnullpdfs failed.")
55 | 	//Calculate null density histogram; 5 is the log2 number of interpolation points per bin
56 | 	if(pij_nulldist_hist_pdf(h->range,nbin,h->bin,param.n1,param.n2,5))
57 | 		ERRRETV(0,"pij_nulldist_hist_pdf failed.")
58 | 	return h;
59 | #undef CLEANUP
60 | }
61 |
62 | gsl_histogram** pij_nullhist(double dmax,size_t nv,size_t nd,long n1c,size_t n1d,long n2c,size_t n2d)
63 | { //NOTE(review): this listing has lost text around the CLEANUP definition and the histogram loop; restore it from the repository before relying on the code below
64 | #define CLEANUP if(h){for(i=0;i=2);
71 | dmax*=(1+1E-6); //Slightly widen the upper bound; presumably so that a value equal to dmax stays inside the range
72 | CALLOCSIZE(h,nv-1); //presumably allocates the nv-1 histogram pointers -- TODO confirm macro semantics
73 | if(!h)
74 | ERRRETV(0,"Not enough memory.")
75 | nbin=histogram_unequalbins_param_count(nd); //Bin count is chosen from the real data count nd
76 | if(nbin<5)
77 | ERRRETV(0,"Determined "PRINTFSIZET" bins constructed. Bin count too small.",nbin)
78 | else if(nbin<10)
79 | LOG(5,"Determined "PRINTFSIZET" bins, smaller than recommended minimum bin count (10).",nbin)
80 | else
81 | LOG(10,"Determined "PRINTFSIZET" bins.",nbin)
82 | ret=1;
83 | for(i=0;irange,pij_nulldist_pdfs,¶m))
95 | ERRRETV(0,"histogram_unequalbins_fromnullpdfs failed.")
96 | //Calculate null density histogram; 5 is passed as the log2 number of interpolation points per bin
97 | if(pij_nulldist_hist_pdf(h[i]->range,nbin,h[i]->bin,param.n1,param.n2,5))
98 | ERRRETV(0,"pij_nulldist_hist_pdf failed.")
99 | }
100 | return h;
101 | #undef CLEANUP
102 | }
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
--------------------------------------------------------------------------------
/pij/nullhist.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | /* This part produces the histogram of parametric null distributions.
19 | */
20 |
21 | #ifndef _HEADER_LIB_PIJ_NULLHIST_H_
22 | #define _HEADER_LIB_PIJ_NULLHIST_H_
23 | #include "../base/config.h"
24 | #include "../base/types.h"
25 | #include "../base/gsl/histogram.h"
26 | #ifdef __cplusplus
27 | extern "C"
28 | {
29 | #endif
30 |
31 |
32 | /* Construct one null histogram for a specific genotype value count.
33 | * The function calculates the null density histogram for random variable:
34 | * x=-0.5*log(1-z1/(z1+z2)), where z1~chi2(n1),z2~chi2(n2).
35 | * Histogram bin count and width are automatically determined
36 | * from real data count (nd).
37 | * For bin range settings, see histogram_unequalbins_fromnullcdf.
38 | * For null density histogram from pdf, see pij_nulldist_hist_pdf.
39 | * dmax: Specifies the histogram bound as [0,dmax).
40 | * nd: Count of real data to form real histograms. This is used to
41 | * automatically decide number of bins and widths.
42 | * n1,
43 | * n2: Parameters of null distribution. The implementation asserts both are nonzero.
44 | * Return: Constructed null distribution histogram with preset
45 | * bin ranges and values as density.
46 | */
47 | gsl_histogram* pij_nullhist_single(double dmax,size_t nd,size_t n1,size_t n2);
48 |
49 | /* Construct multiple null histograms for different genotype value counts.
50 | * The function calculates the null density histogram for random variable:
51 | * x=-0.5*log(1-z1/(z1+z2)), where z1~chi2(i*n1c+n1d),z2~chi2(-i*n2c+n2d),
52 | * i=0,...,nv-2. Histogram bin count and width are automatically determined
53 | * from real data count (nd).
54 | * For bin range settings, see histogram_unequalbins_fromnullcdf.
55 | * For null density histogram from pdf, see pij_nulldist_hist_pdf.
56 | * dmax: Specifies the histogram bound as [0,dmax).
57 | * nv: Maximum number of values each genotype can take. Must be nv>=2.
58 | * This limits the possible values of kv in distribution, and
59 | * also the output histogram count.
60 | * nd: Count of real data to form real histograms. This is used to
61 | * automatically decide number of bins and widths.
62 | * n1c,
63 | * n1d,
64 | * n2c,
65 | * n2d: Parameters of null distribution.
66 | * Return: [nv-1]. Constructed null distribution histograms with preset
67 | * bin ranges and values as density. Genotypes with k values have
68 | * histogram stored in Return[k-2], i.e. the entry for i=k-2 in the formula above.
69 | */
70 | gsl_histogram** pij_nullhist(double dmax,size_t nv,size_t nd,long n1c,size_t n1d,long n2c,size_t n2d);
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 | #endif
87 |
--------------------------------------------------------------------------------
/pij/rank.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | #include "../base/config.h"
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include "../base/gsl/blas.h"
24 | #include "../base/random.h"
25 | #include "../base/const.h"
26 | #include "../base/logger.h"
27 | #include "../base/macros.h"
28 | #include "../base/data_process.h"
29 | #include "../base/supernormalize.h"
30 | #include "../base/threading.h"
31 | #include "llrtopij.h"
32 | #include "llrtopv.h"
33 | #include "rank.h"
34 |
35 | /* Calculates the log likelihood ratio of correlated vs. uncorrelated models for row pairs of t and t2.
36 | * t: (ng,ns), t2: (nt,ns), llr: (ng,nt) output, as checked by the assertions below. Uses GSL BLAS.
37 | */
38 | static void pij_rank_llr_block(const MATRIXF* t,const MATRIXF* t2,MATRIXF* llr)
39 | {
40 | size_t i,j;
41 | size_t ng=t->size1;
42 | size_t nt=t2->size1;
43 | #ifndef NDEBUG
44 | size_t ns=t->size2; //Only referenced inside assert, hence declared only when asserts are active
45 | #endif
46 | assert((t2->size2==ns));
47 | assert((llr->size1==ng));
48 | assert((llr->size2==nt));
49 | MATRIXFF(cov2_bounded)(t,t2,llr); //presumably fills llr with bounded covariances between rows of t and t2 -- TODO confirm
50 | MATRIXFF(mul_elements)(llr,llr); //presumably squares each element in place
51 | MATRIXFF(scale)(llr,-1);
52 | MATRIXFF(add_constant)(llr,1); //after negation and +1, each element c has presumably become 1-c^2
53 | for(i=0;isize2==t2->size2)&&(llr->size1==t->size1)&&(llr->size2==t2->size1));
70 | #pragma omp parallel
71 | {
72 | size_t n1,n2;
73 |
74 | threading_get_startend(t->size1,&n1,&n2); //presumably yields this thread's row range [n1,n2) of t -- TODO confirm
75 | if(n2>n1)
76 | {
77 | MATRIXFF(const_view) mvt=MATRIXFF(const_submatrix)(t,n1,0,n2-n1,t->size2);
78 | MATRIXFF(view) mvllr;
79 | mvllr=MATRIXFF(submatrix)(llr,n1,0,n2-n1,llr->size2);
80 | pij_rank_llr_block(&mvt.matrix,t2,&mvllr.matrix); //rows n1..n2-1 of t against all of t2, written to the same rows of llr
81 | }
82 | }
83 | }
84 |
85 | /* In-place conversion of log likelihood ratios to p-values for the ranked correlation test.
86 | * d: MATRIXF of any size; holds LLRs on entry and the corresponding p-values on return.
87 | * ns: Number of samples (>2); the null distribution is parameterized by (1,ns-2). */
88 | static inline void pij_rank_llrtopv(MATRIXF* d,size_t ns)
89 | {
90 | 	const size_t n1=1,n2=ns-2;
91 | 	assert(ns>2);
92 | 	pij_llrtopvm(d,n1,n2);
93 | }
94 |
95 | int pij_rank_pv(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,size_t memlimit)
96 | {
97 | #define CLEANUP CLEANMATF(tnew)CLEANMATF(tnew2)
98 | 	MATRIXF *tnew,*tnew2;	//Supernormalized copies of t (ng,ns) and t2 (nt,ns)
99 | 	int ret;
100 | 	size_t ng,nt,ns;
101 | 	//Writes into p (ng,nt) the p-values of the rank-based LLR between each row of t and each row of t2
102 | 	ng=t->size1;
103 | 	nt=t2->size1;
104 | 	ns=t->size2;
105 |
106 | 	tnew=tnew2=0;	//Cleared before any error exit, since error exits presumably run CLEANUP on both pointers
107 |
108 | 	//Validation
109 | 	assert((t2->size2==ns)&&(p->size1==ng)&&(p->size2==nt)&&memlimit);
110 | 	if(ns<3)
111 | 		ERRRET("Needs at least 3 samples to compute p-values.")
112 | 	//Memory estimate in bytes: t and t2 counted twice each, plus p, at FTYPEBITS bits per element
113 | 	{
114 | 		size_t mem;
115 | 		mem=(2*t->size1*t->size2+2*t2->size1*t2->size2+p->size1*p->size2)*FTYPEBITS/8;
116 | 		if(memlimit<=mem)
117 | 			ERRRET("Memory limit lower than minimum memory needed. Try increasing your memory usage limit.")
118 | 		LOG(10,"Memory limit: "PRINTFSIZET" bytes.",memlimit)
119 | 	}
120 |
121 | 	tnew=MATRIXFF(alloc)(ng,ns);
122 | 	tnew2=MATRIXFF(alloc)(nt,ns);
123 | 	if(!(tnew&&tnew2))
124 | 		ERRRET("Not enough memory.")
125 |
126 | 	//Step 1: Supernormalization of copies, leaving the inputs t and t2 untouched
127 | 	LOG(9,"Supernormalizing...")
128 | 	MATRIXFF(memcpy)(tnew,t);
129 | 	ret=supernormalizea_byrow(tnew);
130 | 	MATRIXFF(memcpy)(tnew2,t2);
131 | 	ret=ret||supernormalizea_byrow(tnew2);
132 | 	if(ret)
133 | 		ERRRET("Supernormalization failed.")
134 |
135 | 	//Step 2: Log likelihood ratios from nonpermuted data
136 | 	LOG(9,"Calculating real log likelihood ratios...")
137 | 	pij_rank_llr(tnew,tnew2,p);
138 | 	//Step 3: Convert log likelihood ratios to p-values, in place in p
139 | 	LOG(9,"Converting likelihood ratios into p-values...")
140 | 	pij_rank_llrtopv(p,ns);
141 |
142 | 	//Cleanup
143 | 	CLEANUP
144 | 	return 0;
145 | #undef CLEANUP
146 | }
147 |
148 | /* Converts LLRs into probabilities on a per A basis.
149 | * Delegates to pij_llrtopij_convert_single_self with null parameters (1,ns-2).
150 | * ans: (ng,nt) Real LLRs on entry, to be compared with null LLRs;
151 | * overwritten with the converted probabilities.
152 | * ns: Number of samples, used to parameterize the null distribution.
153 | * nodiag: Whether diagonal elements of ans should be ignored in the conversion.
154 | * nodiagshift: Offdiagonal shift.
155 | * For nodiagshift>0/<0, use upper/lower diagonals of corresponding id.
156 | * Return: 1 if ns<=2; otherwise the return value of the conversion (0 if succeed).
157 | */
158 | static int pij_rank_llrtopij(MATRIXF* ans,size_t ns,char nodiag,long nodiagshift)
159 | {
160 | 	LOG(9,"Converting LLR to probabilities on per A basis.")
161 | 	//Normal path first: enough samples for the (1,ns-2) null distribution
162 | 	if(ns>2)
163 | 		return pij_llrtopij_convert_single_self(ans,1,ns-2,nodiag,nodiagshift);
164 | 	//Too few samples
165 | 	LOG(0,"Needs at least 3 samples to compute probabilities.")
166 | 	return 1;
167 | }
168 |
169 | int pij_rank(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,char nodiag,size_t memlimit)
170 | {
171 | #define CLEANUP CLEANMATF(tnew)CLEANMATF(tnew2)
172 | 	MATRIXF *tnew,*tnew2;	//Supernormalized copies of t (ng,ns) and t2 (nt,ns)
173 | 	VECTORFF(view) vv;
174 | 	int ret;
175 | 	size_t ng,nt,ns;
176 | 	//Writes into p (ng,nt) the probabilities converted from the rank-based LLRs between rows of t and rows of t2
177 | 	ng=t->size1;
178 | 	nt=t2->size1;
179 | 	ns=t->size2;
180 |
181 | 	tnew=tnew2=0;	//Cleared before any error exit, since error exits presumably run CLEANUP on both pointers
182 |
183 | 	//Validation
184 | 	assert((t2->size2==ns)&&(p->size1==ng)&&(p->size2==nt)&&memlimit);
185 | 	//Memory estimate below is in bytes: t and t2 counted twice each, plus p, at FTYPEBITS bits per element
186 | 	if(ns<=2)
187 | 		ERRRET("Needs at least 3 samples to compute probabilities.")
188 | 	{
189 | 		size_t mem;
190 | 		mem=(2*t->size1*t->size2+2*t2->size1*t2->size2+p->size1*p->size2)*FTYPEBITS/8;
191 | 		if(memlimit<=mem)
192 | 			ERRRET("Memory limit lower than minimum memory needed. Try increasing your memory usage limit.")
193 | 		LOG(10,"Memory limit: "PRINTFSIZET" bytes.",memlimit)
194 | 	}
195 |
196 | 	tnew=MATRIXFF(alloc)(ng,ns);
197 | 	tnew2=MATRIXFF(alloc)(nt,ns);
198 | 	if(!(tnew&&tnew2))
199 | 		ERRRET("Not enough memory.")
200 |
201 | 	//Check for identical rows in input data; tnew and tnew2 are not filled yet, so views into them serve as the two buffers
202 | 	{
203 | 		VECTORFF(view) vbuff1=MATRIXFF(column)(tnew,0);
204 | 		VECTORFF(view) vbuff2=MATRIXFF(row)(tnew2,0);
205 | 		MATRIXFF(cmprow)(t,t2,&vbuff1.vector,&vbuff2.vector,nodiag,1);
206 | 	}
207 |
208 | 	//Step 1: Supernormalization of copies, leaving the inputs t and t2 untouched
209 | 	LOG(9,"Supernormalizing...")
210 | 	MATRIXFF(memcpy)(tnew,t);
211 | 	ret=supernormalizea_byrow(tnew);
212 | 	MATRIXFF(memcpy)(tnew2,t2);
213 | 	ret=ret||supernormalizea_byrow(tnew2);
214 | 	if(ret)
215 | 		ERRRET("Supernormalization failed.")
216 |
217 | 	//Step 2: Log likelihood ratios from nonpermuted data
218 | 	LOG(9,"Calculating real log likelihood ratios...")
219 | 	pij_rank_llr(tnew,tnew2,p);
220 | 	if(nodiag)
221 | 	{
222 | 		vv=MATRIXFF(diagonal)(p);	//Diagonal LLRs are zeroed when the diagonal is to be ignored
223 | 		VECTORFF(set_zero)(&vv.vector);
224 | 	}
225 | 	//Step 3: Convert log likelihood ratios to probabilities; a failure is logged and returned after cleanup
226 | 	if((ret=pij_rank_llrtopij(p,ns,nodiag,0)))
227 | 		LOG(1,"Failed to convert log likelihood ratios to probabilities.")
228 |
229 | 	//Cleanup
230 | 	CLEANUP
231 | 	return ret;
232 | #undef CLEANUP
233 | }
234 |
--------------------------------------------------------------------------------
/pij/rank.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016-2018, 2020 Lingfei Wang
2 | *
3 | * This file is part of Findr.
4 | *
5 | * Findr is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Affero General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 | *
10 | * Findr is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Affero General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Affero General Public License
16 | * along with Findr. If not, see .
17 | */
18 | /* This file contains rank-based pij prediction without genotype information.
19 | * Input expression data are first supernormalized so only rank information
20 | * remains. Then different prediction method can be applied for pij.
21 | *
22 | * Currently only one method is provided. It first calculates the log likelihood
23 | * ratio (LLR) between null A B and alternative A---B hypotheses. The LLR
24 | * is then converted into probability of alternative hypothesis per A.
25 | * The probability is regarded as pij. This is in function pij_rank.
26 | */
27 |
28 | #ifndef _HEADER_LIB_PIJ_RANK_H_
29 | #define _HEADER_LIB_PIJ_RANK_H_
30 | #include "../base/config.h"
31 | #include "../base/types.h"
32 | #ifdef __cplusplus
33 | extern "C"
34 | {
35 | #endif
36 |
37 | /* Calculate p-values of A B against A--B based on LLR distributions of real data
38 | * and null hypothesis.
39 | * t: (ng,ns) Expression data for A
40 | * t2: (nt,ns) Expression data for B
41 | * p: (ng,nt) Output for p-values, i.e. computed under the null hypothesis that A--B is false
42 | * memlimit:Specifies approximate memory usage. Must be nonzero. Function can fail if memlimit is too small. For unlimited memory, set memlimit=-1.
43 | * Return: 0 if succeed. The implementation also fails when ns<3.
44 | */
45 | int pij_rank_pv(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,size_t memlimit);
46 |
47 | /* Calculate probabilities of A--B based on LLR distributions of real data
48 | * and null hypothesis.
49 | * t: (ng,ns) Expression data for A
50 | * t2: (nt,ns) Expression data for B
51 | * p: (ng,nt) Output for probabilities that A--B is true
52 | * nodiag: When the top ng rows of t2 are exactly t, diagonals of pij are meaningless.
53 | * In this case, set nodiag to 1 to avoid inclusion of NANs. For nodiag=0, t and t2
54 | * should not have any identical genes.
55 | * memlimit:Specifies approximate memory usage. Must be nonzero. Function can fail if memlimit is too small. For large dataset, memory usage will be reduced by splitting t into smaller chunks and inferring separately (NOTE(review): no such splitting is visible in pij_rank in rank.c -- confirm). For unlimited memory, set memlimit=-1.
56 | * Return: 0 if succeed. The implementation also fails when ns<=2.
57 | */
58 | int pij_rank(const MATRIXF* t,const MATRIXF* t2,MATRIXF* p,char nodiag,size_t memlimit);
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | #endif
78 |
--------------------------------------------------------------------------------