├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── fftw-3.3.7.tar.gz ├── include ├── frp │ ├── compact.h │ ├── coresets.h │ ├── dci.h │ ├── dist.h │ ├── fhtgpu.h │ ├── frp.h │ ├── gpu.h │ ├── graph.h │ ├── ifc.h │ ├── jl.h │ ├── kama.h │ ├── kernel.h │ ├── linalg.h │ ├── lsh.h │ ├── mach.h │ ├── mm.h │ ├── parser.h │ ├── rand.h │ ├── sample.h │ ├── sdq.h │ ├── spectral.h │ ├── spinner.h │ ├── stackstruct.h │ └── util.h └── thirdparty │ └── fast_mutex.h ├── lib ├── fht_kernel0.cu ├── fht_kernel1.cu ├── fht_kernel2.cu ├── fht_kernel3.cu ├── fht_kernel4.cu ├── fht_kernel5.cu ├── fht_kernel6.cu ├── fht_kernel7.cu ├── fht_kernel8.cu └── fht_kernel9.cu ├── py ├── Makefile ├── frp.cpp ├── jl.py └── setup.py ├── scripts ├── autogen.py ├── ratio_err.py └── time_exp.py ├── src ├── aestest.cpp ├── dcitest.cpp ├── fhtest.cpp ├── graphtest.cpp ├── kernel_test.cpp ├── kernel_time.cpp ├── kstest.cpp ├── lshtest.cpp ├── mtest.cpp ├── multest.cpp ├── ojlt.cpp ├── parser.cpp ├── pcatest.cpp └── test_gs.cpp └── test └── testfht.cpp /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "FFHT"] 2 | path = FFHT 3 | url = https://github.com/FALCONN-LIB/FFHT 4 | branch = master 5 | [submodule "fastrange"] 6 | path = fastrange 7 | url = https://github.com/lemire/fastrange 8 | branch = master 9 | [submodule "blaze"] 10 | path = blaze 11 | url = https://bitbucket.org/blaze-lib/blaze.git 12 | branch = master 13 | [submodule "kspp"] 14 | path = kspp 15 | url = https://github.com/dnbaker/kspp 16 | branch = master 17 | [submodule "pybind11"] 18 | path = pybind11 19 | url = https://github.com/pybind/pybind11 20 | [submodule "sleef"] 21 | path = sleef 22 | url = https://github.com/shibatch/sleef 23 | [submodule "math"] 24 | path = boost/math 25 | url = https://github.com/boostorg/math 26 | [submodule "random"] 27 | path = boost/random 28 | url = https://github.com/boostorg/random 29 | [submodule "config"] 30 | path = boost/config 31 | url = https://github.com/boostorg/config 32 | [submodule "utility"] 33 | path = boost/utility 34 | url = https://github.com/boostorg/utility 35 | [submodule "assert"] 36 | path = boost/assert 37 | url = https://github.com/boostorg/assert 38 | [submodule "static_assert"] 39 | path = boost/static_assert 40 | url = https://github.com/boostorg/static_assert 41 | [submodule "integer"] 42 | path = boost/integer 43 | url = https://github.com/boostorg/integer 44 | [submodule "type_traits"] 45 | path = boost/type_traits 46 | url = https://github.com/boostorg/type_traits 47 | [submodule "mpl"] 48 | path = boost/mpl 49 | url = https://github.com/boostorg/mpl 50 | [submodule "core"] 51 | path = boost/core 52 | url = https://github.com/boostorg/core 53 | [submodule "preprocessor"] 54 | path = boost/preprocessor 55 | url = https://github.com/boostorg/preprocessor 56 | [submodule "exception"] 57 | path = boost/exception 58 | url = https://github.com/boostorg/exception 59 | [submodule "throw_exception"] 60 | path = boost/throw_exception 61 | url = https://github.com/boostorg/throw_exception 62 | [submodule "range"] 63 | path = boost/range 64 | url = https://github.com/boostorg/range 65 | [submodule "iterator"] 66 | path = boost/iterator 67 | url = https://github.com/boostorg/iterator 68 | [submodule "io"] 69 | path = boost/io 70 | url = https://github.com/boostorg/io 71 | [submodule "predef"] 72 | path = boost/predef 73 | url = https://github.com/boostorg/predef 74 | [submodule "concept_check"] 75 | path = boost/concept_check 76 | url = 
https://github.com/boostorg/concept_check 77 | [submodule "detail"] 78 | path = boost/detail 79 | url = https://github.com/boostorg/detail 80 | [submodule "lexical_cast"] 81 | path = boost/lexical_cast 82 | url = https://github.com/boostorg/lexical_cast 83 | [submodule "numeric_conversion"] 84 | path = boost/numeric_conversion 85 | url = https://github.com/boostorg/numeric_conversion 86 | [submodule "functional"] 87 | path = boost/functional 88 | url = https://github.com/boostorg/functional 89 | [submodule "array"] 90 | path = boost/array 91 | url = https://github.com/boostorg/array 92 | [submodule "container"] 93 | path = boost/container 94 | url = https://github.com/boostorg/container 95 | [submodule "move"] 96 | path = boost/move 97 | url = https://github.com/boostorg/move 98 | [submodule "thread"] 99 | path = boost/thread 100 | url = https://github.com/boostorg/thread 101 | [submodule "smart_ptr"] 102 | path = boost/smart_ptr 103 | url = https://github.com/boostorg/smart_ptr 104 | [submodule "vec"] 105 | path = vec 106 | url = https://github.com/dnbaker/vec 107 | [submodule "klib"] 108 | path = klib 109 | url = https://github.com/attractivechaos/klib 110 | [submodule "distmat"] 111 | path = distmat 112 | url = https://github.com/dnbaker/distmat 113 | [submodule "aesctr"] 114 | path = aesctr 115 | url = https://github.com/dnbaker/aesctr 116 | [submodule "flat_hash_map"] 117 | path = flat_hash_map 118 | url = https://github.com/skarupke/flat_hash_map 119 | [submodule "boost/multiprecision"] 120 | path = boost/multiprecision 121 | url = https://github.com/boostorg/multiprecision 122 | [submodule "sketch"] 123 | path = sketch 124 | url = https://github.com/dnbaker/sketch 125 | [submodule "clhash"] 126 | path = clhash 127 | url = https://github.com/lemire/clhash 128 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY=all tests clean obj python 2 | ifndef CXX 3 | CXX=g++ 4 | endif 5 | ifndef CC 6 | CC=gcc 7 | endif 8 | ifndef STD 9 | STD=c++17 10 | endif 11 | WARNINGS=-Wall -Wextra -Wno-char-subscripts \ 12 | -Wpointer-arith -Wwrite-strings -Wdisabled-optimization \ 13 | -Wformat -Wcast-align -Wno-unused-function -Wunused-variable -Wno-ignored-qualifiers -Wsuggest-attribute=const \ 14 | # -Wconversion -Werror -Wno-float-conversion 15 | DBG:= # -DNDEBUG 16 | OFLAG?=-O3 17 | OPT:= $(OFLAG) -funroll-loops -pipe -fno-strict-aliasing -march=native -fopenmp -DUSE_FASTRANGE \ 18 | -funsafe-math-optimizations -ftree-vectorize \ 19 | -DBOOST_NO_RTTI 20 | OS:=$(shell uname) 21 | 22 | EXTRA= 23 | BLAS_LINKING_FLAGS?= 24 | OPT:=$(OPT) $(FLAGS) 25 | XXFLAGS=-fno-rtti 26 | CBLASFILE?=cblas.h 27 | BLAZEFLAGS= -DBLAZE_BLAS_MODE=1 \ 28 | -DBLAZE_USE_BLAS_MATRIX_VECTOR_MULTIPLICATION=1 -DBLAZE_BLAS_INCLUDE_FILE='"$(CBLASFILE)"' \ 29 | $(BLAS_LINKING_FLAGS) 30 | CXXFLAGS=$(OPT) $(XXFLAGS) -std=$(STD) $(WARNINGS) -DRADEM_LUT $(EXTRA) $(BLAZEFLAGS) 31 | CCFLAGS=$(OPT) -std=c11 $(WARNINGS) 32 | LIB=-lz -pthread -lfftw3 -lfftw3l -lfftw3f -lstdc++fs -lsleef -llapack 33 | LD=-L. 
-Lfftw-3.3.7/lib -Lvec/sleef/build/lib 34 | 35 | OBJS=$(patsubst %.cpp,%.o,$(wildcard lib/*.cpp)) clhash/clhash.o 36 | TEST_OBJS=$(patsubst %.cpp,%.o,$(wildcard test/*.cpp)) 37 | EXEC_OBJS=$(patsubst %.cpp,%.o,$(wildcard src/*.cpp)) $(patsubst %.cpp,%.fo,$(wildcard src/*.cpp)) 38 | 39 | EX=$(patsubst src/%.fo,%f,$(EXEC_OBJS)) $(patsubst src/%.o,%,$(EXEC_OBJS)) 40 | BOOST_DIRS=math config random utility assert static_assert \ 41 | integer type_traits mpl core preprocessor exception throw_exception \ 42 | range iterator io predef concept_check detail lexical_cast \ 43 | numeric_conversion functional array container move thread smart_ptr 44 | 45 | SAN=-fsanitize=address -fsanitize=undefined 46 | 47 | BOOST_INCS=$(patsubst %,-Iboost/%/include,$(BOOST_DIRS)) 48 | 49 | 50 | # If compiling with C++ < 17 and your compiler does not provide 51 | # Bessel functions with C++14, you must compile against boost. 52 | 53 | INCLUDE=-I. -Iinclude -Ivec/blaze -Ithirdparty -Irandom/include/\ 54 | -Ifftw-3.3.7/include -I vec/sleef/build/include/ $(BOOST_INCS) \ 55 | -I/usr/local/Cellar/zlib/1.2.11/include -Ifastrange -Idistmat -Iaesctr \ 56 | -Iinclude/frp -Iclhash/include 57 | 58 | ifdef BOOST_INCLUDE_PATH 59 | INCLUDE += -I$(BOOST_INCLUDE_PATH) 60 | endif 61 | 62 | OBJS:=$(OBJS) vec/sleef/build/include/sleef.h fht.o FFHT/fast_copy.o 63 | 64 | all: $(OBJS) $(EX) python 65 | print-% : ; @echo $* = $($*) 66 | 67 | obj: $(OBJS) $(EXEC_OBJS) 68 | 69 | HEADERS=$(wildcard include/frp/*.h) 70 | 71 | fht.o: FFHT/fht.c 72 | cd FFHT && make fht.o && cp fht.o .. 73 | 74 | 75 | 76 | test/%.o: test/%.cpp $(OBJS) 77 | $(CXX) $(CXXFLAGS) $(INCLUDE) $(LD) $(OBJS) -c $< -o $@ $(LIB) 78 | 79 | %.fo: %.cpp $(OBJS) 80 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=float $(DBG) $(INCLUDE) $(LD) -c $< -o $@ $(LIB) 81 | 82 | %.o: %.cpp $(OBJS) 83 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=double $(DBG) $(INCLUDE) $(LD) -c $< -o $@ $(LIB) 84 | 85 | %: src/%.cpp $(OBJS) fftw3.h $(HEADERS) 86 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=double $(DBG) $(INCLUDE) $(LD) $(OBJS) $< -o $@ $(LIB) 87 | pcatest: src/pcatest.cpp $(OBJS) $(HEADERS) 88 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=double $(DBG) $(INCLUDE) $(LD) $(OBJS) $< -o $@ $(LIB) 89 | dcitest: src/dcitest.cpp $(OBJS) $(HEADERS) 90 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=double $(DBG) $(INCLUDE) $(LD) $(OBJS) $< -o $@ -lz -pthread -fopenmp -llapack -DTIME_ADDITIONS #$(SAN) 91 | dcitestf: src/dcitest.cpp $(OBJS) $(HEADERS) 92 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=float $(DBG) $(INCLUDE) $(LD) $(OBJS) $< -o $@ -lz -pthread -fopenmp -llapack -DTIME_ADDITIONS #$(SAN) 93 | 94 | %f: src/%.cpp $(OBJS) fftw3.h 95 | $(CXX) $(CXXFLAGS) -DFLOAT_TYPE=float $(DBG) $(INCLUDE) $(LD) $(OBJS) $< -o $@ $(LIB) 96 | 97 | %.o: %.c 98 | $(CC) $(CCFLAGS) -Wno-sign-compare $(DBG) $(INCLUDE) $(LD) -c $< -o $@ $(LIB) 99 | 100 | %.o: FFHT/%.c $(OBJS) fftw3.h 101 | +cd FFHT && make $@ && cp $@ .. && cd .. 102 | 103 | clhash/clhash.o: 104 | cd clhash && make && cd .. 105 | 106 | fftw-3.3.7: fftw-3.3.7.tar.gz 107 | tar -zxvf fftw-3.3.7.tar.gz 108 | 109 | fftw-3.3.7.exist: fftw-3.3.7.tar.gz 110 | tar -zxvf fftw-3.3.7.tar.gz && touch fftw-3.3.7.exist 111 | 112 | PLATFORM_CONF_STR?=--enable-avx2 113 | 114 | fftw3.h: fftw-3.3.7/lib/libfftw3l.a fftw-3.3.7/lib/libfftw3.a fftw-3.3.7/lib/libfftw3f.a 115 | cp fftw-3.3.7/api/fftw3.h .
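# The variables above (OFLAG, PLATFORM_CONF_STR, CBLASFILE, ...) may be overridden
# on the command line; hypothetical example invocations against the targets
# defined in this Makefile:
#   make ojlt OFLAG=-O2
#   make fftw3.h PLATFORM_CONF_STR="--enable-avx512"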
116 | 117 | python: 118 | cd py && make 119 | 120 | fftw-3.3.7/lib/libfftw3.a: fftw-3.3.7.exist 121 | +cd fftw-3.3.7 &&\ 122 | ./configure $(PLATFORM_CONF_STR) --prefix=$$PWD && make && make install 123 | fftw-3.3.7/lib/libfftw3f.a: fftw-3.3.7.exist fftw-3.3.7/lib/libfftw3.a 124 | +cd fftw-3.3.7 &&\ 125 | ./configure $(PLATFORM_CONF_STR) --prefix=$$PWD --enable-single && make && make install 126 | fftw-3.3.7/lib/libfftw3l.a: fftw-3.3.7.exist fftw-3.3.7/lib/libfftw3f.a 127 | +cd fftw-3.3.7 &&\ 128 | ./configure --prefix=$$PWD --enable-long-double && make && make install && cp api/fftw3.h .. 129 | 130 | 131 | tests: clean unit 132 | 133 | unit: $(OBJS) $(TEST_OBJS) 134 | $(CXX) $(CXXFLAGS) $(INCLUDE) $(TEST_OBJS) $(LD) $(OBJS) -o $@ $(LIB) 135 | 136 | vec/sleef/build: vec/sleef 137 | mkdir -p vec/sleef/build 138 | 139 | vec/sleef/build/include/sleef.h: vec/sleef/build 140 | cd $< && cmake .. && make && cd ../.. 141 | 142 | sleef.h:vec/sleef/build/include/sleef.h 143 | cp vec/sleef/build/include/sleef.h sleef.h 144 | 145 | clean: 146 | +rm -f $(EXEC_OBJS) $(OBJS) $(EX) $(TEST_OBJS) fftw3.h unit lib/*o frp/src/*o && cd FFHT && make clean && cd .. 147 | 148 | mostlyclean: clean 149 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # frp: Fast Randomized Projections 2 | We use [Blaze](https://bitbucket.org/blaze-lib) for fast linear algebra, [Sleef](https://github.com/shibatch/sleef) for fast trigonometric operations, 3 | [Fast Fast-Hadamard Transform](https://github.com/dnbaker/FFHT) from FALCONN-LIB for the Fast Hadamard Transform, [FFTW3](http://fftw.org/) for the FFT, and [boost](https://github.com/boostorg) for 4 | special functions and random number generators. Only the required boost headers are provided as submodules, so no installation of boost is required. 5 | 6 | ## Contents 7 | 0. Orthogonal JL transform with linear space and linearithmic runtime 8 | 1. This is available through the `ojlt` executable, in C++ programs via include/frp/jl.h, and through Python bindings (see the usage sketch after this list). 9 | 2. Python bindings can be installed with `cd py && python3 setup.py install`. 10 | 1. Kernel projections 11 | 1. We support kernel approximation for the Gaussian kernel using Random Fourier Features, Orthogonal Random Features, Structured Orthogonal Random Features, and FastFood. 12 | 2. We recommend Structured Orthogonal Random Features: it has the highest accuracy in our experiments and can be hundreds of times faster while keeping a small memory footprint. 13 | 2. A type-generic SIMD interface (vec/vec.h), which abstracts operations so the compiler can use the widest vectors available, generically dispatching the fastest implementation for a given machine. 14 | 3. Utilities 15 | 1. PRNVector (PseudoRandom Number Vector), which provides access to random vectors with only constant memory requirements by generating values as needed instead of storing them explicitly. 16 | 2. Utilities for sampling and filling containers from distributions. 17 | 3. Acquiring cache sizes from the OS. 18 | 4. Linear algebra methods 19 | 1. Implementation of the Gram-Schmidt algorithm for orthogonalizing matrices. 20 | 2. PCA using full eigendecomposition for symmetric matrices. 21 | 3. Covariance matrix calculation. 22 | 5. Miscellaneous, related work 23 | 1. Dynamic Continuous Indexing for real-valued data 24 | 1. [Dynamic Continuous Indexing](https://arxiv.org/abs/1512.00442) 25 | 1. Tested 26 | 2. [Prioritized DCI](https://arxiv.org/abs/1703.00440) 27 | 2. Draft form.
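As a usage illustration, here is a minimal sketch of the orthogonal JL transform in C++. Names follow include/frp/jl.h (constructor `(from, to, seed)` and `transform(in, out)`); the exact template parameters are elided in this listing, so treat the bare `frp::OJLT` spelling as an assumption, and note the Makefile builds with `-DFLOAT_TYPE=double` by default:

```cpp
#include "blaze/Math.h"
#include "frp/jl.h"

int main() {
    blaze::DynamicVector<double> in(1024, 1.0), out(64);
    // Reduce 1024 dimensions (a power of two) to 64; the seed makes the
    // transform reproducible across runs.
    frp::OJLT jlt(/*from=*/1024, /*to=*/64, /*seed=*/137);
    jlt.transform(in, out); // out now holds the 64-dimensional embedding
}
```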
28 | 29 | ### Build instructions 30 | 31 | `make` should compile a variety of tests. 32 | We assume you're using BLAS for your linear algebra; to avoid that, modify the Makefile and remove the `-DBLAZE*` flags. 33 | 34 | To specify a different BLAS header file, use the CBLASFILE variable when compiling: 35 | ```bash 36 | make ojlt CBLASFILE=mkl_cblas.h 37 | # Or, use an environment variable 38 | export CBLASFILE=mkl_cblas.h && \ 39 | make ojlt 40 | ``` 41 | 42 | 43 | 44 | ## Commentary 45 | 46 | The initial design of this library was to implement methods from [https://arxiv.org/abs/1703.00864](https://arxiv.org/abs/1703.00864). The core transforms on which it is built are structured matrix-vector products accelerated by the Fast Hadamard Transform. These have applications in memory-efficient, accelerated Johnson-Lindenstrauss transforms, Gaussian kernel approximation for linearizing datasets, and FastFood/Adaptive Random Spinners. 47 | 48 | ## DCI/Prioritized DCI usage 49 | 50 | Notes: 51 | 52 | During construction, it may be advantageous to use a std::set to maintain sorted indexes (logarithmic update time), whereas at query time it's faster to use a contiguous array. 53 | We provide the cvt function, which copies the index but converts the sorted index type from what it used to be (usually a red-black tree) into the destination type, 54 | by default an always-sorted array. 55 | 56 | We suggest doing this for faster construction and faster queries. 57 | 58 | Additionally, we do not store any points, just references to them. 59 | 60 | When using a non-default container which supports lower_bound functionality, one needs both to use `std::less` as the comparator and to specialize the `has_lower_bound_mf` struct. 61 | -------------------------------------------------------------------------------- /fftw-3.3.7.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dnbaker/frp/394b427b60221a5dc215a90c58f9fb922b0c4737/fftw-3.3.7.tar.gz -------------------------------------------------------------------------------- /include/frp/compact.h: -------------------------------------------------------------------------------- 1 | #ifndef _GFRP_CRAD_H__ 2 | #define _GFRP_CRAD_H__ 3 | #include "frp/util.h" 4 | #include "frp/linalg.h" 5 | #include "frp/dist.h" 6 | #include "fastrange/fastrange.h" 7 | #include <ctime> 8 | 9 | namespace frp { 10 | 11 | /* 12 | // From https://arxiv.org/pdf/1702.08159.pdf 13 | 14 | FHT!!! 15 | https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Pseudocode 16 | http://fourier.eng.hmc.edu/e161/lectures/wht/node4.html 17 | Sliding windows http://www.ee.cuhk.edu.hk/~wlouyang/FWHT.htm 18 | 19 | Presentation http://c.csie.org/~itct/slide/DCT_larry.pdf 20 | 21 | Using renormalization makes the transform orthogonal, which is GOOD. 22 | Some authors further multiply the X0 term by 1/√2 and scale the resulting matrix by an overall factor of √(2/N) (with the corresponding change in DCT-III); this makes the DCT-II matrix orthogonal, but breaks the direct correspondence with a real-even DFT of half-shifted input.
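For reference, the unnormalized FWHT is just log2(n) in-place butterfly passes; a sketch
(this matches the butterflies in fht_kernel in fhtgpu.h, not the tuned FFHT code used here):
    for(step = 1; step < n; step <<= 1)
        for(i = 0; i < n; i += 2 * step)
            for(j = i; j < i + step; ++j)
                u = a[j], v = a[j + step], a[j] = u + v, a[j + step] = u - v;
Scaling by 1/sqrt(2) per pass (1/sqrt(n) overall) gives the orthonormal version discussed above.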
23 | 24 | 25 | Fast DCT https://unix4lyfe.org/dct-1d/ 26 | http://ieeexplore.ieee.org/document/558495/ 27 | 28 | 29 | 30 | F2F 31 | proceeds with vectorized sums and subtractions iteratively for the first n/2^k 32 | positions (where n is the length of the input vector and k is the iteration starting from 1) 33 | computing the intermediate operations of the Cooley-Tukey algorithm till a small Hadamard 34 | routine that fits in cache. Then the algorithm continues in the same way but starting from 35 | the smallest length and doubling on each iteration the input dimension until the whole 36 | FWHT is done in-place. 37 | */ 38 | 39 | #if __GNUC__ || __clang__ 40 | constexpr INLINE unsigned clz(unsigned long long x) { 41 | return __builtin_clzll(x); 42 | } 43 | constexpr INLINE unsigned clz(unsigned long x) { 44 | return __builtin_clzl(x); 45 | } 46 | constexpr INLINE unsigned clz(unsigned x) { 47 | return __builtin_clz(x); 48 | } 49 | #else 50 | 51 | #define clztbl(x, arg) do {\ 52 | switch(arg) {\ 53 | case 0: x += 4; break;\ 54 | case 1: x += 3; break;\ 55 | case 2: case 3: x += 2; break;\ 56 | case 4: case 5: case 6: case 7: x += 1; break;\ 57 | }} while(0) 58 | 59 | constexpr INLINE int clz_manual( uint32_t x ) 60 | { 61 | int n(0); 62 | if ((x & 0xFFFF0000) == 0) {n = 16; x <<= 16;} 63 | if ((x & 0xFF000000) == 0) {n += 8; x <<= 8;} 64 | if ((x & 0xF0000000) == 0) {n += 4; x <<= 4;} 65 | clztbl(n, x >> (32 - 4)); 66 | return n; 67 | } 68 | 69 | // Overload 70 | constexpr INLINE int clz_manual( uint64_t x ) 71 | { 72 | int n(0); 73 | if ((x & 0xFFFFFFFF00000000ull) == 0) {n = 32; x <<= 32;} 74 | if ((x & 0xFFFF000000000000ull) == 0) {n += 16; x <<= 16;} 75 | if ((x & 0xFF00000000000000ull) == 0) {n += 8; x <<= 8;} 76 | if ((x & 0xF000000000000000ull) == 0) {n += 4; x <<= 4;} 77 | clztbl(n, x >> (64 - 4)); 78 | return n; 79 | } 80 | #define clz(x) clz_manual(x) 81 | #endif 82 | 83 | template 84 | static constexpr INLINE unsigned ilog2(T x) noexcept { 85 | return sizeof(T) * CHAR_BIT - clz(x) - 1; 86 | } 87 | static constexpr unsigned log2_64(uint64_t x) {return ilog2(x);} 88 | 89 | class PRNRademacher { 90 | size_t n_; 91 | uint64_t seed_; 92 | public: 93 | using size_type = std::size_t; 94 | PRNRademacher(size_t n=0, uint64_t seed=0): n_(n), seed_(seed) {} 95 | auto size() const {return n_;} 96 | void resize(size_t newsize) {n_ = newsize;} 97 | void seed(uint64_t seed) {seed_ = seed;} 98 | 99 | template 100 | void apply(Container &c) const { 101 | wy::WyHash gen(seed_); 102 | uint64_t val(gen()); 103 | for(size_t i(0), e(c.size()); i < e; ++i) { 104 | if(unlikely((i & ((CHAR_BIT * sizeof(uint64_t)) - 1)) == 0)) 105 | val = gen(); 106 | c[i] *= val & 1 ? -1.: 1.; 107 | val >>= 1; 108 | } 109 | } 110 | 111 | template 112 | void apply(ArithType *c, size_t nitems=0) { 113 | wy::WyHash gen(seed_); 114 | uint64_t val; 115 | if(nitems == 0) nitems = n_; 116 | for(size_t i(0); i < nitems; ++i) { 117 | if(unlikely((i & ((CHAR_BIT * sizeof(uint64_t)) - 1)) == 0)) 118 | val = gen(); 119 | c[i] *= val & 1 ? 
-1.: 1.; 120 | val >>= 1; 121 | } 122 | } 123 | }; 124 | 125 | template<typename FT> 126 | class CachedRademacher { 127 | protected: 128 | size_t n_; 129 | uint64_t seed_; 130 | blaze::DynamicVector<FT> vec_; 131 | public: 132 | using size_type = std::size_t; 133 | CachedRademacher(size_t n, uint64_t seed=0): n_(n), seed_(seed), vec_(n) { 134 | this->seed(seed); 135 | } 136 | void resize(size_t newsz) { 137 | if(newsz < n_) { 138 | vec_.resize(newsz); 139 | n_ = newsz; 140 | return; 141 | } 142 | if(newsz > n_) { 143 | vec_.resize(newsz); 144 | n_ = newsz; 145 | seed(seed_); 146 | } 147 | } 148 | void seed(uint64_t seed) { 149 | wy::WyHash gen(seed); 150 | unsigned t = 64; 151 | auto v = gen(); 152 | for(size_t i = 0, e = n_; i < e; ++i) { 153 | vec_[i] = v & 1 ? -1.: 1.; 154 | v >>= 1; // Consume one bit per element. 155 | if(--t == 0) { 156 | v = gen(); 157 | t = 64; 158 | } 159 | } 160 | } 161 | auto size() const {return n_;} 162 | template<typename VT, bool TF> 163 | void apply(blaze::Vector<VT, TF> &c) const { 164 | ~c *= vec_; 165 | } 166 | void apply(FT *c) const { 167 | blaze::CustomVector<FT, blaze::unaligned, blaze::unpadded> cv(c, vec_.size()); 168 | apply(cv); 169 | } 170 | }; 171 | 172 | template<typename T=uint64_t> 173 | class CompactRademacherTemplate { 174 | T seed_; 175 | std::vector<T> data_; 176 | using FloatType = FLOAT_TYPE; 177 | 178 | static constexpr size_t NBITS = sizeof(T) * CHAR_BIT; 179 | static constexpr size_t SHIFT = log2_64(NBITS); 180 | static constexpr size_t BITMASK = NBITS - 1; 181 | 182 | public: 183 | using value_type = FloatType; 184 | using container_type = T; 185 | using size_type = size_t; 186 | // Constructors 187 | CompactRademacherTemplate(size_t n=0, uint64_t seed=std::time(nullptr)): seed_(seed), data_(n >> SHIFT) { 188 | if(n & (BITMASK)) 189 | std::fprintf(stderr, "Warning: n is not evenly divisible by the word size. (n: %zu). (bitmask: %zu)\n", n, BITMASK); 190 | randomize(seed_); 191 | } 192 | CompactRademacherTemplate(CompactRademacherTemplate &&other) = default; 193 | CompactRademacherTemplate(const CompactRademacherTemplate &other) = default; 194 | CompactRademacherTemplate &operator=(const CompactRademacherTemplate &o) = default; 195 | CompactRademacherTemplate &operator=(CompactRademacherTemplate &&o) = default; 196 | template<typename AsType> 197 | class CompactAs { 198 | static constexpr AsType values[2] {static_cast<AsType>(1), static_cast<AsType>(-1)}; 199 | const CompactRademacherTemplate &ref_; 200 | public: 201 | CompactAs(const CompactRademacherTemplate &ref): ref_(ref) {} 202 | AsType operator[](size_t index) const {return values[ref_.bool_idx(index)];} 203 | }; 204 | template<typename AsType> 205 | CompactAs<AsType> as_type() const { 206 | return CompactAs<AsType>(*this); 207 | } 208 | void seed(T seed) {seed_ = seed;} 209 | void resize(T new_size) { 210 | if(new_size != size()) { 211 | data_.resize(std::max(static_cast<T>(1), new_size >> SHIFT)); 212 | randomize(seed_); 213 | } 214 | } 215 | // For setting to random values 216 | auto *data() {return data_.data();} 217 | const auto *data() const {return data_.data();} 218 | // For use 219 | auto size() const {return data_.size() << SHIFT;} 220 | auto capacity() const {return data_.capacity() << SHIFT;} 221 | auto nwords() const {return data_.size();} 222 | auto nbytes() const {return size();} 223 | bool operator==(const CompactRademacherTemplate &other) const { 224 | if(size() != other.size()) return false; 225 | auto odata = other.data(); 226 | for(size_t i(0);i < data_.size(); ++i) 227 | if(data_[i] != odata[i]) 228 | return false; 229 | return true; 230 | } 231 | 232 | void randomize(uint64_t seed) { 233 | random_fill(reinterpret_cast<uint64_t *>(data_.data()), data_.size() * sizeof(T) / sizeof(uint64_t), seed); 234 | } 235 | void zero() {memset(data_.data(), 0, sizeof(T) * data_.size());} 236 | void reserve(size_t newsize) { 237 | data_.reserve(newsize >> SHIFT); 238 | } 239 | // Each word of data_ packs NBITS Rademacher signs; a set bit maps to +1, a clear bit to -1. 240 | INLINE int bool_idx(size_type idx) const {return !(data_[(idx >> SHIFT)] & (static_cast<T>(1) << (idx & BITMASK)));} 241 | 242 | FloatType operator[](size_type idx) const {return bool_idx(idx) ? FloatType(-1.): FloatType(1.);} 243 | template<typename InVector, typename OutVector> 244 | void apply(const InVector &in, OutVector &out) const { 245 | static_assert(is_same<decay_t<decltype(in[0])>, FloatType>::value, "Input vector should be the same type as this structure."); 246 | static_assert(is_same<decay_t<decltype(out[0])>, FloatType>::value, "Output vector should be the same type as this structure."); 247 | out = in; 248 | apply(out); 249 | } 250 | template<typename FloatType2> 251 | void apply(FloatType2 *vec) const { 252 | auto tmp(as_type<FloatType2>()); 253 | for(T i = 0; i < size(); ++i) vec[i] *= tmp[i]; 254 | } 255 | template<typename VectorType> 256 | void apply(VectorType &vec) const { 257 | //std::fprintf(stderr, "Applying %s vector of size %zu.\n", __PRETTY_FUNCTION__, vec.size()); 258 | auto tmp(as_type<decay_t<decltype(vec[0])>>()); 259 | if(vec.size() != size()) { 260 | if(vec.size() > size()) 261 | throw std::runtime_error("Vector is too large for this CompactRademacherTemplate."); 262 | std::fprintf(stderr, "Warning: vector (size %zu) is smaller than this CompactRademacherTemplate (size %zu); only the overlapping elements are affected. A following F*T may not behave as expected.\n", size_t(vec.size()), size_t(size())); 263 | } 264 | for(T i = 0, e(std::min(vec.size(), size())); i < e; ++i) { 265 | vec[i] *= tmp[i]; 266 | } 267 | } 268 | }; 269 | 270 | using CompactRademacher = CompactRademacherTemplate<uint64_t>; 271 | 272 | struct UnchangedRNGDistribution { 273 | template<typename RNG> 274 | auto operator()(RNG &rng) const {return rng();} 275 | void reset() {} 276 | }; 277 | 278 | struct Int2GaussianDistribution { 279 | template<typename RNG> 280 | auto operator()(RNG &rng) const {return random_gaussian_from_seed(rng());} 281 | void reset() {} 282 | }; 283 | 284 | template<typename RNG=aes::AesCtr<uint64_t>, typename Distribution=UnchangedRNGDistribution> 285 | class PRNVector { 286 | // Vector of random values generated 287 | const uint64_t seed_; 288 | uint64_t used_; 289 | uint64_t size_; 290 | RNG rng_; 291 | Distribution dist_; 292 | public: 293 | using ResultType = decay_t<decltype(std::declval<Distribution>()(std::declval<RNG &>()))>; 294 | private: 295 | ResultType val_; 296 | 297 | public: 298 | 299 | class PRNIterator { 300 | 301 | PRNVector *const ref_; 302 | public: 303 | auto operator*() const {return ref_->val_;} 304 | auto &operator ++() { 305 | inc(); 306 | return *this; 307 | } 308 | void inc() { 309 | ref_->gen(); 310 | ++ref_->used_; 311 | } 312 | void gen() {ref_->gen();} 313 | bool operator !=([[maybe_unused]] const PRNIterator &other) const { 314 | return ref_->used_ < ref_->size_; // Doesn't even access the other iterator. Only used for `while(it < end)`. 315 | } 316 | PRNIterator(PRNVector *prn_vec): ref_(prn_vec) {} 317 | }; 318 | 319 | template<typename... DistArgs> 320 | PRNVector(uint64_t size, uint64_t seed=0, DistArgs &&...args): 321 | seed_{seed}, used_{0}, size_{size}, rng_(seed_), dist_(std::forward<DistArgs>(args)...), val_(gen()) {} 322 | 323 | auto begin() { 324 | reset(); 325 | return PRNIterator(this); 326 | } 327 | ResultType gen() {return val_ = dist_(rng_);} 328 | void reset() { 329 | rng_.seed(seed_); 330 | dist_.reset(); 331 | used_ = 0; 332 | gen(); 333 | } 334 | auto end() { 335 | return PRNIterator(static_cast<PRNVector *>(nullptr)); 336 | } 337 | auto end() const { 338 | return PRNIterator(static_cast<PRNVector *>(nullptr)); 339 | } 340 | auto size() const {return size_;} 341 | void resize(size_t newsize) {size_ = newsize;} 342 | }; 343 | 344 | } // namespace frp 345 | 346 | 347 | #endif // #ifndef _GFRP_CRAD_H__ 348 | -------------------------------------------------------------------------------- /include/frp/coresets.h: -------------------------------------------------------------------------------- 1 | #ifndef _CORESETS_H__ 2 | #define _CORESETS_H__ 3 | #include "blaze/Math.h" 4 | 5 | namespace frp { 6 | inline namespace coresets { 7 | 8 | template<typename MatType, typename VectorType=blaze::DynamicVector<typename MatType::ElementType>> 9 | struct WeightedMatrix: std::pair<MatType, const VectorType *> { 10 | // first is the data matrix; second is an optional (possibly null) pointer to per-row weights. 11 | using matrix_type = MatType; 12 | using vector_type = VectorType; 13 | }; 14 | 15 | // General idea: generate importance weights, then sample rows (with replacement) to form the coreset. 16 | 17 | template<typename Mat> 18 | auto generate_lightweight_kmeans(const Mat &m) { 19 | // Method for k-means: lightweight-coreset importance, q(x) = .5 * (1/n + d(x, mean)^2 / total squared distance), with the weighted generalization when per-row weights are present. 20 | using FT = typename Mat::matrix_type::ElementType; 21 | static constexpr bool CSO = blaze::StorageOrder<typename Mat::matrix_type>::value; 22 | blaze::DynamicVector<FT> mean, importance; 23 | auto weights = m.second; 24 | if(weights) { 25 | if(weights->size() != m.first.rows()) throw 1; 26 | auto vit = weights->begin(); 27 | FT tsum = *vit++; 28 | mean = trans(row(m.first, 0)) * tsum; 29 | for(size_t i = 1; i < m.first.rows(); ++i) { 30 | auto rv = *vit++; 31 | mean += trans(row(m.first, i)) * rv; 32 | tsum += rv; 33 | } 34 | if(tsum == 0.) throw 2; // should never happen 35 | mean *= 1. / tsum; 36 | FT wnormsum = 0.; 37 | importance.resize(m.first.rows()); 38 | vit = weights->begin(); 39 | for(size_t i = 0; i < m.first.rows(); ++i) { 40 | auto diff = trans(row(m.first, i)) - mean; 41 | FT rdiffnorm = blaze::sum(diff * diff) * *vit++; 42 | importance[i] = rdiffnorm; 43 | wnormsum += rdiffnorm; 44 | } 45 | if(wnormsum) 46 | importance = ((importance / wnormsum) + 1. / tsum) * .5; 47 | else importance = (1. / tsum); // uniform assignment 48 | } else { 49 | const FT nr = m.first.rows(); 50 | mean = trans(blaze::mean<blaze::columnwise>(m.first)); 51 | FT wnormsum = 0.; 52 | importance.resize(m.first.rows()); 53 | for(size_t i = 0; i < m.first.rows(); ++i) { 54 | auto diff = trans(row(m.first, i)) - mean; 55 | FT rdiffnorm = blaze::sum(diff * diff); 56 | importance[i] = rdiffnorm; 57 | wnormsum += rdiffnorm; 58 | } 59 | if(wnormsum) 60 | importance = ((importance / wnormsum) + 1. / nr) * .5; 61 | else importance = (1. / nr); // uniform assignment 62 | } 63 | importance /= blaze::sum(importance); 64 | return importance; 65 | } 66 | 67 | // To get a core-set: pick a size (TODO: make function determining coreset size) 68 | // and then sample that many (with replacement) until you hit that size 69 | // Thoughts: indexing via coresets? 70 | 71 | } // coresets 72 | } // frp 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /include/frp/dist.h: -------------------------------------------------------------------------------- 1 | #ifndef _GFRP_DIST_H__ 2 | #define _GFRP_DIST_H__ 3 | #include <random> 4 | #include "frp/rand.h" 5 | #include "frp/linalg.h" 6 | #include "boost/random/normal_distribution.hpp" 7 | #include "boost/random.hpp" 8 | 9 | namespace frp { 10 | 11 | // Fill a matrix with distributions. Contains utilities for filling 12 | // vectors with C++ std distributions as well as Rademacher. 13 | 14 | template<typename Container, template<typename> typename Distribution, typename RNG=aes::AesCtr<uint64_t>, typename... 
DistArgs> 15 | void sample_fill(Container &con, uint64_t seed, DistArgs &&... args) { 16 | using FloatType = std::decay_t; 17 | RNG gen(seed); 18 | Distribution dist(std::forward(args)...); 19 | for(auto &el: con) el = dist(gen); 20 | } 21 | 22 | template typename Distribution, typename RNG=aes::AesCtr, typename... DistArgs> 23 | void sample_fill(blaze::DynamicMatrix &con, uint64_t seed, DistArgs &&... args) { 24 | #pragma omp parallel for 25 | for(size_t i = 0; i < con.rows(); ++i) { 26 | RNG gen(seed); 27 | gen.seed(gen() + i); 28 | thread_local Distribution dist(std::forward(args)...); 29 | for(size_t j(0); j < con.columns(); ++j) 30 | con(i, j) = dist(gen); 31 | } 32 | } 33 | 34 | 35 | template> 36 | void random_fill(uint64_t *data, uint64_t len, uint64_t seed=0) { 37 | for(RNG gen(seed); len; data[--len] = gen()); 38 | } 39 | 40 | #define DEFINE_DIST_FILL(type, name) \ 41 | template, typename...Args> \ 42 | void name##_fill(Container &con, uint64_t seed, Args &&... args) { \ 43 | sample_fill(con, seed, std::forward(args)...); \ 44 | }\ 45 | template, typename...Args> \ 46 | void name##_fill(blaze::DynamicMatrix &con, uint64_t seed, Args &&... args) { \ 47 | sample_fill(con, seed, std::forward(args)...); \ 48 | }\ 49 | struct name##_fill_struct {\ 50 | template, typename...Args>\ 51 | void operator()(Container &con, uint64_t seed, Args &&... args) const {\ 52 | name##_fill(con, seed, std::forward(args)...);\ 53 | }\ 54 | }; 55 | 56 | template 57 | class unit_normal: public boost::normal_distribution { 58 | public: 59 | void reset() {} 60 | }; 61 | 62 | enum DistributionType { 63 | NORMAL, 64 | UNIT_NORMAL, 65 | CAUCHY, 66 | CHI_SQUARED, 67 | LOGNORMAL, 68 | EXTREME_VALUE_DISTRIBUTION, 69 | WEIBULL, 70 | UNIFORM_REAL_DISTRIBUTION, 71 | NEGATIVE_BINOMIAL, 72 | EXPONENTIAL, 73 | EVD=EXTREME_VALUE_DISTRIBUTION, 74 | NB=NEGATIVE_BINOMIAL, 75 | EXP=EXPONENTIAL 76 | }; 77 | DEFINE_DIST_FILL(boost::normal_distribution, gaussian) 78 | DEFINE_DIST_FILL(unit_normal, unit_gaussian) 79 | DEFINE_DIST_FILL(boost::cauchy_distribution, cauchy) 80 | DEFINE_DIST_FILL(boost::random::chi_squared_distribution, chisq) 81 | DEFINE_DIST_FILL(boost::lognormal_distribution, lognormal) 82 | DEFINE_DIST_FILL(boost::random::extreme_value_distribution, extreme_value) 83 | DEFINE_DIST_FILL(boost::random::weibull_distribution, weibull) 84 | DEFINE_DIST_FILL(boost::random::uniform_real_distribution, uniform) 85 | DEFINE_DIST_FILL(std::negative_binomial_distribution, nb) 86 | DEFINE_DIST_FILL(std::exponential_distribution, exp) 87 | 88 | } // frp 89 | 90 | #endif // #ifndef _GFRP_DIST_H__ 91 | -------------------------------------------------------------------------------- /include/frp/fhtgpu.h: -------------------------------------------------------------------------------- 1 | #ifndef FRP_GPU_FHT_H 2 | #define FRP_GPU_FHT_H 3 | namespace frp { 4 | 5 | namespace detail { 6 | // Derived from WyHash 7 | static constexpr const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull; 8 | 9 | template 10 | static constexpr inline T seedind2val(T ind, T seed) { 11 | uint64_t oldstate = ind; 12 | uint64_t newstart = ind * 6364136223846793005ULL + seed; 13 | uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; 14 | uint32_t rot = oldstate >> 59u; 15 | return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); 16 | } 17 | 18 | template 19 | static constexpr inline T seedind2val_lazy(T ind, T seed) { 20 | return ((ind ^ seed) * static_cast(6364136223846793005ULL)) ^ _wyp1; 21 | } 22 | 23 | 24 | // TODO: kernel 
fusion between fht and random diagonal matrix multiplication from fixed seeds. 25 | 26 | } // detail 27 | 28 | 29 | #ifdef __CUDACC__ 30 | template<typename T, typename T2> 31 | __global__ void grsfht_kernel(T *ptr, size_t l2, int nthreads, T theta, T2 *vals) { 32 | // Givens rotation hadamard product kernel; per-level angles come from vals (the theta argument is unused here). 33 | // This maps pretty well to the GPU 34 | int tid = blockIdx.x*blockDim.x + threadIdx.x; 35 | int n = 1 << l2; 36 | for(int i = 0; i < l2; ++i) { 37 | const T th = vals[i]; 38 | const T mc = cos(th), ms = sin(th); 39 | int s1 = 1 << i, s2 = s1 << 1; 40 | int nthreads_active = min(n >> (i + 1), nthreads); 41 | int npert = n / nthreads_active; 42 | if(tid < nthreads_active) { 43 | for(int j = tid * npert, e = j + npert; j != e; j += s2) { 44 | #pragma unroll 45 | for(size_t k = 0; k < s1; ++k) { 46 | auto u = ptr[j + k], v = ptr[j + k + s1]; 47 | ptr[j + k] = u * mc - v * ms, ptr[j + k + s1] = ms * u + mc * v; 48 | } 49 | } 50 | } 51 | __syncthreads(); 52 | } 53 | } 54 | 55 | template<typename T> 56 | __global__ void pfht_kernel(T *ptr, size_t l2, int nthreads, T theta) { 57 | // This maps pretty well to the GPU 58 | int tid = blockIdx.x*blockDim.x + threadIdx.x; 59 | int n = 1 << l2; 60 | T mc = cos(theta), ms = sin(theta); 61 | for(int i = 0; i < l2; ++i) { 62 | int s1 = 1 << i, s2 = s1 << 1; 63 | int nthreads_active = min(n >> (i + 1), nthreads); 64 | int npert = n / nthreads_active; 65 | if(tid < nthreads_active) { 66 | for(int j = tid * npert, e = j + npert; j != e; j += s2) { 67 | #pragma unroll 68 | for(size_t k = 0; k < s1; ++k) { 69 | auto u = ptr[j + k], v = ptr[j + k + s1]; 70 | ptr[j + k] = u * mc - v * ms, ptr[j + k + s1] = ms * u + mc * v; 71 | } 72 | } 73 | } 74 | __syncthreads(); 75 | } 76 | } 77 | 78 | template<typename T, bool renormalize> 79 | __global__ void fht_kernel(T *ptr, size_t l2, int nthreads) { 80 | // This maps pretty well to the GPU 81 | int tid = blockIdx.x*blockDim.x + threadIdx.x; 82 | int n = 1 << l2; 83 | for(int i = 0; i < l2; ++i) { 84 | int s1 = 1 << i, s2 = s1 << 1; 85 | int nthreads_active = min(n >> (i + 1), nthreads); 86 | int npert = n / nthreads_active; 87 | if(tid < nthreads_active) { 88 | for(int j = tid * npert, e = j + npert; j != e; j += s2) { 89 | #pragma unroll 90 | for(size_t k = 0; k < s1; ++k) { 91 | auto u = ptr[j + k], v = ptr[j + k + s1]; 92 | ptr[j + k] = u + v, ptr[j + k + s1] = u - v; 93 | } 94 | } 95 | } 96 | __syncthreads(); 97 | } 98 | if(renormalize) { 99 | T mult = 1. / pow(sqrt(2.), l2); 100 | int npert = n / nthreads; 101 | #pragma unroll 102 | for(int i = tid * npert, e = i + npert; i < e; ++i) { 103 | ptr[i] *= mult; 104 | } 105 | } 106 | } 107 | template<typename T> 108 | __global__ void rademacher_multiply(T *ptr, uint32_t *rvals, size_t l2, int nthreads) { 109 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 110 | size_t n = 1ull << l2; 111 | int per_thread = n / nthreads; 112 | int start_index = tid * per_thread, end = start_index + per_thread; 113 | for(int i = start_index / 32; i != end / 32; ++i) { 114 | auto rv = rvals[i]; 115 | int li = i * 32; 116 | #pragma unroll 117 | for(int j = 0; j < 32; ++j) { 118 | auto v = ptr[li + j]; 119 | ptr[li + j] = (rv >> j)& 1 ? 
-v: v; 120 | } 121 | } 122 | } 123 | template 124 | __global__ void radfht_kernel(T *ptr, uint32_t *rvals, size_t l2, int nthreads) { 125 | // Performs both 126 | int tid = blockIdx.x*blockDim.x + threadIdx.x; 127 | int n = 1 << l2; 128 | int per_thread = n / nthreads; 129 | int start_index = tid * per_thread, end = start_index + per_thread; 130 | for(int i = start_index / 32; i != end / 32; ++i) { 131 | auto rv = rvals[i]; 132 | int li = i * 32; 133 | #pragma unroll 134 | for(int j = 0; j < 32; ++j) { 135 | auto v = ptr[li + j]; 136 | ptr[li + j] = (rv >> j)& 1 ? -v: v; 137 | } 138 | } 139 | for(int i = 0; i < l2; ++i) { 140 | int s1 = 1 << i, s2 = s1 << 1; 141 | int nthreads_active = min(n >> (i + 1), nthreads); 142 | int npert = n / nthreads_active; 143 | if(tid < nthreads_active) { 144 | #pragma unroll 145 | for(int j = tid * npert, e = j + npert; j != e; j += s2) { 146 | #pragma unroll 147 | for(size_t k = 0; k < s1; ++k) { 148 | auto u = ptr[j + k], v = ptr[j + k + s1]; 149 | ptr[j + k] = u + v, ptr[j + k + s1] = u - v; 150 | } 151 | } 152 | } 153 | __syncthreads(); 154 | } 155 | if(renormalize) { 156 | T mult = 1. / pow(sqrt(2.), l2); 157 | int npert = n / nthreads; 158 | #pragma unroll 159 | for(int i = tid * npert, e = i + npert; i < e; ++i) { 160 | ptr[i] *= mult; 161 | } 162 | } 163 | } 164 | 165 | #endif /* #ifdef __CUDACC__ */ 166 | 167 | } // frp 168 | #endif /* FRP_GPU_FHT_H */ 169 | -------------------------------------------------------------------------------- /include/frp/frp.h: -------------------------------------------------------------------------------- 1 | #ifndef _GFRP_H__ 2 | #define _GFRP_H__ 3 | 4 | #include "frp/compact.h" 5 | #include "frp/dci.h" 6 | #include "frp/dist.h" 7 | #include "frp/fhtgpu.h" 8 | #include "frp/frp.h" 9 | #include "frp/gpu.h" 10 | #include "frp/ifc.h" 11 | #include "frp/jl.h" 12 | #include "frp/kernel.h" 13 | #include "frp/linalg.h" 14 | #include "frp/mach.h" 15 | #include "frp/parser.h" 16 | #include "frp/rand.h" 17 | #include "frp/sample.h" 18 | #include "frp/sdq.h" 19 | #include "frp/spinner.h" 20 | #include "frp/stackstruct.h" 21 | #include "frp/util.h" 22 | 23 | #endif 24 | 25 | 26 | -------------------------------------------------------------------------------- /include/frp/gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef FRP_GPU_H 2 | #define FRP_GPU_H 3 | #include "fhtgpu.h" 4 | 5 | 6 | #endif /* FRP_GPU_H */ 7 | -------------------------------------------------------------------------------- /include/frp/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef FROOPY_GRAPH_H__ 2 | #define FROOPY_GRAPH_H__ 3 | #include "./util.h" 4 | #include 5 | #include 6 | 7 | namespace frp { 8 | inline namespace graph { 9 | 10 | struct Emplacer { 11 | template class Container, typename Value, typename...CArgs> 12 | static auto emplace(Container &c, Value &&v) { 13 | c.emplace(std::move(v)); 14 | } 15 | template 16 | static auto emplace(std::vector &c, Value &&v) { 17 | c.emplace_back(std::move(v)); 18 | } 19 | template 20 | static auto emplace(std::deque &c, Value &&v) { 21 | c.emplace_back(std::move(v)); 22 | } 23 | }; 24 | 25 | // Representation 1: all nodes implicit 26 | template class EdgeContainer=std::set> 28 | class SparseGraph { 29 | public: 30 | using index_type = IndexType; 31 | using edge_type = std::pair; 32 | protected: 33 | index_type n_; 34 | EdgeContainer edges_; 35 | public: 36 | SparseGraph(index_type n=0): n_(n) { 37 | 38 | } 
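// Usage sketch (hypothetical; template arguments are elided in this listing):
//   SparseGraph g(/*n=*/4);   // nodes 0..3 are implicit
//   g.add(1, 3); g.add(3, 2); // undirected graphs keep edges with first <= second, so (3, 2) is stored as (2, 3)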
39 | void resize(index_type newn) { 40 | if(newn < n_) 41 | for(const auto &pair: edges_) 42 | if(pair.first > n_ || pair.second > n_) 43 | throw std::runtime_error("Resizing leaves dangling edges."); 44 | n_ = newn; 45 | } 46 | void add(index_type lhs, index_type rhs) { 47 | add(std::make_pair(lhs, rhs)); 48 | } 49 | void add(edge_type edge) { 50 | CONST_IF(!is_directed) { 51 | if(edge.first > edge.second) 52 | std::swap(edge.first, edge.second); 53 | } 54 | if(std::max(edge.first, edge.second) > n_) 55 | throw std::runtime_error("Can't add edges between nodes that don't exist"); 56 | Emplacer::emplace(edges_, edge); 57 | } 58 | void sort() { 59 | std::sort(edges_.begin(), edges_.end()); 60 | } 61 | }; 62 | // Representation 2: nodes explicit, with values 63 | // edges are unweighted 64 | template class EdgeContainer=std::set, 67 | template class NodeContainer=std::vector> 68 | class NodeValuedSparseGraph: public SparseGraph { 69 | protected: 70 | using super = SparseGraph; 71 | using node_type = ValueType; 72 | using super::edge_type; 73 | using super::index_type; 74 | NodeContainer nodes_; 75 | public: 76 | template 77 | NodeValuedSparseGraph(Args &&...args): nodes_(std::forward(args)...) { 78 | if(nodes_.size()) 79 | super::resize(nodes_.size()); 80 | } 81 | template 82 | auto emplace_node(Args &&...args) { 83 | ++this->n_; 84 | return Emplacer::emplace(nodes_, std::forward(args)...); 85 | } 86 | }; 87 | 88 | // TODO: Representation 3: nodes implicit, weighted edges 89 | // TODO: Representation 4: nodes explicit, weighted edges 90 | } // graph 91 | } // frp 92 | 93 | #endif /* FROOPY_GRAPH_H__ */ 94 | -------------------------------------------------------------------------------- /include/frp/ifc.h: -------------------------------------------------------------------------------- 1 | #ifndef _FAST_COPY_H__ 2 | #define _FAST_COPY_H__ 3 | 4 | #include 5 | #include 6 | #if (defined(__x86_64__) || defined(__i386__)) 7 | # include 8 | #endif 9 | 10 | #define _STORAGE_ static inline 11 | 12 | // These functions all assume that the size of memory being copied is a power of 2. 13 | 14 | #ifndef FAST_COPY_MEMCPY_THRESHOLD 15 | #define FAST_COPY_MEMCPY_THRESHOLD (1u << 20) 16 | #endif 17 | 18 | #if _FEATURE_AVX512F 19 | // If n is less than 64, defaults to memcpy. Otherwise, being a power of 2, we can just use unaligned stores and loads. 20 | _STORAGE_ void *fast_copy(void *out, void *in, size_t n) { 21 | if(n >= FAST_COPY_MEMCPY_THRESHOLD) { 22 | return memcpy(out, in, n); 23 | } 24 | n >>= 6; 25 | for(__m512 *ov = (__m512 *)out, *iv = (__m512 *)in; n--;) { 26 | _mm512_storeu_ps((float *)(ov++), _mm512_loadu_ps((float *)(iv++))); 27 | } 28 | return out; 29 | } 30 | #elif __AVX2__ 31 | // If n is less than 32, defaults to memcpy. Otherwise, being a power of 2, we can just use unaligned stores and loads. 32 | _STORAGE_ void *fast_copy(void *out, void *in, size_t n) { 33 | if(n >= FAST_COPY_MEMCPY_THRESHOLD) { 34 | return memcpy(out, in, n); 35 | } 36 | n >>= 5; 37 | for(__m256 *ov = (__m256 *)out, *iv = (__m256 *)in; n--;) { 38 | _mm256_storeu_ps((float *)(ov++), _mm256_loadu_ps((float *)(iv++))); 39 | } 40 | return out; 41 | } 42 | #elif __SSE2__ 43 | // If n is less than 16, defaults to memcpy. Otherwise, being a power of 2, we can just use unaligned stores and loads. 
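// For example, a hypothetical call fast_copy(dst, src, 1024) performs 64 unaligned
// 16-byte loads/stores in the loop below, while any n >= FAST_COPY_MEMCPY_THRESHOLD
// (1 MiB by default) takes the plain memcpy path instead.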
44 | _STORAGE_ void *fast_copy(void *out, void *in, size_t n) { 45 | if(n >= FAST_COPY_MEMCPY_THRESHOLD) { 46 | return memcpy(out, in, n); 47 | } 48 | n >>= 4; 49 | for(__m128 *ov = (__m128 *)out, *iv = (__m128 *)in; n--;) { 50 | _mm_storeu_ps((float *)(ov++), _mm_loadu_ps((float *)(iv++))); 51 | } 52 | return out; 53 | } 54 | #else 55 | _STORAGE_ void *fast_copy(void *out, void *in, size_t n) { 56 | return memcpy(out, in, n); 57 | } 58 | #endif 59 | 60 | #undef _STORAGE_ 61 | #endif 62 | -------------------------------------------------------------------------------- /include/frp/jl.h: -------------------------------------------------------------------------------- 1 | #ifndef _JL_H__ 2 | #define _JL_H__ 3 | #include 4 | #include "frp/spinner.h" 5 | 6 | namespace frp { 7 | 8 | namespace jl { 9 | 10 | template 11 | class JLTransform { 12 | using FloatType = typename MatrixType::ElementType; 13 | const size_t m_, n_; 14 | MatrixType matrix_; 15 | public: 16 | JLTransform(size_t m, size_t n): 17 | m_{m}, n_{n}, matrix_(m, n) { 18 | if(m_ >= n_) fprintf(stderr, "Warning: JLTransform has to reduce dimensionality."); 19 | } 20 | template 21 | void fill(RNG &rng, Distribution &dist, bool orthogonalize=true) { 22 | for(size_t i(0); i < m_; ++i) 23 | for(size_t j(0); j < n_; ++j) 24 | matrix_(i, j) = dist(rng); 25 | if(orthogonalize) { 26 | linalg::gram_schmidt(matrix_, linalg::RESCALE_TO_GAUSSIAN); 27 | } 28 | matrix_ *= 1. / std::sqrt(static_cast(m_)); 29 | } 30 | void fill(uint64_t seed, bool orthogonalize=true) { 31 | std::mt19937_64 rng(seed); 32 | std::normal_distribution dist; 33 | fill(rng, dist, orthogonalize); 34 | } 35 | template 36 | void apply(const InVec &in, OutVec out) { 37 | assert(out.size() == m_); 38 | assert(in.size() == n_); 39 | out = matrix_ * in; 40 | } 41 | auto size() const {return matrix_.rows() * matrix_.columns();} 42 | }; 43 | 44 | template 45 | class OrthogonalJLTransform { 46 | size_t from_, to_; 47 | std::vector> blocks_; 48 | std::vector seeds_; 49 | public: 50 | using size_type = uint64_t; 51 | 52 | OrthogonalJLTransform(size_t from, size_t to, uint64_t seed, size_t nblocks=3): from_(roundup(from)), to_(to) 53 | { 54 | std::mt19937_64 gen(seed); 55 | while(seeds_.size() < nblocks) seeds_.push_back(gen()); 56 | for(const auto seed: seeds_) blocks_.emplace_back(from, seed); 57 | } 58 | OrthogonalJLTransform(OrthogonalJLTransform &&o) = default; 59 | OrthogonalJLTransform(const OrthogonalJLTransform &o) = default; 60 | void resize(size_type newfrom, size_type newto) { 61 | //std::fprintf(stderr, "Resizing from %zu to %zu (rounded up %zu)\n", from_, roundup(newfrom), newfrom); 62 | newfrom = roundup(newfrom); 63 | resize_from(newfrom); 64 | resize_to(newto); 65 | } 66 | size_t from_size() const {return from_;} 67 | size_t to_size() const {return to_;} 68 | void reseed(size_type newseed) { 69 | seeds_.clear(); 70 | std::mt19937_64 gen(newseed); 71 | while(seeds_.size() < nblocks()) seeds_.push_back(gen()); 72 | } 73 | void resize_from(size_type newfrom) { 74 | from_ = newfrom; 75 | for(size_type i(0); i < nblocks(); ++i) { 76 | blocks_[i].seed(seeds_[i]); 77 | blocks_[i].resize(from_); 78 | } 79 | } 80 | void resize_to(size_type newto) { 81 | to_ = newto; 82 | } 83 | size_t nblocks() const {return blocks_.size();} 84 | template 85 | void transform(const Vec1 &in, Vec2 &out) const { 86 | Vec2 tmp(in); // Copy. 87 | transform_inplace(tmp); 88 | out = subvector(tmp, 0, to_); // Copy result out. 
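// (i.e., transform() is transform_inplace() on a copy, followed by keeping the
//  first to_ coordinates -- the subsampling step of the OJLT.)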
89 | } 90 | template::value>> 91 | void transform_inplace(Vec1 &in) const { 92 | for(auto it(std::rbegin(blocks_)), eit(std::rend(blocks_)); it != eit; ++it) { 93 | it->apply(in); 94 | } 95 | in *= std::sqrt(static_cast(from_) / to_); 96 | } 97 | template::value>> 98 | void transform_inplace(FloatType *in) const { 99 | for(auto it(std::rbegin(blocks_)), eit(std::rend(blocks_)); it != eit; (it++)->apply(in)); // Apply transforms 100 | // Renormalize. 101 | using SType = typename vec::SIMDTypes; 102 | const FloatType *end(in + to_); 103 | const typename SType::Type vmul = SType::set1(std::sqrt(static_cast(from_) / to_)); 104 | if(SType::aligned(in)) { 105 | do SType::store(in, SType::mul(SType::load(in), vmul)); while((in += sizeof(vmul) / sizeof(*in)) < end); 106 | } else { 107 | do SType::storeu(in, SType::mul(SType::loadu(in), vmul)); while((in += sizeof(vmul) / sizeof(*in)) < end); 108 | } 109 | } 110 | // Downstream application has to subsample itself. 111 | // Optionally add a (potentially scaled?) Guassian multiplication layer. 112 | }; 113 | 114 | class FastJLTransform { 115 | /* https://www.cs.princeton.edu/~chazelle/pubs/FJLT-sicomp09.pdf 116 | * THE FAST JOHNSON-LINDENSTRAUSS TRANSFORM AND APPROXIMATE NEAREST NEIGHBORS 117 | * SIAM J. COMPUT ©2009 Society for Industrial and Applied MathematicsVol. 39, No. 1,p. 32 118 | * 119 | * The success of this approach to accelerating ANN suggests the potential utility of the OJLT in said searches. 120 | */ 121 | size_t from_, to_; 122 | HadamardRademacherSDBlock block_; 123 | uint64_t seed_; 124 | SubsampleStrategy sample_method_; 125 | public: 126 | using size_type = uint64_t; 127 | 128 | FastJLTransform(size_t from, size_t to, uint64_t seed, SubsampleStrategy strat=FIRST_M): 129 | from_(roundup(from)), to_(to), block_(from, seed), seed_(seed), sample_method_(strat) 130 | { 131 | } 132 | FastJLTransform(FastJLTransform &&o) = default; 133 | FastJLTransform(const FastJLTransform &o) = default; 134 | void resize(size_type newfrom, size_type newto) { 135 | //std::fprintf(stderr, "Resizing from %zu to %zu (rounded up %zu)\n", from_, roundup(newfrom), newfrom); 136 | newfrom = roundup(newfrom); 137 | resize_from(newfrom); 138 | resize_to(newto); 139 | } 140 | SubsampleStrategy get_sample_method() const {return sample_method_;} 141 | SubsampleStrategy set_sample_method(SubsampleStrategy newstrat) {return sample_method_ = newstrat;} 142 | size_t from_size() const {return from_;} 143 | size_t to_size() const {return to_;} 144 | void reseed(size_type newseed) { 145 | block_ = HadamardRademacherSDBlock(from_, newseed); 146 | } 147 | void resize_from(size_type newfrom) { 148 | from_ = newfrom; 149 | reseed(seed_); 150 | } 151 | void resize_to(size_type newto) { 152 | to_ = newto; 153 | } 154 | static constexpr size_t nblocks() {return 1;} 155 | template 156 | void transform(const Vec1 &in, Vec2 &out) const { 157 | Vec2 tmp(in); // Copy. 158 | transform_inplace(tmp); 159 | out = subvector(tmp, 0, to_); // Copy result out. 160 | } 161 | template::value>> 162 | void transform_inplace(Vec1 &in) const { 163 | block_.apply(in); 164 | const auto mult = std::sqrt(static_cast(from_) / to_); 165 | // Note: we multiply in the same pass as the shuffle under the hope that the cache efficiency 166 | // of a signle pass outweighs the value of SIMD acceleration 167 | switch(sample_method_) { 168 | case FIRST_M: 169 | in.resize(to_); // The buffer following is unused/unnecessary. 
We simply sample the first d rows wlog 170 | in *= mult; 171 | break; 172 | case RANDOM_NO_REPLACEMENT: case RANDOM_NO_REPLACEMENT_HASH_SET: case RANDOM_NO_REPLACEMENT_VEC: case RANDOM_W_REPLACEMENT: default: 173 | aes::AesCtr gen(seed_ ^ 1337); 174 | if(to_ > from_) { 175 | size_t initsz = in.size(); 176 | in.resize(to_); 177 | for(size_t i = initsz; i < in.size(); in[i++] = (in[fastrange(gen(), initsz)] * mult)); 178 | } else if(to_ != from_) { 179 | for(size_t i = 0; i < to_; ++i) { 180 | uint32_t oind = fastrange(gen(), in.size() - i) + i; // This avoids replacement, just for convenience 181 | in[i] = in[oind] * mult; 182 | } 183 | in.resize(to_); 184 | } // else do nothing 185 | } 186 | } 187 | template::value>> 188 | void transform_inplace(FloatType *in) const { 189 | // Apply transform and renormalize. 190 | if(sample_method_ != FIRST_M) throw std::runtime_error("Sampling methods besides FIRST_M not implemented for pointers."); 191 | if(from_ < to_) throw std::runtime_error("FastJLTransform only supports dimensionality reduction."); 192 | block_.apply(in); 193 | using SType = typename vec::SIMDTypes; 194 | const FloatType *end(in + to_); 195 | const typename SType::Type vmul = SType::set1(std::sqrt(static_cast(from_) / to_)); 196 | if(SType::aligned(in)) { 197 | do SType::store(in, SType::mul(SType::load(in), vmul)); while((in += sizeof(vmul) / sizeof(*in)) < end); 198 | } else { 199 | do SType::storeu(in, SType::mul(SType::loadu(in), vmul)); while((in += sizeof(vmul) / sizeof(*in)) < end); 200 | } 201 | } 202 | // Downstream application has to subsample itself. 203 | // Optionally add a (potentially scaled?) Guassian multiplication layer. 204 | }; 205 | 206 | using OJLTransform = OrthogonalJLTransform; 207 | using DOJ = OrthogonalJLTransform; 208 | using OJLT = OJLTransform; 209 | using FOJ = OJLT; 210 | using FJLT = FastJLTransform; 211 | 212 | } // namespace jl 213 | using namespace jl; 214 | 215 | } // namespace frp 216 | 217 | #endif // #ifndef _JL_H__ 218 | -------------------------------------------------------------------------------- /include/frp/kama.h: -------------------------------------------------------------------------------- 1 | #ifndef KAMA_H__ 2 | #define KAMA_H__ 3 | 4 | #endif /* KAMA_H__ */ 5 | -------------------------------------------------------------------------------- /include/frp/lsh.h: -------------------------------------------------------------------------------- 1 | #ifndef FRP_LSH_H__ 2 | #define FRP_LSH_H__ 3 | #include "vec/vec.h" 4 | #include "frp/jl.h" 5 | #include "clhash/include/clhash.h" 6 | #include "flat_hash_map/flat_hash_map.hpp" 7 | 8 | 9 | namespace frp { 10 | struct mclhasher { 11 | const void *random_data_; 12 | mclhasher(uint64_t seed1=137, uint64_t seed2=777): random_data_(get_random_key_for_clhash(seed1, seed2)) {} 13 | mclhasher(const mclhasher &o): random_data_(copy_random_data(o)) {} // copy data 14 | mclhasher(mclhasher &&o): random_data_(o.random_data_) { 15 | o.random_data_ = nullptr; // move 16 | } 17 | static void *copy_random_data(const mclhasher &o) { 18 | void *ret; 19 | if(posix_memalign(&ret, sizeof(__m128i), RANDOM_BYTES_NEEDED_FOR_CLHASH)) throw std::bad_alloc(); 20 | return std::memcpy(ret, o.random_data_, RANDOM_BYTES_NEEDED_FOR_CLHASH); 21 | } 22 | template 23 | uint64_t operator()(const T *data, const size_t len) const { 24 | return clhash(random_data_, (const char *)data, len * sizeof(T)); 25 | } 26 | uint64_t operator()(const char *str) const {return operator()(str, std::strlen(str));} 27 | template 28 | 
uint64_t operator()(const T &input) const { 29 | return operator()((const char *)&input, sizeof(T)); 30 | } 31 | template 32 | uint64_t operator()(const std::vector &input) const { 33 | return operator()((const char *)input.data(), sizeof(T) * input.size()); 34 | } 35 | uint64_t operator()(const std::string &str) const { 36 | return operator()(str.data(), str.size()); 37 | } 38 | ~mclhasher() { 39 | std::free((void *)random_data_); 40 | } 41 | }; 42 | using SIMDSpace = vec::SIMDTypes; 43 | using VType = typename SIMDSpace::VType; 44 | template ATTR_CONST INLINE auto cmp_zero(V v); 45 | #if _FEATURE_AVX512F 46 | template<> ATTR_CONST INLINE auto 47 | cmp_zero (__m512 v) { 48 | return _mm512_cmp_ps_mask(v, _mm512_setzero_ps(), _CMP_GT_OQ); 49 | } 50 | template<> ATTR_CONST INLINE auto 51 | cmp_zero (__m512d v) { 52 | return _mm512_cmp_pd_mask(v, _mm512_setzero_pd(), _CMP_GT_OQ); 53 | } 54 | #elif __AVX__ 55 | template<> 56 | ATTR_CONST INLINE 57 | auto cmp_zero (__m256 v) { 58 | return _mm256_movemask_ps(_mm256_cmp_ps(v, _mm256_setzero_ps(), _CMP_GT_OQ)); 59 | } 60 | template<> 61 | ATTR_CONST INLINE 62 | auto cmp_zero (__m256d v) { 63 | return _mm256_movemask_pd(_mm256_cmp_pd(v, _mm256_setzero_pd(), _CMP_GT_OQ)); 64 | } 65 | #else 66 | #pragma message("not vectorizing signed projection hashing") 67 | #endif 68 | 69 | template 70 | struct empty { 71 | template empty(Args &&...args) {} 72 | }; 73 | 74 | template 75 | struct F2VType; 76 | #if __AVX__ 77 | 78 | template<> struct F2VType { 79 | using type = __m256; 80 | static type load(const float *a) { 81 | return _mm256_loadu_ps(a); 82 | } 83 | }; 84 | template<> struct F2VType { 85 | using type = __m256d; 86 | static type load(const double *a) { 87 | return _mm256_loadu_pd(a); 88 | } 89 | }; 90 | #endif 91 | #if HAS_AVX_512 92 | template<> struct F2VType { 93 | using type = __m512; 94 | static type load(const float *a) { 95 | return _mm512_loadu_ps(a); 96 | } 97 | }; 98 | template<> struct F2VType { 99 | using type = __m512d; 100 | static type load(const double *a) { 101 | return _mm512_loadu_pd(a); 102 | } 103 | }; 104 | #endif 105 | 106 | #if HAS_AVX_512 107 | template 108 | static constexpr int f2b(__m512d v) { 109 | return cmp_zero(v); 110 | } 111 | template 112 | static constexpr int f2b(__m512 v) { 113 | return cmp_zero(v); 114 | } 115 | //using VType = F2VType::type; 116 | #endif 117 | #if __AVX__ 118 | template 119 | static constexpr int f2b(__m256d v) { 120 | return cmp_zero(v); 121 | } 122 | template 123 | static constexpr int f2b(__m256 v) { 124 | return cmp_zero(v); 125 | } 126 | //using VType = typename F2VType::type; 127 | #endif 128 | 129 | template, typename...DistArgs> 130 | blaze::DynamicMatrix 131 | generate_randproj_matrix(size_t nr, size_t ncol, 132 | bool orthonormalize=true, uint64_t seed=0, 133 | DistArgs &&...args) 134 | { 135 | using matrix_type = blaze::DynamicMatrix; 136 | matrix_type ret(nr, ncol); 137 | seed = ((seed ^ nr) * ncol) * seed; 138 | if(orthonormalize) { 139 | try { 140 | matrix_type r, q; 141 | if(ret.rows() >= ret.columns()) { 142 | // Randomize 143 | OMP_PRAGMA("omp parallel for") 144 | for(size_t i = 0; i < ret.rows(); ++i) { 145 | blaze::RNG gen(seed + i * seed + i); 146 | DistributionType dist(std::forward(args)...); 147 | for(auto &v: row(ret, i)) 148 | v = dist(gen); 149 | } 150 | // QR 151 | blaze::qr(ret, q, r); 152 | assert(ret.columns() == q.columns()); 153 | assert(ret.rows() == q.rows()); 154 | swap(ret, q); 155 | } else { 156 | // Generate random matrix for (C, C) and then just take the 
template<typename FType, template<typename, bool> class Container=::blaze::DynamicVector, bool SO=blaze::rowMajor>
struct LSHasher {
    using CType = Container<FType, SO>;
    CType container_;
    template<typename... CArgs>
    LSHasher(CArgs &&...args): container_(std::forward<CArgs>(args)...) {}
    template<typename T>
    auto dot(const T &ov) const {
        return blaze::dot(container_, ov);
    }
    // TODO: Store full matrix to get hashes
    // TODO: Use structured matrices to speed up calculation (FFHT, then downsample to bins)
};



template<typename FType>
static INLINE uint64_t cmp2hash(const blaze::DynamicVector<FType> &c, size_t n=0) {
    uint64_t ret = 0;
    if(n == 0) {
        n = c.size(); // 0 means "hash the whole vector"; the original's self-assignment left n == 0, so every hash was 0
    }
    assert(n <= 64);
#if HAS_AVX_512
    static constexpr size_t COUNT = sizeof(__m512d) / sizeof(FType);
#elif __AVX__
    static constexpr size_t COUNT = sizeof(__m256d) / sizeof(FType);
#else
    static constexpr size_t COUNT = 0;
#endif
    size_t i = 0;
#if HAS_AVX_512 || defined(__AVX__)
    CONST_IF(COUNT) {
        using LV = F2VType<FType, COUNT * sizeof(FType)>;
        for(; i < n / COUNT; ret = (ret << COUNT) | cmp_zero(LV::load(&c[i++ * COUNT])));
        i *= COUNT;
    }
#else
    for(; i + 8 <= n; i += 8) {
        ret = (ret << 8) |
              ((c[i] > 0.) << 7) | ((c[i + 1] > 0.) << 6) |
              ((c[i + 2] > 0.) << 5) | ((c[i + 3] > 0.) << 4) |
              ((c[i + 4] > 0.) << 3) | ((c[i + 5] > 0.) << 2) |
              ((c[i + 6] > 0.) << 1) |  (c[i + 7] > 0.);
    }
#endif
    for(; i < n; ret = (ret << 1) | (c[i++] > 0.));
    return ret;
}
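// Worked example: cmp2hash packs the sign pattern of the first n coordinates
// into a uint64_t, most-significant bit first. For c = {0.5, -1.2, 3.0, -0.1}
// the sign bits are 1,0,1,0, so cmp2hash(c, 4) == 0b1010 == 10. (Sketch only.)
#if 0
blaze::DynamicVector<double> c{0.5, -1.2, 3.0, -0.1};
assert(cmp2hash(c, 4) == 0b1010u);
#endif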
template<typename FType, typename DistributionType=std::normal_distribution<FType>, bool SO=blaze::rowMajor>
struct MatrixLSHasher {
    using CType = ::blaze::DynamicMatrix<FType, SO>;
    using this_type = MatrixLSHasher<FType, DistributionType, SO>;
    using const_this_type = const this_type;
    CType container_;
    template<typename... DistArgs>
    MatrixLSHasher(size_t nr, size_t nc, bool orthonormalize=true, uint64_t seed=0,
                   DistArgs &&...args):
        container_(generate_randproj_matrix<FType, DistributionType>(
            nr, nc, orthonormalize, seed, std::forward<DistArgs>(args)...)) {}
    auto &multiply(const blaze::DynamicVector<FType> &c, blaze::DynamicVector<FType> &ret) const {
        ret = this->container_ * c;
        return ret;
    }
    blaze::DynamicVector<FType> multiply(const blaze::DynamicVector<FType> &c) const {
        blaze::DynamicVector<FType> vec;
        this->multiply(c, vec);
        return vec;
    }
    auto multiply(const blaze::DynamicVector<FType, blaze::rowVector> &c) const {
        blaze::DynamicVector<FType> vec = this->container_ * trans(c);
        return vec;
    }
    template<typename... Args>
    decltype(auto) project(Args &&...args) const {return multiply(std::forward<Args>(args)...);}
    template<bool OSO>
    uint64_t hash(const blaze::DynamicVector<FType, OSO> &c) const {
#if VERBOSE_AF
        std::cout << this->container_ << '\n';
#endif
        blaze::DynamicVector<FType> vec = multiply(c);
        return cmp2hash(vec); // This is the SRP hasher (signed random projection)
    }
    template<bool OSO>
    uint64_t operator()(const blaze::DynamicVector<FType, OSO> &c) const {
        return this->hash(c);
    }
};

template<typename FType, typename DistributionType=std::normal_distribution<FType>>
struct E2LSHasher {
    MatrixLSHasher<FType, DistributionType> superhasher_;
    blaze::DynamicVector<FType> b_;
    double r_;
    mclhasher clhasher_;
    template<typename... Args>
    E2LSHasher(unsigned d, unsigned k, double r = 1., uint64_t seed=0, Args &&...args):
        superhasher_(k, d, false, seed, std::forward<Args>(args)...),
        b_(k), r_(r), clhasher_(seed * seed + seed) { // initializers now match declaration order
        superhasher_.container_ /= r;
        std::uniform_real_distribution<double> gen(0, r_);
        std::mt19937_64 mt(seed ^ uint64_t(d * k * r));
        for(auto &v: b_)
            v = gen(mt);
    }
    E2LSHasher(const E2LSHasher &o) = default;
    E2LSHasher(E2LSHasher &&o) = default;
    template<typename... Args>
    decltype(auto) project(Args &&...args) const {
        return floor(superhasher_.project(std::forward<Args>(args)...) + b_);
    }
    template<typename... Args>
    uint64_t hash(Args &&...args) const {
        const blaze::DynamicVector<FType> proj = this->project(std::forward<Args>(args)...);
        // Hash the floored projection; the original hashed b_ here, which is the
        // same for every input and could never discriminate between points.
        return clhasher_(&proj[0], proj.size() * sizeof(FType));
    }
    template<typename... Args>
    uint64_t operator()(Args &&...args) const {
        return hash(std::forward<Args>(args)...);
    }
};

template<typename FT>
struct ThresholdedCauchyDistribution {
    std::cauchy_distribution<FT> cd_;
    FT absmax_;
    template<typename... Args>
    ThresholdedCauchyDistribution(FT absmax, Args &&...args):
        cd_(std::forward<Args>(args)...), absmax_(std::abs(absmax)) {
    }
    template<typename Gen>
    FT operator()(Gen &gen) {
        // generate_randproj_matrix invokes distributions as dist(gen), so the
        // generator must be threaded through (the original took no argument).
        return std::clamp(cd_(gen), -absmax_, absmax_);
    }
};

template<typename FType>
struct L1E2LSHasher: public E2LSHasher<FType, ThresholdedCauchyDistribution<FType>> {
    using super = E2LSHasher<FType, ThresholdedCauchyDistribution<FType>>;
    L1E2LSHasher(unsigned d, unsigned k, double r = 1., uint64_t seed=0, FType amax=1000.):
        super(d, k, r, seed, amax) {}
};
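// E2LSH recap (sketch): each of the k hash coordinates is
//     h_i(x) = floor((a_i . x + b_i) / r),
// with a_i a row of the projection matrix and b_i ~ U[0, r). The 1/r factor is
// folded into the matrix once in the constructor, so project() only needs an
// add and a floor. Illustrative use with hypothetical parameter values:
#if 0
frp::E2LSHasher<double> hasher(/*d=*/64, /*k=*/8, /*r=*/2.5, /*seed=*/42);
blaze::DynamicVector<double> x(64, 1.);
uint64_t bucket = hasher(x); // hash of the k floored, shifted projections
#endif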
TODO: this"); 355 | } else { 356 | jlt_.emplace_back(nc, nr, seed + (nc * nr), nblocks); 357 | //d_.emplace_back(roundup(nc), 0); 358 | } 359 | blaze::RNG gen(seed); 360 | DistributionType dist(std::forward(args)...); 361 | #if 0 362 | for(auto &d: d_) 363 | for(size_t i = 0; i < nc; ++i) 364 | d[i] = dist(gen); 365 | #endif 366 | } 367 | auto &multiply(const blaze::DynamicVector &c, blaze::DynamicVector &ret) const { 368 | // This will change when we support more projections than input dimensions 369 | //auto &d = d_[0]; 370 | auto &jl = jlt_[0]; 371 | const auto ts = ncroundup(); 372 | if(ret.size() != ts) ret.resize(ts); 373 | //subvector(ret, 0, nc_) = trans(c) * subvector(d, 0, nc_); 374 | subvector(ret, 0, nc_) = c; 375 | subvector(ret, nc_, ts - nc_) = 0; 376 | jl.transform_inplace(ret); 377 | return ret; 378 | } 379 | auto multiply(const blaze::DynamicVector &c) const { 380 | blaze::DynamicVector vec(ncroundup()); 381 | multiply(c, vec); 382 | return vec; 383 | } 384 | auto multiply(const blaze::DynamicVector &c) const { 385 | // This will change when we support more projections than input dimensions 386 | //auto &d = d_[0]; 387 | auto &jl = jlt_[0]; 388 | auto ts = ncroundup(); 389 | blaze::DynamicVector vec(ts); 390 | //subvector(vec, 0, nc_) = trans(c) * subvector(d, 0, nc_); if using d_ 391 | subvector(vec, 0, nc_) = trans(c); 392 | subvector(vec, nc_, ts - nc_) = 0; 393 | jl.transform_inplace(vec); 394 | return vec; 395 | } 396 | template 397 | decltype(auto) project(Args &&...args) const {return multiply(std::forward(args)...);} 398 | template 399 | uint64_t hash(const blaze::DynamicVector &c) const { 400 | blaze::DynamicVector vec = multiply(c); 401 | return cmp2hash(vec, nr_); 402 | } 403 | template 404 | uint64_t operator()(const blaze::DynamicVector &c) const { 405 | return this->hash(c); 406 | } 407 | }; 408 | 409 | 410 | template //, ContainerTemplate=template class=std::vector, 411 | //typename... 
template<typename Hasher, typename IDType=::std::uint32_t>
//, template<typename...> class ContainerTemplate=std::vector, typename... ContainerArgs>
struct LSHTable {
    using Container = std::vector<IDType>;
    Hasher hasher_;
    ska::flat_hash_map<uint64_t, Container> map_;
    IDType nadded_ = 0;
    LSHTable(Hasher &&hasher): hasher_(std::move(hasher)) {
    }
    template<typename T>
    auto add(const T &x) {
        auto id = nadded_++;
        auto v = hasher_(x);
        auto tmp = map_.emplace(v, Container{id}); // start with just ID; a bare braced list cannot be deduced here
        if(!tmp.second) tmp.first->second.push_back(id); // If already present, push back
    }
    template<typename T>
    const Container *query(const T &x) const {
        auto v = hasher_(x);
        auto it = map_.find(v);
        if(it == map_.end()) return nullptr;
        return &(it->second);
    }
};

template<typename Hasher>
auto make_lshtable(Hasher &&hasher) {
    // decay so an lvalue argument does not deduce a reference type for the table
    return LSHTable<std::decay_t<Hasher>>(std::move(hasher));
}
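// End-to-end sketch: bucket a point under an SRP hasher and look it up again.
// (Illustrative only; parameter values are arbitrary.)
#if 0
auto table = frp::make_lshtable(frp::MatrixLSHasher<double>(/*nr=*/32, /*nc=*/128));
blaze::DynamicVector<double> pt(128, 1.);
table.add(pt);
const auto *hits = table.query(pt); // identical sign pattern -> same bucket
assert(hits && hits->size() == 1);
#endif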
} // namespace frp

#endif
--------------------------------------------------------------------------------
/include/frp/mach.h:
--------------------------------------------------------------------------------
#ifndef _GFRP_MACH_H__
#define _GFRP_MACH_H__
// The include targets below were stripped by the dump; these are reconstructed
// from what the file actually uses.
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <unistd.h>
#include "kspp/ks.h"
#include "frp/util.h"

namespace frp { namespace mach {

inline void print_toks(std::vector<ks::string> &strings) { // element type stripped in the dump; ks::string assumed
    ks::string tmp;
    tmp.sprintf("Num toks: %zu\t", strings.size());
    for(const auto &str: strings) tmp.resize(tmp.size() + str.size());
    for(const auto &str: strings) tmp += str, tmp += ',';
    tmp.pop();
    fprintf(stderr, "toks: %s\n", tmp.data());
}

#ifdef __APPLE__
#define CACHE_CMD_STR "/usr/sbin/system_profiler SPHardwareDataType"
#else
#define CACHE_CMD_STR "lscpu"
#endif

template<typename T>
using ref = T&;

struct CacheSizes {
    size_t l1, l2, l3;
    operator ref<size_t[3]>() { // view the three contiguous members as a size_t[3]
        return reinterpret_cast<ref<size_t[3]>>(*this);
    }
    CacheSizes(size_t l1a, size_t l2a, size_t l3a): l1(l1a), l2(l2a), l3(l3a) {}
    CacheSizes(): l1(0), l2(0), l3(0) {}
    std::string str() const {
        char buf[64];
        sprintf(buf, "L1:%zu,L2:%zu,L3:%zu", l1, l2, l3);
        return buf;
    }
};

template<typename SizeType=size_t>
CacheSizes get_cache_sizes() {
    FILE *fp(popen(CACHE_CMD_STR, "r"));
    CacheSizes ret;
    if(fp == nullptr) return ret; // popen can fail; don't fgets on a null stream
    char buf[1 << 16];
    memset(buf, 0, sizeof(buf));
    SizeType *ptr = nullptr;
    char *line;
    while((line = fgets(buf, sizeof(buf), fp))) {
        if(strstr(line, "ache") == nullptr) continue;
        if(strstr(line, "L") == nullptr) continue;
        auto toks(ks::toksplit(line, strlen(line), 0));
        if(toks[0] == "L1i") {
            continue;
        } else if(toks[0] == "L1d") {
            ptr = &ret[0];
        } else if(toks[0] == "L2") {
            ptr = &ret[1];
        } else if(toks[0] == "L3") {
            ptr = &ret[2];
        } else {
            pclose(fp); // streams from popen must be closed with pclose, not fclose
            fprintf(stderr, "DIE (%s)\n", toks[0].data());
            exit(1);
        }
#ifdef __APPLE__
        const auto &endtok(toks.back());
        const auto &magtok(toks[toks.size() - 2]);
        *ptr = atoi(magtok.data());
        const char sizechar(endtok[0]);
#else
        const char *tmp(toks.back().data());
        *ptr = atoi(tmp);
        while(isdigit(*tmp)) ++tmp;
        const char sizechar(*tmp);
#endif
        assert(isalpha(sizechar));
        switch(sizechar) {
            case 'T': case 't': *ptr <<= 40; break;
            case 'G': case 'g': *ptr <<= 30; break;
            case 'M': case 'm': *ptr <<= 20; break;
            case 'K': case 'k': *ptr <<= 10; break;
        }
    }

    pclose(fp);
    return ret;
}

}} // namespace frp::mach


#endif // #ifndef _GFRP_MACH_H__
--------------------------------------------------------------------------------
/include/frp/mm.h:
--------------------------------------------------------------------------------
#ifndef FROOPY_MEX_H__
#define FROOPY_MEX_H__
// Include targets were stripped by the dump; reconstructed from usage.
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <string>
#include "blaze/Math.h"

namespace frp {
template<typename FType>
blaze::CompressedMatrix<FType> parse_mm(std::string fn) {
    std::ifstream ifs(fn);
    std::string line;
    do {
        std::getline(ifs, line);
    } while(line.empty() || line.front() == '%');
    char *s = &line[0];
    while(std::isspace(*s)) ++s;
    unsigned long nrows = std::strtoul(s, &s, 10);
    while(std::isspace(*s)) ++s; // the original's `do ++s while(...)` was missing a statement terminator
    unsigned long ncols = std::strtoul(s, &s, 10);
    while(std::isspace(*s)) ++s;
    unsigned long nnz = std::strtoul(s, nullptr, 10);
    blaze::CompressedMatrix<FType> ret(nrows, ncols);
    ret.reserve(nnz);
    while(std::getline(ifs, line)) {
        s = &line[0];
        while(std::isspace(*s)) ++s;
        auto rownum = std::strtoul(s, &s, 10) - 1; // MatrixMarket indices are 1-based
        while(std::isspace(*s)) ++s;
        auto colnum = std::strtoul(s, &s, 10) - 1;
        while(std::isspace(*s)) ++s;
        double val = std::strtod(s, nullptr);
        ret.insert(rownum, colnum, val);
    }
    return ret;
}
} // namespace frp

#endif /* FROOPY_MEX_H__ */
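// Usage sketch for parse_mm (the file name is hypothetical): load a
// coordinate-format MatrixMarket file into a blaze sparse matrix.
#if 0
auto m = frp::parse_mm<double>("graph.mtx");
std::fprintf(stderr, "%zu x %zu, %zu nonzeros\n", m.rows(), m.columns(), m.nonZeros());
#endif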
--------------------------------------------------------------------------------
/include/frp/parser.h:
--------------------------------------------------------------------------------
#ifndef _GFRP_PARSER_H__
#define _GFRP_PARSER_H__
#include "frp/util.h"

namespace frp {


namespace io {
template<typename PtrType>
struct IOTypes;

inline size_t fgzread(FILE *fp, void *buf, unsigned len) {
    return fread(buf, 1, len, fp);
}

template<>
struct IOTypes<FILE *> {
    static constexpr decltype(&fopen) open = &fopen;
    static constexpr decltype(&fclose) close = &fclose;
    static constexpr decltype(&fgzread) read = &fgzread;
    static constexpr decltype(&feof) eof = &feof;
    static constexpr decltype(&ferror) error = &ferror;
};

template<>
struct IOTypes<gzFile> {
    static constexpr decltype(&gzopen) open = &gzopen;
    static constexpr decltype(&gzclose) close = &gzclose;
    static constexpr decltype(&gzread) read = &gzread;
    static constexpr decltype(&gzeof) eof = &gzeof;
    static constexpr decltype(&gzerror) error = &gzerror;
};

static const std::string zlibsuf = ".gz";
static const std::string bzip2suf = ".bz2";
static const std::string zstdsuf = ".zst";
static const std::string zlibcmd = "gzip -dc ";
static const std::string bzip2cmd = "bzip2 -dc ";
static const std::string zstdcmd = "zstd -dc "; // was "ztd -dc ", which is not a command

inline bool ends_with(const std::string &pat, const std::string &ref) {
    // note the argument order: checks that `ref` ends with `pat`
    return pat.size() <= ref.size() &&
           std::equal(std::rbegin(pat), std::rend(pat), std::rbegin(ref));
}

enum CType {
    UNKNOWN = -1,
    UNCOMPRESSED = 0, // FILE *
    ZLIB = 1,         // .gz
    ZSTD = 2,         // .zst
    BZIP2 = 3         // .bz2
};

inline CType infer_ctype(const std::string &path) {
    if(ends_with(zlibsuf, path)) return ZLIB;
    if(ends_with(bzip2suf, path)) return BZIP2;
    if(ends_with(zstdsuf, path)) return ZSTD;
    return UNCOMPRESSED;
}

} // namespace io

#define USE_FP(attr) static constexpr auto attr = io::IOTypes<FILE *>::attr

class LineReader {
    FILE *fp_;
    std::string path_;
    io::CType ctype_;
    char delim_;
    size_t bufsz_;
    ssize_t len_;
    char *data_;
    const std::string comment_lines_;

    /*
       Reads through a file line by line just once. Will add more functionality later.
    */
public:
    LineReader(const char *path,
               char delim='\n', size_t bufsz=0, io::CType ctype=io::UNKNOWN, std::string comment_lines="#"):
        fp_(nullptr), path_(path), ctype_(ctype >= 0 ? ctype: io::infer_ctype(path_)),
        delim_(delim), bufsz_(bufsz),
        len_(0), data_(bufsz_ ? (char *)std::malloc(bufsz_): nullptr),
        comment_lines_(std::move(comment_lines))
    {
    }
    ~LineReader() {
        if(fp_) fclose(fp_);
        std::free(data_);
    }
    class LineIterator {
        LineReader &ref_;
    public:
        LineIterator(LineReader &ref):
            ref_(ref) {}
        LineIterator &operator*() {
            return *this;
        }
        LineIterator &operator++() {
            ref_.len_ = getdelim(&ref_.data_, &ref_.bufsz_, ref_.delim_, ref_.fp_);
            if(good())
                if(std::find(ref_.comment_lines_.begin(), ref_.comment_lines_.end(), ref_.data_[0]) != ref_.comment_lines_.end())
                    return this->operator++();
            return *this;
        }
        using uivec_t = std::vector<unsigned>; // element type stripped in the dump; unsigned assumed

        ssize_t len() const {return ref_.len();}
        char *data() {return ref_.data();}
        const char *data() const {return ref_.data();}
        bool operator!=([[maybe_unused]] const LineIterator &other) const {return good();}
        bool operator< ([[maybe_unused]] const LineIterator &other) const {return good();}
        char &operator[](size_t index) {return data()[index];}
        const char &operator[](size_t index) const {return data()[index];}
        bool good() const {return ref_.len_ != -1;}
        // TODO: speed this up by avoiding making a vector of positions and just parse in the first pass.
        template