├── src
│   ├── tests
│   │   ├── data
│   │   │   ├── matrix
│   │   │   ├── y_test.txt
│   │   │   ├── y_train.txt
│   │   │   ├── svm_light_dummy_witout_target
│   │   │   ├── svm_light_dummy
│   │   │   ├── x_dummy.txt
│   │   │   ├── x_test.txt
│   │   │   └── x_train.txt
│   │   ├── TestFixtures.h
│   │   ├── Makefile
│   │   ├── TestFixtures.c
│   │   ├── test_random.c
│   │   ├── test_ffm_utils.c
│   │   ├── test_ffm_sgd.c
│   │   └── test_ffm_als_mcmc.c
│   ├── ffm_random.h
│   ├── Makefile
│   ├── kmath.h
│   ├── ffm_random.c
│   ├── kvec.h
│   ├── fast_fm.h
│   ├── ffm_utils.h
│   ├── ffm.c
│   ├── ffm_sgd.c
│   ├── cli.c
│   ├── ffm_als_mcmc.c
│   ├── kmath.c
│   └── ffm_utils.c
├── demo
│   ├── data
│   │   ├── train_ranking
│   │   ├── train_pairs
│   │   ├── test_classification
│   │   ├── test_ranking
│   │   ├── train_classification
│   │   ├── test_regression
│   │   └── train_regression
│   ├── Makefile
│   ├── example_sgd.c
│   ├── example_sgd_bpr.c
│   └── example_als_mcmc.c
├── .gitmodules
├── .gitignore
├── Makefile
├── .travis.yml
├── include
│   └── ffm.h
├── COPYING
└── README.rst

/src/tests/data/matrix:
--------------------------------------------------------------------------------
1 2
2 3
4 5
--------------------------------------------------------------------------------
/src/tests/data/y_test.txt:
--------------------------------------------------------------------------------
298
266
29
298
848
--------------------------------------------------------------------------------
/src/tests/data/y_train.txt:
--------------------------------------------------------------------------------
298
266
29
298
848
--------------------------------------------------------------------------------
/demo/data/train_ranking:
--------------------------------------------------------------------------------
0:6 1:1
0:2 1:3
0:3
0:6 1:1
0:4 1:5
--------------------------------------------------------------------------------
/demo/data/train_pairs:
--------------------------------------------------------------------------------
3 0
4 0
1 2
3 1
4 1
3 2
4 2
4 3
--------------------------------------------------------------------------------
/demo/data/test_classification:
--------------------------------------------------------------------------------
1 0:6 1:1
-1 0:2 1:3
-1 0:3
1 0:6 1:1
1 0:4 1:5
--------------------------------------------------------------------------------
/demo/data/test_ranking:
--------------------------------------------------------------------------------
298 0:6 1:1
266 0:2 1:3
29 0:3
298 0:6 1:1
848 0:4 1:5
--------------------------------------------------------------------------------
/demo/data/train_classification:
--------------------------------------------------------------------------------
1 0:6 1:1
-1 0:2 1:3
-1 0:3
1 0:6 1:1
1 0:4 1:5
--------------------------------------------------------------------------------
/src/tests/data/svm_light_dummy_witout_target:
--------------------------------------------------------------------------------
0:6 1:1
0:2 1:3
0:3
4:1
0:4 1:5
--------------------------------------------------------------------------------
/demo/data/test_regression:
--------------------------------------------------------------------------------
298 0:6 1:1
266 0:2 1:3
29 0:3
298 0:6 1:1
848 0:4 1:5
--------------------------------------------------------------------------------
/demo/data/train_regression:
--------------------------------------------------------------------------------
298 0:6 1:1
266 0:2 1:3
29 0:3
298 0:6 1:1
848 0:4 1:5
--------------------------------------------------------------------------------
/src/tests/data/svm_light_dummy:
--------------------------------------------------------------------------------
298 0:6 1:1
266 0:2 1:3
29
298 0:6 1:1
848 0:4 1:5
--------------------------------------------------------------------------------
/src/tests/data/x_dummy.txt:
--------------------------------------------------------------------------------
0 0 6
0 1 1
1 0 2.22
1 1 3
2 0 3.333
3 0 6
3 1 1
4 0 4.4444
4 1 5
--------------------------------------------------------------------------------
/src/tests/data/x_test.txt:
--------------------------------------------------------------------------------
0 0 6
0 1 1
1 0 2.22
1 1 3
2 0 3.333
3 0 6
3 1 1
4 0 4.4444
4 1 5
--------------------------------------------------------------------------------
/src/tests/data/x_train.txt:
--------------------------------------------------------------------------------
0 0 6
0 1 1
1 0 2.22
1 1 3
2 0 3.333
3 0 6
3 1 1
4 0 4.4444
4 1 5
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "externals/CXSparse"]
  path = externals/CXSparse
  url = https://github.com/ibayer/CXSparse.git

[submodule "externals/OpenBLAS"]
  path = externals/OpenBLAS
  url = https://github.com/xianyi/OpenBLAS.git
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# C
*.swp
*.o
*.a
*.so
*.zip
src/tests/test_ffm_als_mcmc
src/tests/test_ffm_utils
src/tests/test_ffm_sgd
src/tests/test_random

# OS X build-generated directories
src/tests/test_ffm_als_mcmc.dSYM/
src/tests/test_ffm_utils.dSYM/
src/tests/test_random.dSYM/

bin/
demo/data/ranking_predictions
dense_fm
sparse_fm
cli
# latex
*.aux
*.bbl
*.blg
*.dvi
*.log
*.toc
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------

lib:
	( cd externals/CXSparse ; $(MAKE) library )
	( export USE_THREAD=0; cd externals/OpenBLAS ; $(MAKE) libs)
	( cd src ; $(MAKE) lib )

cli:
	( cd externals/CXSparse ; $(MAKE) library )
	( export USE_THREAD=0; cd externals/OpenBLAS ; $(MAKE) libs)
	( cd src ; $(MAKE) cli )

.PHONY : clean
clean :
	( cd src ; $(MAKE) clean )

.PHONY : clean_libs
clean_libs :
	( cd externals/OpenBLAS/ ; $(MAKE) clean )
	( cd externals/CXSparse/ ; $(MAKE) clean )
--------------------------------------------------------------------------------
/src/tests/TestFixtures.h:
--------------------------------------------------------------------------------
#include "fast_fm.h"
#include <glib.h>

typedef struct TestFixture_T {
  cs* X;
  cs* X_t;
  ffm_vector* y;
  ffm_coef* coef;
} TestFixture_T;

void TestFixtureContructorSimple(TestFixture_T* pFixture, gconstpointer pg);

void TestFixtureContructorWide(TestFixture_T* pFixture, gconstpointer pg);

void TestFixtureContructorLong(TestFixture_T* pFixture, gconstpointer pg);

void TestFixtureDestructor(TestFixture_T* pFixture, gconstpointer pg);

TestFixture_T* makeTestFixture(int seed, int n_samples, int n_features, int k);
--------------------------------------------------------------------------------
/src/ffm_random.h:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#ifndef FFM_RANDOM_H
#define FFM_RANDOM_H

#include "fast_fm.h"

typedef krand_t ffm_rng;

ffm_rng *ffm_rng_seed(int seed);
void ffm_rng_free(ffm_rng *rng);
double ffm_rand_normal(ffm_rng *kr, double mean, double stddev);
double ffm_rand_uniform(ffm_rng *kr);
double ffm_rand_gamma(ffm_rng *kr, double shape, double scale);
double ffm_rand_exp(ffm_rng *kr, double rate);
double ffm_rand_left_trunc_normal(ffm_rng *kr, double trunc);
double ffm_rand_right_trunc_normal(ffm_rng *kr, double trunc);

#endif /* FFM_RANDOM_H */
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: c

os:
  - linux
  - osx

sudo: required
dist: trusty

before_install:
  - git submodule update --init --recursive

install:
  - echo $TRAVIS_OS_NAME
  # build and test library
  - make lib
  - (cd demo/; make; make demo_lib)

  # install dependencies for cli and testsuite
  - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install glib gsl argp-standalone; fi
  - if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get install -y libglib2.0-dev libgsl0-dev; fi

script:
  # run test suite
  - (cd src/tests/; make all; make check)
  # run cli demo, linking fails on osx
  - if [ "$TRAVIS_OS_NAME" = "linux" ]; then make cli; (cd demo/; make; make demo_cli); fi
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
OBJECTS = kmath.o ffm_random.o ffm_als_mcmc.o ffm_utils.o ffm_sgd.o ffm.o
INCLUDES = -I../externals/OpenBLAS
LDLIBS = -L../externals/CXSparse/Lib -lcxsparse -L../externals/OpenBLAS -lopenblas -lm
CFLAGS = -std=c99 -fPIC -g -Wall -O3 $(INCLUDES)
CC=gcc

$(P): CFLAGS += -DTest_operations
$(P): $(OBJECTS)

ffm_als_mcmc.o : fast_fm.h
ffm_utils.o : fast_fm.h
ffm_sgd.o : fast_fm.h
ffm.o : fast_fm.h
cli.o : fast_fm.h

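# The lib target below unpacks the prebuilt CXSparse and OpenBLAS archives
# with `ar -x` and repacks their object files together with the fastFM
# objects, so that ../bin/libfastfm.a is self-contained and can be linked
# without separate -lcxsparse / -lopenblas flags.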
lib: $(OBJECTS)
	mkdir -p ../bin/
	ar -x ../externals/CXSparse/Lib/libcxsparse.a
	ar -x ../externals/OpenBLAS/libopenblas.a
	ar rcs ../bin/libfastfm.a $(OBJECTS) *.o

cli: $(OBJECTS) cli.o
	mkdir -p ../bin/
	$(CC) $(OBJECTS) cli.o $(CFLAGS) $(INCLUDES) $(LDLIBS) -o ../bin/fastfm

.PHONY : clean
clean :
	rm -f $(P) $(OBJECTS)
	rm -f *.o
	rm -f ../bin/libfastfm.a
	rm -f ../bin/fastfm
--------------------------------------------------------------------------------
/demo/Makefile:
--------------------------------------------------------------------------------
P= example_als_mcmc example_sgd example_sgd_bpr
VPATH = ../
OBJECTS = example_als_mcmc.o example_sgd.o example_sgd_bpr.o
CFLAGS = -std=c99 -g -Wall -O0
LDLIBS = -L../bin -lfastfm -lm
CC = gcc

all: $(P)
	( cd .. ; $(MAKE) lib)


demo_lib:
	./example_sgd
	./example_sgd_bpr
	./example_als_mcmc

demo_cli: regression classification

regression:
	./../bin/fastfm data/train_regression data/test_regression \
	--task regression \
	--rng-seed 1234 \
	--init-var=0.11 \
	--n-iter=123 \
	--solver='mcmc' \
	--rank 7 \
	--l2-reg=.22

classification:
	./../bin/fastfm data/train_classification data/test_classification \
	--task classification \
	--solver='als' \
	--l2-reg-w=.11 \
	--l2-reg-V=.22


ranking:
	./../bin/fastfm data/train_ranking data/test_ranking \
	--task ranking \
	--train-pairs data/train_pairs \
	--solver='sgd' \
	--init-var=0.01 \
	--n-iter=2000 \
	--rank 2 \
	--l2-reg=0.5 \
	--step-size=0.002 \
	--test-predict data/ranking_predictions


.PHONY : clean
clean :
	rm -f $(P) $(OBJECTS)
	rm -f *.o
--------------------------------------------------------------------------------
/src/tests/Makefile:
--------------------------------------------------------------------------------
P= test_ffm_als_mcmc test_ffm_utils test_ffm_sgd test_random
VPATH = ../
OBJECTS= ffm_als_mcmc.o ffm_sgd.o
INCLUDES = -I..
CFLAGS = `pkg-config --cflags gsl glib-2.0` -std=c99 -g -Wall -O0 $(INCLUDES)
LDLIBS= `pkg-config --libs gsl glib-2.0` -L../../externals/CXSparse/Lib -lcxsparse

CC=gcc

all: test_ffm_sgd test_ffm_als_mcmc test_ffm_utils test_random
	( cd ../../externals/CXSparse ; $(MAKE) library )
	( export USE_THREAD=0; cd ../../externals/OpenBLAS ; $(MAKE) libs)

check:
	gtester test_random --keep-going --verbose
	gtester test_ffm_utils --keep-going --verbose
	gtester test_ffm_sgd --keep-going --verbose
	gtester test_ffm_als_mcmc --keep-going --verbose

mem_check:
	G_DEBUG=gc-friendly G_SLICE=always-malloc valgrind --leak-check=full ./test_ffm_als_mcmc

test_ffm_sgd : kmath.o ffm_random.o ffm_utils.o ffm_als_mcmc.o ffm_sgd.o TestFixtures.o
test_ffm_als_mcmc : kmath.o ffm_random.o ffm_utils.o ffm_als_mcmc.o TestFixtures.o
test_ffm_utils : kmath.o ffm_random.o ffm_utils.o ffm_als_mcmc.o
test_random: kmath.o ffm_random.o ffm_utils.o

kmath.o: fast_fm.h
ffm_random.o: fast_fm.h
test_ffm_sgd.o: fast_fm.h
ffm_als_mcmc.o : fast_fm.h
ffm_utils.o : fast_fm.h
TestFixtures.o : TestFixtures.h


.PHONY : clean
clean :
	rm -f $(P) $(OBJECTS)
	rm -f *.o
--------------------------------------------------------------------------------
/include/ffm.h:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#ifndef FFM_H
#define FFM_H
#include "../externals/CXSparse/Include/cs.h"

#define TASK_CLASSIFICATION 10
#define TASK_REGRESSION 20
#define TASK_RANKING 30

// ############### library interface ####################

typedef struct ffm_param {
  int n_iter;
  int k;
  double init_sigma;
  double init_lambda_w;
  double init_lambda_V;
  int TASK;
  int SOLVER;
  double stepsize;
  int rng_seed;

  int iter_count;
  int ignore_w_0;
  int ignore_w;
  int warm_start;

  int n_hyper_param;
  double *hyper_param;
} ffm_param;

void ffm_predict(double *w_0, double *w, double *V, cs *X, double *y_pred,
                 int k);

void ffm_als_fit(double *w_0, double *w, double *V, cs *X, double *y,
                 ffm_param *param);

void ffm_mcmc_fit_predict(double *w_0, double *w, double *V, cs *X_train,
                          cs *X_test, double *y_train, double *y_pred,
                          ffm_param *param);

void ffm_sgd_fit(double *w_0, double *w, double *V, cs *X, double *y,
                 ffm_param *param);

void ffm_sgd_bpr_fit(double *w_0, double *w, double *V, cs *X, double *pairs,
                     int n_pairs, ffm_param *param);
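
// Typical call sequence (cf. demo/example_als_mcmc.c): fill an ffm_param with
// designated initializers, allocate w_0, w[n_features] and V[k * n_features]
// yourself, then fit and predict, e.g.
//
//   ffm_param param = {.n_iter = 1000, .k = 2, .init_sigma = 0.1,
//                      .TASK = TASK_REGRESSION};
//   ffm_als_fit(&w_0, w, V, X, y, &param);
//   ffm_predict(&w_0, w, V, X, y_pred, param.k);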
#endif /* FFM_H */
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
New BSD License

Copyright (c) 2014–2015 Immanuel Bayer
All rights reserved.


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  a. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
  b. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
  c. Neither the name of the developers nor the names of
     its contributors may be used to endorse or promote products
     derived from this software without specific prior written
     permission.


THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
--------------------------------------------------------------------------------
/src/kmath.h:
--------------------------------------------------------------------------------
#ifndef AC_KMATH_H
#define AC_KMATH_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**********************************
 * Pseudo-random number generator *
 **********************************/

typedef uint64_t krint64_t;

struct _krand_t;
typedef struct _krand_t krand_t;

#define kr_drand(_kr) ((kr_rand(_kr) >> 11) * (1.0/9007199254740992.0))
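// kr_drand keeps the top 53 bits of a 64-bit draw and scales by 2^-53
// (9007199254740992 = 2^53), yielding a uniform double in [0, 1).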
#define kr_sample(_kr, _k, _cnt) ((*(_cnt))++ < (_k)? *(_cnt) - 1 : kr_rand(_kr) % *(_cnt))

krand_t *kr_srand(krint64_t seed);
krint64_t kr_rand(krand_t *kr);

/**************************
 * Non-linear programming *
 **************************/

#define KMIN_RADIUS 0.5
#define KMIN_EPS 1e-7
#define KMIN_MAXCALL 50000

typedef double (*kmin_f)(int, double*, void*);
typedef double (*kmin1_f)(double, void*);

double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls); // Hooke-Jeeves'
double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin); // Brent's one-dimensional

/*********************
 * Special functions *
 *********************/

double kf_lgamma(double z); // log gamma function
double kf_erfc(double x); // complementary error function
double kf_gammap(double s, double z); // regularized lower incomplete gamma function
double kf_gammaq(double s, double z); // regularized upper incomplete gamma function
double kf_betai(double a, double b, double x); // regularized incomplete beta function

#ifdef __cplusplus
}
#endif

#endif
--------------------------------------------------------------------------------
/demo/example_sgd.c:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#include <stdio.h>
#include "../include/ffm.h"
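
// The fitted model is a second-order factorization machine (Rendle),
//   y(x) = w_0 + sum_i w_i x_i + sum_{i<j} <V[:,i], V[:,j]> x_i x_j,
// where each feature i is assigned a k-dimensional latent vector V[:,i].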

cs *create_design_matrix() {
  // X = | 6 1 |
  //     | 2 3 |
  //     | 3 0 |
  //     | 6 1 |
  //     | 4 5 |
  int m = 5;
  int n = 2;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */
  cs_entry(X, 0, 0, 6);
  cs_entry(X, 0, 1, 1);
  cs_entry(X, 1, 0, 2);
  cs_entry(X, 1, 1, 3);
  cs_entry(X, 2, 0, 3);
  cs_entry(X, 3, 0, 6);
  cs_entry(X, 3, 1, 1);
  cs_entry(X, 4, 0, 4);
  cs_entry(X, 4, 1, 5);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);
  cs_spfree(X_t);
  return X_csc;
}

void sgd_regression_example() {
  printf("### SGD regression example ###\n");

  // int n_features = 2;
  int n_samples = 5;
  int k = 2;  // # of hidden variables per feature

  double y[] = {298, 266, 29, 298, 848};
  cs *X = create_design_matrix();
  cs *X_t = cs_transpose(X, 1);

  ffm_param param = {.n_iter = 2000,
                     .init_sigma = .01,
                     .init_lambda_w = 0.5,
                     .init_lambda_V = 0.5,
                     .stepsize = .002,
                     .TASK = TASK_REGRESSION};
  // allocate fm parameter
  double w_0 = 0;
  double w[2];
  double V[2 * 2];  // k * n_features

  ffm_sgd_fit(&w_0, &w[0], &V[0], X_t, &y[0], &param);

  double y_pred[5];  // allocate space for the predictions
  ffm_predict(&w_0, &w[0], &V[0], X, y_pred, k);

  printf("y_true: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y[i]);
  printf("]\n");
  printf("y_pred: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y_pred[i]);
  printf("]\n\n");
}

int main(void) { sgd_regression_example(); }
--------------------------------------------------------------------------------
/src/ffm_random.c:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#include "fast_fm.h"

ffm_rng *ffm_rng_seed(int seed) { return kr_srand(seed); }

double ffm_rand_uniform(ffm_rng *kr) { return kr_rand(kr) / (double)ULONG_MAX; }

void ffm_rng_free(ffm_rng *rng) { free(rng); }

// Marsaglia polar method (a Box-Muller variant)
double ffm_rand_normal(ffm_rng *kr, double mean, double stddev) {
  double x, y, r;
  do {
    x = 2.0 * ffm_rand_uniform(kr) - 1;
    y = 2.0 * ffm_rand_uniform(kr) - 1;
    r = x * x + y * y;
  } while (r == 0.0 || r > 1.0);
  double d = sqrt(-2.0 * log(r) / r);
  return x * d * stddev + mean;
}

double ffm_rand_exp(ffm_rng *kr, double rate) {
  return -log(ffm_rand_uniform(kr)) / rate;
}

// Marsaglia & Tsang: A Simple Method for Generating Gamma Variables (Section 5)
double ffm_rand_gamma(ffm_rng *kr, double shape, double scale) {
  assert(scale > 0);

  if (shape < 1.) {
    double r = ffm_rand_uniform(kr);
    return ffm_rand_gamma(kr, 1.0 + shape, scale) * pow(r, 1.0 / shape);
  }

  double d, c, x, v, u;
  d = shape - 1. / 3.;
  c = 1. / sqrt(9. * d);
  while (true) {
    do {
      x = ffm_rand_normal(kr, 0, 1);
      v = 1. + c * x;
    } while (v <= 0.);
    v = v * v * v;
    u = ffm_rand_uniform(kr);
    if (u < 1. - .0331 * (x * x) * (x * x)) return scale * d * v;
    if (log(u) < 0.5 * x * x + d * (1. - v + log(v))) return scale * d * v;
  }
}

// normal truncated left
// Robert: Simulation of truncated normal variables
double ffm_rand_left_trunc_normal(ffm_rng *kr, double trunc) {
  /*
  if (trunc <= 0.)
      while (true)
      {
          double r = ffm_rand_normal(kr, trunc, 1);
          if (r > trunc) return r;
      }
  */

  double alpha_star = 0.5 * (trunc + sqrt(trunc * trunc + 4.0));
  while (true) {
    double z = trunc + ffm_rand_exp(kr, alpha_star);
    double tmp = z - alpha_star;
    double g = exp(-(tmp * tmp) / 2.0);
    double u = ffm_rand_uniform(kr);
    if (u <= g) return z;
  }
}
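
// By symmetry of the normal distribution, a draw truncated to the right at
// `trunc` is the negation of a draw truncated to the left at `-trunc`.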
double ffm_rand_right_trunc_normal(ffm_rng *kr, double trunc) {
  return -ffm_rand_left_trunc_normal(kr, -trunc);
}
--------------------------------------------------------------------------------
/demo/example_sgd_bpr.c:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#include <stdio.h>
#include "../include/ffm.h"

cs *create_design_matrix() {
  // X = | 6 1 |
  //     | 2 3 |
  //     | 3 0 |
  //     | 6 1 |
  //     | 4 5 |
  int m = 5;
  int n = 2;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */
  cs_entry(X, 0, 0, 6);
  cs_entry(X, 0, 1, 1);
  cs_entry(X, 1, 0, 2);
  cs_entry(X, 1, 1, 3);
  cs_entry(X, 2, 0, 3);
  cs_entry(X, 3, 0, 6);
  cs_entry(X, 3, 1, 1);
  cs_entry(X, 4, 0, 4);
  cs_entry(X, 4, 1, 5);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);
  cs_spfree(X_t);
  return X_csc;
}

void sgd_bpr_example() {
  printf("### SGD BPR example ###\n");

  // int n_features = 2;
  int n_samples = 5;
  int k = 2;  // # of hidden variables per feature

  // double y[] = {298, 266, 29, 298, 848};
  double y[] = {2, 1, 0, 3, 4};
  double pairs[10][2] = {{0, 1},
                         {0, 2},
                         {3, 0},
                         {4, 0},
                         {1, 2},
                         {3, 1},
                         {4, 1},
                         {3, 2},
                         {4, 2},
                         {4, 3}};
  int n_pairs = 10;
  cs *X = create_design_matrix();
  cs *X_t = cs_transpose(X, 1);

  ffm_param param = {.n_iter = 2000,
                     .k = k,
                     .init_sigma = .01,
                     .init_lambda_w = .5,
                     .init_lambda_V = .5,
                     .stepsize = 0.002,
                     .TASK = TASK_RANKING};
  // allocate fm parameter
  double w_0 = 0;
  double w[2];
  double V[2 * 2];  // k * n_features
  // regularization
  ffm_sgd_bpr_fit(&w_0, &w[0], &V[0], X_t, &pairs[0][0], n_pairs, &param);

  double y_pred[5];  // allocate space for the predictions
  ffm_predict(&w_0, &w[0], &V[0], X, y_pred, k);

  printf("y_true: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y[i]);
  printf("]\n");
  printf("y_pred: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y_pred[i]);
  printf("]\n\n");
}

int main(void) { sgd_bpr_example(); }
--------------------------------------------------------------------------------
/src/kvec.h:
--------------------------------------------------------------------------------
/* The MIT License

   Copyright (c) 2008, by Attractive Chaos

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/*
  An example:

#include "kvec.h"
int main() {
    kvec_t(int) array;
    kv_init(array);
    kv_push(int, array, 10); // append
    kv_a(int, array, 20) = 5; // dynamic
    kv_A(array, 20) = 4; // static
    kv_destroy(array);
    return 0;
}
*/

/*
  2008-09-22 (0.1.0):

    * The initial version.

*/

#ifndef AC_KVEC_H
#define AC_KVEC_H

#include <stdlib.h>

#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))

#define kvec_t(type) struct { size_t n, m; type *a; }
#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
#define kv_destroy(v) free((v).a)
#define kv_A(v, i) ((v).a[(i)])
#define kv_pop(v) ((v).a[--(v).n])
#define kv_size(v) ((v).n)
#define kv_max(v) ((v).m)

#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))

#define kv_copy(type, v1, v0) do {                              \
        if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n);       \
        (v1).n = (v0).n;                                        \
        memcpy((v1).a, (v0).a, sizeof(type) * (v0).n);          \
    } while (0)                                                 \

#define kv_push(type, v, x) do {                                    \
        if ((v).n == (v).m) {                                       \
            (v).m = (v).m? (v).m<<1 : 2;                            \
            (v).a = (type*)realloc((v).a, sizeof(type) * (v).m);    \
        }                                                           \
        (v).a[(v).n++] = (x);                                       \
    } while (0)

#define kv_pushp(type, v) (((v).n == (v).m)?                                \
        ((v).m = ((v).m? (v).m<<1 : 2),                                     \
         (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0)            \
        : 0), ((v).a + ((v).n++))

#define kv_a(type, v, i) (((v).m <= (size_t)(i)?                            \
        ((v).m = (v).n = (i) + 1, kv_roundup32((v).m),                      \
         (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0)            \
        : (v).n <= (size_t)(i)? (v).n = (i) + 1                             \
        : 0), (v).a[(i)])

#endif
--------------------------------------------------------------------------------
/demo/example_als_mcmc.c:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#include <stdio.h>
#include "../include/ffm.h"

cs *create_design_matrix() {
  // X = | 6 1 |
  //     | 2 3 |
  //     | 3 0 |
  //     | 6 1 |
  //     | 4 5 |
  int m = 5;
  int n = 2;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */
  cs_entry(X, 0, 0, 6);
  cs_entry(X, 0, 1, 1);
  cs_entry(X, 1, 0, 2);
  cs_entry(X, 1, 1, 3);
  cs_entry(X, 2, 0, 3);
  cs_entry(X, 3, 0, 6);
  cs_entry(X, 3, 1, 1);
  cs_entry(X, 4, 0, 4);
  cs_entry(X, 4, 1, 5);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);
  cs_spfree(X_t);
  return X_csc;
}

void als_regression_example() {
  printf("### ALS regression example ###\n");

  // int n_features = 2;
  int n_samples = 5;
  int k = 2;  // # of hidden variables per feature

  double y[] = {298, 266, 29, 298, 848};
  cs *X = create_design_matrix();

  // allocate space for all hyper-parameters
  int w_groups = 1;
  int n_hyper_param = 1 + 2 * k + 2 * w_groups;
  double hyper_param[7];  // 7 = n_hyper_param

  ffm_param param = {.n_iter = 1000,
                     .k = k,
                     .init_sigma = 0.1,
                     .TASK = TASK_REGRESSION,
                     .init_lambda_w = 1,
                     .init_lambda_V = 1,
                     .hyper_param = hyper_param,
                     .n_hyper_param = n_hyper_param};
  // allocate fm parameter
  double w_0 = 0;
  double w[2];
  double V[2 * 2];  // k * n_features
  // regularization

  ffm_als_fit(&w_0, &w[0], &V[0], X, &y[0], &param);

  double y_pred[5];  // allocate space for the predictions
  ffm_predict(&w_0, &w[0], &V[0], X, y_pred, k);

  printf("y_true: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y[i]);
  printf("]\n");
  printf("y_pred: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y_pred[i]);
  printf("]\n\n");
}

void mcmc_regression_example() {
  printf("### MCMC regression example ###\n");

  // int n_features = 2;
  int n_samples = 5;
  int k = 2;  // # of hidden variables per feature

  double y[] = {298, 266, 29, 298, 848};
  cs *X = create_design_matrix();

  // allocate space for all hyper-parameters
  int w_groups = 1;
  int n_hyper_param = 1 + 2 * k + 2 * w_groups;
  double hyper_param[7];  // 7 = n_hyper_param
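  // Layout used by ffm_mcmc_fit_predict (see src/ffm.c):
  //   hyper_param[0]             alpha (noise precision)
  //   hyper_param[1]             lambda_w
  //   hyper_param[2 .. k+1]      lambda_V[0..k-1]
  //   hyper_param[k+2]           mu_w
  //   hyper_param[k+3 .. 2k+2]   mu_V[0..k-1]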

  ffm_param param = {.n_iter = 1000,
                     .k = k,
                     .init_sigma = 0.1,
                     .TASK = TASK_REGRESSION,
                     .init_lambda_w = 1,
                     .init_lambda_V = 1,
                     .hyper_param = hyper_param,
                     .n_hyper_param = n_hyper_param};
  // allocate fm parameter
  double w_0 = 0;
  double w[2];
  double V[2 * 2];  // k * n_features

  double y_pred[5];  // allocate space for the predictions
  ffm_mcmc_fit_predict(&w_0, &w[0], &V[0], X, X, &y[0], &y_pred[0], &param);

  printf("y_true: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y[i]);
  printf("]\n");
  printf("y_pred: [");
  for (int i = 0; i < n_samples; i++) printf(" %.2f,", y_pred[i]);
  printf("]\n\n");
}

int main(void) {
  als_regression_example();
  mcmc_regression_example();
}
--------------------------------------------------------------------------------
/src/fast_fm.h:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#ifndef FAST_MF_H
#define FAST_MF_H

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <limits.h>
#include "../externals/CXSparse/Include/cs.h"
#include "../externals/OpenBLAS/cblas.h"

#include "kmath.h"
#include "../include/ffm.h"

#define SOLVER_ALS 1
#define SOLVER_MCMC 2
#define SOLVER_SGD 3

typedef struct ffm_vector {
  int size;
  double *data;
  int owner;
} ffm_vector;

typedef struct ffm_matrix {
  int size0;  // number of rows
  int size1;  // number of columns
  double *data;
  int owner;
} ffm_matrix;

#define Fn_apply(type, fn, ...)                                          \
  {                                                                      \
    void *stopper_for_apply = (int[]){0};                                \
    type **list_for_apply = (type *[]){__VA_ARGS__, stopper_for_apply};  \
    for (int i = 0; list_for_apply[i] != stopper_for_apply; i++)         \
      fn(list_for_apply[i]);                                             \
  }
#define ffm_vector_free_all(...) \
  Fn_apply(ffm_vector, ffm_vector_free, __VA_ARGS__);
#define ffm_matrix_free_all(...) \
  Fn_apply(ffm_matrix, ffm_matrix_free, __VA_ARGS__);
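// Variadic free helpers: the (type *[]){...} compound literal is terminated
// by a sentinel pointer, so e.g. `ffm_vector_free_all(coef.lambda_V,
// coef.mu_V);` expands to a loop that frees each listed vector (cf. the
// usage in src/ffm.c).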

typedef struct ffm_coef {
  double w_0;
  ffm_vector *w;
  ffm_matrix *V;
  // hyperparameter
  double alpha;
  double lambda_w, mu_w;
  ffm_vector *lambda_V;
  ffm_vector *mu_V;
} ffm_coef;

typedef struct fm_data {
  ffm_vector *y;
  cs *X;
} fm_data;

#include "ffm_utils.h"
#include "ffm_random.h"

void sparse_fit(ffm_coef *coef, cs *X, cs *X_test, ffm_vector *y,
                ffm_vector *y_pred, ffm_param param);

void sparse_predict(ffm_coef *coef, cs *X, ffm_vector *y_pred);

void row_predict(ffm_coef *coef, cs *A, ffm_vector *y_pred);

void col_predict(ffm_coef *coef, cs *A, ffm_vector *y_pred);

// ############### internal functions for ALS / MCMC ####################

int eval_second_order_term(ffm_matrix *V, cs *X, ffm_vector *result);

void update_second_order_error(int column, cs *X, ffm_vector *a_theta_v,
                               double delta, ffm_vector *error);

void sparse_v_lf_frac(double *sum_denominator, double *sum_nominator, cs *A,
                      int col_index, ffm_vector *err, ffm_vector *cache,
                      ffm_vector *a_theta_v, double v_lf);

void sample_hyper_parameter(ffm_coef *coef, ffm_vector *err, ffm_rng *rng);

void map_update_target(ffm_vector *y_pred, ffm_vector *sample_target,
                       ffm_vector *y_train);

void sample_target(ffm_rng *r, ffm_vector *y_pred, ffm_vector *z_target,
                   ffm_vector *y_true);

// ############### internal functions for SGD ####################

void ffm_fit_sgd(ffm_coef *coef, cs *X, ffm_vector *y, ffm_param *param);

double ffm_predict_sample(ffm_coef *coef, cs *X, int sample_row);

void ffm_fit_sgd_bpr(ffm_coef *coef, cs *A, ffm_matrix *pairs, ffm_param param);

void update_second_order_bpr(cs *A, ffm_matrix *V, double cache_n,
                             double cache_p, double pairs_err, double step_size,
                             double lambda_V, int sample_row_p,
                             int sample_row_n, int f);

ffm_coef *extract_gradient(ffm_coef *coef_t0, ffm_coef *coef_t1,
                           double stepsize);

double l2_penalty(ffm_coef *coef);
#endif /* FAST_MF_H */
--------------------------------------------------------------------------------
/src/ffm_utils.h:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#ifndef FFM_UTILS_H
#define FFM_UTILS_H

#include "fast_fm.h"

// ########################### ffm scalar ###################################
double ffm_sigmoid(double x);
double ffm_normal_cdf(double x);
double ffm_normal_pdf(double x);
double ffm_pow_2(double x);

// ########################### ffm_vector ####################################
double ffm_vector_variance(ffm_vector *x);
double ffm_vector_mean(ffm_vector *x);
void ffm_vector_normal_cdf(ffm_vector *x);
int ffm_vector_free(ffm_vector *a);
ffm_vector *ffm_vector_alloc(int size);
ffm_vector *ffm_vector_calloc(int size);
int ffm_vector_memcpy(ffm_vector *a, ffm_vector const *b);
int ffm_vector_add(ffm_vector *a, ffm_vector const *b);
int ffm_vector_sub(ffm_vector *a, ffm_vector const *b);
int ffm_vector_scale(ffm_vector *a, double b);
int ffm_vector_mul(ffm_vector *a, ffm_vector const *b);
double ffm_vector_sum(ffm_vector *a);
void ffm_vector_set_all(ffm_vector *a, double b);
void ffm_vector_set(ffm_vector *a, int i, double alpha);
double ffm_vector_get(ffm_vector *a, int i);
void ffm_vector_add_constant(ffm_vector *a, double alpha);
void ffm_vector_printf(ffm_vector *a);
void ffm_vector_sort(ffm_vector *y);
void ffm_vector_make_labels(ffm_vector *y);  // use median as class boundary
double ffm_vector_median(ffm_vector *y);
double ffm_vector_accuracy(ffm_vector *y_true, ffm_vector *y_pred);
ffm_matrix *ffm_vector_to_rank_comparision(ffm_vector *y);
bool ffm_vector_contains(ffm_vector *y, double value, int cutoff);
ffm_vector *ffm_vector_get_order(ffm_vector *y);
double ffm_vector_kendall_tau(ffm_vector *a, ffm_vector *b);
// index starts at 0
void ffm_vector_update_mean(ffm_vector *mean, int index, ffm_vector *x);
double ffm_vector_mean_squared_error(ffm_vector *y_true, ffm_vector *y_pred);
// ########################### ffm_matrix ####################################
ffm_matrix *ffm_matrix_from_file(char *path);
void ffm_matrix_printf(ffm_matrix *X);
int ffm_matrix_free(ffm_matrix *a);
ffm_matrix *ffm_matrix_alloc(int size0, int size1);
ffm_matrix *ffm_matrix_calloc(int size0, int size1);
double *ffm_matrix_get_row_ptr(ffm_matrix *X, int i);
void ffm_matrix_set(ffm_matrix *X, int i, int j, double a);
double ffm_matrix_get(ffm_matrix *X, int i, int j);
// ########################### cblas helper #################################

double ffm_blas_ddot(ffm_vector *x, ffm_vector const *y);
// y <-- alpha*x + y
void ffm_blas_daxpy(double alpha, ffm_vector *x, ffm_vector const *y);
// ||x||_2 / euclidean norm
double ffm_blas_dnrm2(ffm_vector *x);
// dgemv y := alpha*A*x + beta*y,
// --------------- utils ---------------------------------

// read svm_light file, if target is omitted creates
// dummy target vector of zeros
fm_data read_svm_light_file(char *path);
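// Expected line format (cf. src/tests/data/svm_light_dummy):
//   <target> <feature>:<value> <feature>:<value> ...
// e.g. "298 0:6 1:1"; a file without the leading target column is read with
// a dummy target of zeros (cf. svm_light_dummy_witout_target).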

int Cs_write(FILE *f, const cs *A);

void free_fm_data(fm_data *data);

void init_ffm_coef(ffm_coef *coef, ffm_param param);
ffm_coef *alloc_fm_coef(int n_features, int k, int ignore_w);

void free_ffm_coef(ffm_coef *coef);

double ffm_r2_score(ffm_vector *y_true, ffm_vector *y_pred);

double ffm_average_precision_at_cutoff(ffm_vector *y_true, ffm_vector *y_pred,
                                       int cutoff);

// y = A*x+y
// with A in RowMajor format.
int Cs_row_gaxpy(const cs *A, const double *x, double *y);

/* y = alpha*A[:,j]*x+y */
int Cs_daxpy(const cs *A, int col_index, double alpha, const double *x,
             double *y);

/* y = alpha*A[:,j]+y */
int Cs_scal_apy(const cs *A, int col_index, double alpha, double *y);

/* y = alpha*A[:,j]^2+y */
int Cs_scal_a2py(const cs *A, int col_index, double alpha, double *y);

/* y = X^2.sum(axis=0) */
int Cs_col_norm(const cs *A, ffm_vector *y);

/* <A[:,j], y> */
double Cs_ddot(const cs *A, int col_index, double *y);

#endif /* FFM_UTILS_H */
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
Citing fastFM
=============

The library fastFM is an academic project. The time and resources spent developing fastFM are therefore justified
by the number of citations of the software. If you publish scientific articles using fastFM, please cite the following article (bibtex entry `citation.bib `_).

Bayer, I. "fastFM: A Library for Factorization Machines" Journal of Machine Learning Research 17, pp. 1-5 (2016)



fastFM: A Library for Factorization Machines
============================================

.. image:: https://travis-ci.org/ibayer/fastFM-core.svg?branch=master
   :target: https://travis-ci.org/ibayer/fastFM-core


.. image:: https://img.shields.io/badge/platform-OSX|Linux-lightgrey.svg
   :target: https://travis-ci.org/ibayer/fastFM

.. image:: https://img.shields.io/pypi/l/Django.svg
   :target: https://travis-ci.org/ibayer/fastFM

This repository contains the source code for the fastFM C library and the stand-alone
command line interface (cli). In general, we recommend using fastFM through the `Python
interface `_.

Usage
-----


.. code-block:: bash

    fastFM-core/bin/fastfm data/train_regression data/test_regression \
    --task regression \
    --rng-seed 1234 \
    --init-var=0.11 \
    --n-iter=123 \
    --solver='mcmc' \
    --rank 7 \
    --l2-reg=.22


Examples of how to use the other command line options, including example data, can be found
in ``fastFM-core/demo/Makefile``. The ``demo/`` folder contains examples showing how to use
fastFM as a C library.

+----------------+------------------+-----------------------------+
| Task           | Solver           | Loss                        |
+================+==================+=============================+
| Regression     | als, mcmc, sgd   | Square Loss                 |
+----------------+------------------+-----------------------------+
| Classification | als, mcmc, sgd   | Probit(Map), Probit, Sigmoid|
+----------------+------------------+-----------------------------+
| Ranking        | sgd              | BPR                         |
+----------------+------------------+-----------------------------+
*Supported solvers and tasks*

Installation
------------

**OS X:**
The library compiles on OS X, but the console interface currently does not.

.. code-block:: bash

    # Install cblas (Linux only).
    $ sudo apt-get install libatlas-base-dev

    # Clone the repo including submodules (or clone + `git submodule update --init --recursive`)
    $ git clone --recursive https://github.com/ibayer/fastFM-core.git

    # Build library
    $ cd fastFM-core/; make;

    # Build command line interface (currently this works on Linux only)
    $ make cli

Tests
-----

**OS X:**
The recommended way to manage dependencies is the `Homebrew package manager `_.
If you have brew installed, the dependencies can be installed by running
``brew install glib gsl argp-standalone``.

.. code-block:: bash

    # The tests require the glib and gsl libraries (Linux; for OS X see above).
    $ sudo apt-get install libglib2.0-dev libgsl0-dev

    $ cd fastFM-core/src/tests

    # Build the tests
    $ make

    # Run all tests
    $ make check


Contribution
------------

* Star this repository: keeps contributors motivated
* Open an issue: report bugs or suggest improvements
* Fix errors in the documentation: small changes matter
* Contribute code

**Contributions are very welcome!** Since this project lives on GitHub, we recommend
opening a pull request (PR) for code contributions as early as possible. This is the
fastest way to get feedback and allows `Travis CI `_ to run checks on your changes.

Development Guidelines
----------------------

* check coding style (google) ``clang-format-3.5 -style=google -i <file>``
* static code analysis ``clang-tidy-3.5 <file> -fix -- -I.``
* run valgrind memory check on sparse_test.c ``make mem_check``
* run valgrind to check for errors ``valgrind -v ./a.out >& out``


**Contributors**

* takuti
* altimin
* ibayer

License: BSD
------------
--------------------------------------------------------------------------------
/src/ffm.c:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#include "fast_fm.h"
/*
 * Interface for external call
 * used by the python wrapper and cli interface
 */

void ffm_predict(double *w_0, double *w, double *V, cs *X, double *y_pred,
                 int k) {
  int n_samples = X->m;
  int n_features = X->n;
  ffm_vector ffm_w = {.size = n_features, .data = w, .owner = 0};
  ffm_matrix ffm_V = {.size0 = k, .size1 = n_features, .data = V, .owner = 0};
  ffm_coef coef = {.w_0 = *w_0, .w = &ffm_w, .V = &ffm_V};

  ffm_vector ffm_y_pred = {.size = n_samples, .data = y_pred, .owner = 0};
  sparse_predict(&coef, X, &ffm_y_pred);
}
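
// Note on matrix orientation: ffm_predict, ffm_als_fit and
// ffm_mcmc_fit_predict take X with samples stored in rows (m = n_samples),
// while ffm_sgd_fit and ffm_sgd_bpr_fit below expect the transposed design
// matrix with samples stored in columns.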

void ffm_als_fit(double *w_0, double *w, double *V, cs *X, double *y,
                 ffm_param *param) {
  param->SOLVER = SOLVER_ALS;
  int n_samples = X->m;
  int n_features = X->n;

  ffm_vector ffm_w = {.size = n_features, .data = w, .owner = 0};
  ffm_matrix ffm_V = {
      .size0 = param->k, .size1 = n_features, .data = V, .owner = 0};
  ffm_coef coef = {
      .w_0 = *w_0, .w = &ffm_w, .V = &ffm_V, .lambda_w = param->init_lambda_w};
  if (param->k > 0) {
    coef.lambda_V = ffm_vector_alloc(param->k);
    coef.mu_V = ffm_vector_alloc(param->k);
    ffm_vector_set_all(coef.lambda_V, param->init_lambda_V);
  } else {
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
  }

  ffm_vector ffm_y = {.size = n_samples, .data = y, .owner = 0};
  sparse_fit(&coef, X, NULL, &ffm_y, NULL, *param);

  // copy the last coef values back into the Python memory
  *w_0 = coef.w_0;

  if (param->k > 0) ffm_vector_free_all(coef.lambda_V, coef.mu_V);
}

void ffm_mcmc_fit_predict(double *w_0, double *w, double *V, cs *X_train,
                          cs *X_test, double *y_train, double *y_pred,
                          ffm_param *param) {
  param->SOLVER = SOLVER_MCMC;
  int k = param->k;
  double *hyper_param = param->hyper_param;
  int n_test_samples = X_test->m;
  int n_train_samples = X_train->m;
  int n_features = X_train->n;
  ffm_vector ffm_w = {.size = n_features, .data = w, .owner = 0};
  ffm_matrix ffm_V = {
      .size0 = param->k, .size1 = n_features, .data = V, .owner = 0};
  ffm_coef coef = {.w_0 = *w_0,
                   .w = &ffm_w,
                   .V = &ffm_V,
                   .lambda_w = param->init_lambda_w,
                   .alpha = 1,
                   .mu_w = 0};
  if (k > 0) {
    coef.lambda_V = ffm_vector_alloc(param->k);
    coef.mu_V = ffm_vector_alloc(param->k);
  } else {
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
  }

  // set initial values for the hyperparameters
  int w_groups = 1;
  assert(param->n_hyper_param == 1 + 2 * k + 2 * w_groups &&
         "hyper_parameter vector has wrong size");
  if (param->warm_start) {
    coef.alpha = hyper_param[0];
    coef.lambda_w = hyper_param[1];
    // copy V lambda's over
    for (int i = 0; i < k; i++)
      ffm_vector_set(coef.lambda_V, i, hyper_param[i + 1 + w_groups]);
    coef.mu_w = hyper_param[k + 1 + w_groups];
    // copy V mu's over
    for (int i = 0; i < k; i++)
      ffm_vector_set(coef.mu_V, i, hyper_param[i + 1 + (2 * w_groups) + k]);
  }

  ffm_vector ffm_y_train = {
      .size = n_train_samples, .data = y_train, .owner = 0};
  ffm_vector ffm_y_pred = {.size = n_test_samples, .data = y_pred, .owner = 0};
  sparse_fit(&coef, X_train, X_test, &ffm_y_train, &ffm_y_pred, *param);
  // copy the last coef values back into the Python memory
  *w_0 = coef.w_0;

  // copy current hyperparameters back
  hyper_param[0] = coef.alpha;
  hyper_param[1] = coef.lambda_w;
  // copy V lambda's back
  for (int i = 0; i < k; i++)
    hyper_param[i + 1 + w_groups] = ffm_vector_get(coef.lambda_V, i);
  hyper_param[k + 1 + w_groups] = coef.mu_w;
  // copy mu's back
  for (int i = 0; i < k; i++)
    hyper_param[i + 1 + (2 * w_groups) + k] = ffm_vector_get(coef.mu_V, i);

  if (k > 0) ffm_vector_free_all(coef.lambda_V, coef.mu_V);
}

void ffm_sgd_bpr_fit(double *w_0, double *w, double *V, cs *X, double *pairs,
                     int n_pairs, ffm_param *param) {
  // X is transpose of design matrix. Samples are stored in columns.
  int n_features = X->m;
  ffm_vector ffm_w = {.size = n_features, .data = w, .owner = 0};
  ffm_matrix ffm_V = {
      .size0 = param->k, .size1 = n_features, .data = V, .owner = 0};
  ffm_coef coef = {
      .w_0 = *w_0, .w = &ffm_w, .V = &ffm_V, .lambda_w = param->init_lambda_w};
  if (param->k > 0) {
    coef.lambda_V = ffm_vector_alloc(param->k);
    coef.mu_V = ffm_vector_alloc(param->k);
  } else {
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
  }

  ffm_matrix ffm_y = {.size0 = n_pairs, .size1 = 2, .data = pairs, .owner = 0};
  ffm_fit_sgd_bpr(&coef, X, &ffm_y, *param);

  // copy the last coef values back into the Python memory
  *w_0 = coef.w_0;
  if (param->k > 0) ffm_vector_free_all(coef.lambda_V, coef.mu_V);
}

void ffm_sgd_fit(double *w_0, double *w, double *V, cs *X, double *y,
                 ffm_param *param) {
  // X is transpose of design matrix. Samples are stored in columns.
  int n_samples = X->n;
  int n_features = X->m;

  ffm_vector ffm_w = {.size = n_features, .data = w, .owner = 0};
  ffm_matrix ffm_V = {
      .size0 = param->k, .size1 = n_features, .data = V, .owner = 0};
  ffm_coef coef = {
      .w_0 = *w_0, .w = &ffm_w, .V = &ffm_V, .lambda_w = param->init_lambda_w};
  if (param->k > 0) {
    coef.lambda_V = ffm_vector_alloc(param->k);
    coef.mu_V = ffm_vector_alloc(param->k);
  } else {
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
  }

  ffm_vector ffm_y = {.size = n_samples, .data = y, .owner = 0};
  ffm_fit_sgd(&coef, X, &ffm_y, param);

  // copy the last coef values back into the Python memory
  *w_0 = coef.w_0;
  if (param->k > 0) ffm_vector_free_all(coef.lambda_V, coef.mu_V);
}
--------------------------------------------------------------------------------
/src/tests/TestFixtures.c:
--------------------------------------------------------------------------------
#ifndef TESTFIXTURES_H
#define TESTFIXTURES_H
#include "fast_fm.h"
#include "TestFixtures.h"

void TestFixtureContructorSimple(TestFixture_T *pFixture, gconstpointer pg) {
  int n_features = 2;
  int n_samples = 3;
  int k = 1;
  // setup data
  int m = n_samples;
  // X = | 1 2|
  //     | 3 4|
  //     | 5 6|
  int n = n_features;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */
  cs_entry(X, 0, 0, 1);
  cs_entry(X, 0, 1, 2);
  cs_entry(X, 1, 0, 3);
  cs_entry(X, 1, 1, 4);
  cs_entry(X, 2, 0, 5);
  cs_entry(X, 2, 1, 6);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);

  pFixture->X_t = X_t;
  pFixture->X = X_csc;
  pFixture->y = ffm_vector_calloc(n_samples);
  // y = [600, 2800, 10000]
  pFixture->y->data[0] = 600;
  pFixture->y->data[1] = 2800;
  pFixture->y->data[2] = 10000;

  // setup coefs
  pFixture->coef = alloc_fm_coef(n_features, k, false);

  // V = |300, 400|
  ffm_matrix_set(pFixture->coef->V, 0, 0, 300);
  ffm_matrix_set(pFixture->coef->V, 0, 1, 400);

  // w = [10 20]
  ffm_vector_set(pFixture->coef->w, 0, 10);
  ffm_vector_set(pFixture->coef->w, 1, 20);

  pFixture->coef->w_0 = 2;
  // hyperparameter
  pFixture->coef->lambda_w = 0;
  pFixture->coef->lambda_V = 0;
}

void TestFixtureContructorWide(TestFixture_T *pFixture, gconstpointer pg) {
  int n_features = 3;
  int n_samples = 2;
  int k = 2;
  // setup data
  int m = n_samples;
  // X = | 1 2 3|
  //     | 4 0 6|
  int n = n_features;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */
  cs_entry(X, 0, 0, 1);
  cs_entry(X, 0, 1, 2);
  cs_entry(X, 0, 2, 3);
  cs_entry(X, 1, 0, 4);
  cs_entry(X, 1, 2, 6);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);

  pFixture->X_t = X_t;
  pFixture->X = X_csc;
  pFixture->y = ffm_vector_calloc(n_samples);
  // y = [1 20]
  pFixture->y->data[0] = 1;
  pFixture->y->data[1] = 20;

  // setup coefs
  pFixture->coef = alloc_fm_coef(n_features, k, false);

  // V = |1 2 3|
  //     |4 5 6|
  ffm_matrix_set(pFixture->coef->V, 0, 0, 1);
  ffm_matrix_set(pFixture->coef->V, 0, 1, 2);
  ffm_matrix_set(pFixture->coef->V, 0, 2, 3);
  ffm_matrix_set(pFixture->coef->V, 1, 0, 4);
  ffm_matrix_set(pFixture->coef->V, 1, 1, 5);
  ffm_matrix_set(pFixture->coef->V, 1, 2, 6);

  // w = [1 2 3]
  ffm_vector_set(pFixture->coef->w, 0, 1);
  ffm_vector_set(pFixture->coef->w, 1, 2);
  ffm_vector_set(pFixture->coef->w, 2, 3);

  pFixture->coef->w_0 = 2;
  // hyperparameter
  pFixture->coef->lambda_w = 1;
  ffm_vector_set_all(pFixture->coef->lambda_V, 1);
}

void TestFixtureContructorLong(TestFixture_T *pFixture, gconstpointer pg) {
  int n_features = 2;
  int n_samples = 5;
  int k = 2;
  // setup data
  int m = n_samples;
  // X = | 6 1 |
  //     | 2 3 |
  //     | 3 0 |
  //     | 6 1 |
  //     | 4 5 |
  int n = n_features;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */
  cs_entry(X, 0, 0, 6);
  cs_entry(X, 0, 1, 1);
  cs_entry(X, 1, 0, 2);
  cs_entry(X, 1, 1, 3);
  cs_entry(X, 2, 0, 3);
  cs_entry(X, 3, 0, 6);
  cs_entry(X, 3, 1, 1);
  cs_entry(X, 4, 0, 4);
  cs_entry(X, 4, 1, 5);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);

  pFixture->X_t = X_t;
  pFixture->X = X_csc;
  pFixture->y = ffm_vector_calloc(n_samples);
  // y = [298 266 29 298 848]
  pFixture->y->data[0] = 298;
  pFixture->y->data[1] = 266;
  pFixture->y->data[2] = 29;
  pFixture->y->data[3] = 298;
  pFixture->y->data[4] = 848;

  // setup coefs
  pFixture->coef = alloc_fm_coef(n_features, k, false);

  // V = |6 0|
  //     |5 8|
  ffm_matrix_set(pFixture->coef->V, 0, 0, 6);
  ffm_matrix_set(pFixture->coef->V, 0, 1, 0);
  ffm_matrix_set(pFixture->coef->V, 1, 0, 5);
  ffm_matrix_set(pFixture->coef->V, 1, 1, 8);

  // w = [9 2]
  ffm_vector_set(pFixture->coef->w, 0, 9);
  ffm_vector_set(pFixture->coef->w, 1, 2);

  // w_0 = 2
  pFixture->coef->w_0 = 2;
}

void TestFixtureDestructor(TestFixture_T *pFixture, gconstpointer pg) {
  cs_spfree(pFixture->X_t);
  cs_spfree(pFixture->X);

  ffm_vector_free(pFixture->y);
  free_ffm_coef(pFixture->coef);
}

cs *Cs_rand_spalloc(ffm_rng *rng, int n_samples, int n_features) {
  cs *X_trip = cs_spalloc(n_samples, n_features, n_samples * n_features, 1, 1);

  int i, j;
  for (i = 0; i < n_samples; i++)
    for (j = 0; j < n_features; j++)
      cs_entry(X_trip, i, j, ffm_rand_uniform(rng) * 2.0 - 1);

  cs_dropzeros(X_trip);
  cs *X_csc = cs_compress(X_trip);
  cs_spfree(X_trip);
  return X_csc;
}

TestFixture_T *makeTestFixture(int seed, int n_samples, int n_features, int k) {
  ffm_rng *rng = ffm_rng_seed(seed);

  // allocate coef
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  // draw hyperparameters
  coef->alpha = ffm_rand_gamma(rng, .5 * (1 + n_samples), 1.0);
  coef->lambda_w = ffm_rand_gamma(rng, .5 * (2 + n_features), 1.0 / 100.);
  double lambda_V_all = ffm_rand_gamma(rng, .5 * (2 + n_features), 1.0 / 100.);
  if (k > 0) ffm_vector_set_all(coef->lambda_V, lambda_V_all);
  double sigma2_w = 1.0 / (1 + n_samples);
  coef->mu_w = ffm_rand_normal(rng, sigma2_w * coef->lambda_w, sigma2_w);
  double sigma2_V = 1.0 / (1 + n_features);
  double mu_V_all = ffm_rand_normal(rng, sigma2_V * lambda_V_all, sigma2_V);
  if (k > 0) ffm_vector_set_all(coef->mu_V, mu_V_all);

  // generate w_0
  coef->w_0 = ffm_rand_normal(rng, 0, 1);

  // init w
  for (int i = 0; i < n_features; i++)
    ffm_vector_set(
        coef->w, i,
        coef->mu_w + ffm_rand_normal(rng, coef->mu_w, 1.0 / coef->lambda_w));

  // init V
  if (k > 0) {
    for (int i = 0; i < coef->V->size0; i++)
      for (int j = 0; j < coef->V->size1; j++) {
        double tmp = ffm_rand_normal(rng, mu_V_all, 1.0 / lambda_V_all);
        ffm_matrix_set(coef->V, i, j, tmp);
      }
  }

  // generate uniform X
  cs *X = Cs_rand_spalloc(rng, n_samples, n_features);
  cs *X_t = cs_transpose(X, 1);

  // generate y using second-order FM
  ffm_vector *y = ffm_vector_calloc(n_samples);
  sparse_predict(coef, X, y);

  // put everything into a TestFixture
  struct TestFixture_T *pFix = malloc(sizeof *pFix);
  pFix->X = X;
  pFix->X_t = X_t;
  pFix->y = y;
  pFix->coef = coef;
  ffm_rng_free(rng);
  return pFix;
}
#endif /* TESTFIXTURES_H */
--------------------------------------------------------------------------------
/src/ffm_sgd.c:
--------------------------------------------------------------------------------
// Author: Immanuel Bayer
// License: BSD 3 clause

#include "fast_fm.h"

void ffm_fit_sgd(ffm_coef *coef, cs *A, ffm_vector *y, ffm_param *param) {
  // A is transpose of design matrix. Samples are stored in columns.
  double step_size = param->stepsize;
  int n_samples = A->n;
  int k;
  if (coef->V)
    k = coef->V->size0;
  else
    k = 0;
  if (!param->warm_start) init_ffm_coef(coef, *param);

  for (int i = 0; i < param->n_iter; i++) {
    int sample_row = i % n_samples;
    double y_true = y->data[sample_row];
    double y_err = 0;

    // predict y(x)
    if (param->TASK == TASK_REGRESSION) {
      y_err = ffm_predict_sample(coef, A, sample_row) - y_true;
    } else {
      y_err =
          (ffm_sigmoid(ffm_predict_sample(coef, A, sample_row) * y_true) - 1) *
          y_true;
    }
    coef->w_0 = coef->w_0 - step_size * y_err;

    int p, *Ap, *Ai;
    double *Ax;
    Ap = A->p;
    Ai = A->i;
    Ax = A->x;

    // update w
    // p starts at the current sample position and ends before the next
    // sample starts or the last column ends.
    // The last entry in Ap is the number of nonzeros, which is always
    // greater than the start of the last sample.
    for (p = Ap[sample_row]; p < Ap[sample_row + 1]; p++) {
      double theta_w = ffm_vector_get(coef->w, Ai[p]);
      ffm_vector_set(
          coef->w, Ai[p],
          theta_w - step_size * (y_err * Ax[p] + theta_w * coef->lambda_w));
    }
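    // Gradient of the second-order term w.r.t. V[f,i] (Rendle):
    //   d y(x) / d V[f,i] = x_i * (sum_j V[f,j] x_j) - V[f,i] * x_i^2,
    // where the inner sum is precomputed once per factor f as `cache`.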
| if (k > 0) ffm_vector_set_all(coef->mu_V, mu_V_all);
192 |
193 | // generate w_0
194 | coef->w_0 = ffm_rand_normal(rng, 0, 1);
195 |
196 | // init w
197 | for (int i = 0; i < n_features; i++)
198 | ffm_vector_set(
199 | coef->w, i,
200 | coef->mu_w + ffm_rand_normal(rng, coef->mu_w, 1.0 / coef->lambda_w));
201 |
202 | // init V
203 | if (k > 0) {
204 | for (int i = 0; i < coef->V->size0; i++)
205 | for (int j = 0; j < coef->V->size1; j++) {
206 | double tmp = ffm_rand_normal(rng, mu_V_all, 1.0 / lambda_V_all);
207 | ffm_matrix_set(coef->V, i, j, tmp);
208 | }
209 | }
210 |
211 | // generate uniform X
212 | cs *X = Cs_rand_spalloc(rng, n_samples, n_features);
213 | cs *X_t = cs_transpose(X, 1);
214 |
215 | // generate y using a second-order FM
216 | ffm_vector *y = ffm_vector_calloc(n_samples);
217 | sparse_predict(coef, X, y);
218 |
219 | // put everything into a TestFixture
220 | struct TestFixture_T *pFix = malloc(sizeof *pFix);
221 | pFix->X = X;
222 | pFix->X_t = X_t;
223 | pFix->y = y;
224 | pFix->coef = coef;
225 | ffm_rng_free(rng);
226 | return pFix;
227 | }
228 | #endif /* TESTFIXTURES_H */
229 |
--------------------------------------------------------------------------------
/src/ffm_sgd.c:
--------------------------------------------------------------------------------
1 | // Author: Immanuel Bayer
2 | // License: BSD 3 clause
3 |
4 | #include "fast_fm.h"
5 |
6 | void ffm_fit_sgd(ffm_coef *coef, cs *A, ffm_vector *y, ffm_param *param) {
7 | // A is the transpose of the design matrix. Samples are stored in columns.
8 | double step_size = param->stepsize;
9 | int n_samples = A->n;
10 | int k;
11 | if (coef->V)
12 | k = coef->V->size0;
13 | else
14 | k = 0;
15 | if (!param->warm_start) init_ffm_coef(coef, *param);
16 |
17 | for (int i = 0; i < param->n_iter; i++) {
18 | int sample_row = i % n_samples;
19 | double y_true = y->data[sample_row];
20 | double y_err = 0;
21 |
22 | // predict y(x)
23 | if (param->TASK == TASK_REGRESSION) {
24 | y_err = ffm_predict_sample(coef, A, sample_row) - y_true;
25 | }
26 | else {
27 | y_err =
28 | (ffm_sigmoid(ffm_predict_sample(coef, A, sample_row) * y_true) - 1) *
29 | y_true;
30 | }
31 | coef->w_0 = coef->w_0 - step_size * y_err;
32 |
33 | int p, *Ap, *Ai;
34 | double *Ax;
35 | Ap = A->p;
36 | Ai = A->i;
37 | Ax = A->x;
38 |
39 | // update w
40 | // p starts at the current sample's position and ends before the next
41 | // sample starts or the last column ends.
42 | // The last entry in Ap is the number of nonzeros, which is always greater
43 | // than the start of the last sample.
44 | for (p = Ap[sample_row]; p < Ap[sample_row + 1]; p++) {
45 | double theta_w = ffm_vector_get(coef->w, Ai[p]);
46 | ffm_vector_set(
47 | coef->w, Ai[p],
48 | theta_w - step_size * (y_err * Ax[p] + theta_w * coef->lambda_w));
49 | }
50 | if (k > 0)
51 | // update V
52 | for (int f = 0; f < k; f++) {
53 | // calc cache
54 | double cache = 0;
55 | for (p = Ap[sample_row]; p < Ap[sample_row + 1]; p++)
56 | cache += Ax[p] * ffm_matrix_get(coef->V, f, Ai[p]);
57 | // update V_i_j
58 | for (p = Ap[sample_row]; p < Ap[sample_row + 1]; p++) {
59 | double v = ffm_matrix_get(coef->V, f, Ai[p]);
60 | double grad = Ax[p] * cache - (Ax[p] * Ax[p]) * v;
61 | double lambda_V_f = ffm_vector_get(coef->lambda_V, f);
62 | ffm_matrix_set(coef->V, f, Ai[p],
63 | v - step_size * (y_err * grad + v * lambda_V_f));
64 | }
65 | }
66 | }
67 | }
68 |
69 | void ffm_fit_sgd_bpr(ffm_coef *coef, cs *A, ffm_matrix *pairs,
70 | ffm_param param) {
71 | // A is the transpose of the design matrix. Samples are stored in columns.
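// Example of the CSC layout assumed here: with the 5x2 training matrix X
// from TestFixtures.h,
//
//   X = | 6 1 |        A = X^T in compressed-column (CSC) form:
//       | 2 3 |          Ap = {0, 2, 4, 5, 7, 9}
//       | 3 0 |          Ai = {0, 1, 0, 1, 0, 0, 1, 0, 1}
//       | 6 1 |          Ax = {6, 1, 2, 3, 3, 6, 1, 4, 5}
//       | 4 5 |
//
// the nonzero feature values of sample j live in Ax[Ap[j] .. Ap[j+1]-1] and
// their feature indices in Ai[Ap[j] .. Ap[j+1]-1]; e.g. sample 2 has the
// single entry x_0 = 3 (Ap[2] = 4, Ap[3] = 5).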
72 | int p, *Ap, *Ai;
73 | double *Ax;
74 | Ap = A->p;
75 | Ai = A->i;
76 | Ax = A->x;
77 |
78 | double step_size = param.stepsize;
79 | int n_comparisons = pairs->size0;
80 | int k;
81 | if (coef->V)
82 | k = coef->V->size0;
83 | else
84 | k = 0;
85 | if (!param.warm_start) init_ffm_coef(coef, param);
86 | coef->w_0 = 0; // should always be zero
87 |
88 | for (int i = 0; i < param.n_iter; i++) {
89 | int comparison_row = i % n_comparisons;
90 | int sample_row_p = ffm_matrix_get(pairs, comparison_row, 0);
91 | int sample_row_n = ffm_matrix_get(pairs, comparison_row, 1);
92 |
93 | // predict y(x)
94 | double pairs_err =
95 | -1 + ffm_sigmoid(ffm_predict_sample(coef, A, sample_row_p) -
96 | ffm_predict_sample(coef, A, sample_row_n));
97 |
98 | int p_n = Ap[sample_row_n]; // Start of negative sample in value array.
99 | int p_n_end = Ap[sample_row_n + 1]; // End of negative sample in value array.
100 |
101 | int p_p = Ap[sample_row_p]; // Start of positive sample in value array.
102 | int p_p_end = Ap[sample_row_p + 1]; // End of positive sample in value array.
103 |
104 | // update w
105 | // Iterate over the nnz of the positive (p) and negative (n) feature
106 | // vectors in parallel. Stop when both vectors have reached their last entry.
107 | while (p_p < p_p_end || p_n < p_n_end) {
108 |
109 | // Check whether p should be added to the gradient:
110 | // 1) p has features left, and
111 | // 2) n has no features left, or p's next feature comes before n's
112 | // next feature. (The bounds check must come first to avoid reading
113 | // past the end of Ai.)
113 | bool add_p = false;
114 | if (p_p < p_p_end && (!(p_n < p_n_end) || Ai[p_p] <= Ai[p_n])) {
115 | add_p = true;
116 | }
117 |
118 | // This is symmetric to the check above.
119 | bool add_n = false;
120 | if (p_n < p_n_end && (!(p_p < p_p_end) || Ai[p_n] <= Ai[p_p])) {
121 | add_n = true;
122 | }
123 |
124 | // Add the feature value to the gradient and advance the feature position.
125 | double grad = 0;
126 | int feature_to_update = -1;
127 | if (add_p) {
128 | feature_to_update = Ai[p_p];
129 | grad = Ax[p_p++];
130 | }
131 | if (add_n) {
132 | feature_to_update = Ai[p_n];
133 | grad -= Ax[p_n++];
134 | }
135 | assert(feature_to_update >= 0);
136 |
137 | double theta_w = coef->w->data[feature_to_update];
138 | ffm_vector_set(
139 | coef->w, feature_to_update,
140 | theta_w - step_size * (pairs_err * grad + theta_w * coef->lambda_w));
141 | }
142 | if (k > 0)
143 | // update V
144 | for (int f = 0; f < k; f++) {
145 | // calc cache
146 | double cache_p = 0;
147 | for (p = Ap[sample_row_p]; p < Ap[sample_row_p + 1]; p++)
148 | cache_p += Ax[p] * ffm_matrix_get(coef->V, f, Ai[p]);
149 | double cache_n = 0;
150 | for (p = Ap[sample_row_n]; p < Ap[sample_row_n + 1]; p++)
151 | cache_n += Ax[p] * ffm_matrix_get(coef->V, f, Ai[p]);
152 | // update V_i
153 | double lambda_V_f = ffm_vector_get(coef->lambda_V, f);
154 | update_second_order_bpr(A, coef->V, cache_n, cache_p, pairs_err,
155 | step_size, lambda_V_f, sample_row_p,
156 | sample_row_n, f);
157 | }
158 | }
159 | }
160 |
161 | // TODO: Better code reuse between the first- and second-order updates.
162 | void update_second_order_bpr(cs *A, ffm_matrix *V, double cache_n,
163 | double cache_p, double pairs_err, double step_size,
164 | double lambda_V, int sample_row_p,
165 | int sample_row_n, int f) {
166 | int *Ap, *Ai;
167 | double *Ax;
168 | Ap = A->p;
169 | Ai = A->i;
170 | Ax = A->x;
171 | int p_n = Ap[sample_row_n]; // Start of negative sample in value array.
172 | int p_n_end = Ap[sample_row_n + 1]; // End of negative sample in value array.
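// The positive and the negative sample's feature lists are merged with the
// same two-pointer scheme as in the first-order update above: both Ai
// ranges are sorted, so always advancing the pointer with the smaller
// feature index visits every feature that occurs in either sample exactly
// once. E.g. for positive features {0, 1} and negative features {0, 4},
// the loop handles feature 0 (present in both), then 1 (positive only),
// then 4 (negative only).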
173 |
174 | int p_p = Ap[sample_row_p]; // Start of positive sample in value array.
175 | int p_p_end = Ap[sample_row_p + 1]; // End of positive sample in value array.
176 |
177 | // Iterate over the nnz of the positive (p) and negative (n) feature
178 | // vectors in parallel. Stop when both vectors have reached their last entry.
179 | while (p_p < p_p_end || p_n < p_n_end) {
180 |
181 | // Check whether p should be added to the gradient:
182 | // 1) p has features left, and
183 | // 2) n has no features left, or p's next feature comes before n's
184 | // next feature.
185 | bool add_p = false;
186 | int feature_to_update = -1;
187 | if (p_p < p_p_end && (!(p_n < p_n_end) || Ai[p_p] <= Ai[p_n])) {
188 | add_p = true;
189 | feature_to_update = Ai[p_p];
190 | }
191 |
192 | // This is symmetric to the check above.
193 | bool add_n = false;
194 | if (p_n < p_n_end && (!(p_p < p_p_end) || Ai[p_n] <= Ai[p_p])) {
195 | add_n = true;
196 | feature_to_update = Ai[p_n];
197 | }
198 | assert(feature_to_update >= 0);
199 |
200 | // Add the feature value to the gradient and advance the feature position.
201 | double grad = 0;
202 | double v = ffm_matrix_get(V, f, feature_to_update);
203 | if (add_p) {
204 | grad = Ax[p_p] * cache_p - (Ax[p_p] * Ax[p_p]) * v;
205 | p_p++;
206 | }
207 | if (add_n) {
208 | feature_to_update = Ai[p_n];
209 | grad -= Ax[p_n] * cache_n - (Ax[p_n] * Ax[p_n]) * v;
210 | p_n++;
211 | }
212 | ffm_matrix_set(V, f, feature_to_update,
213 | v - step_size * (pairs_err * grad + v * lambda_V));
214 | }
215 | }
216 |
217 | double ffm_predict_sample(ffm_coef *coef, cs *A, int sample_row) {
218 | double result = coef->w_0;
219 | int k;
220 | if (coef->V)
221 | k = coef->V->size0;
222 | else
223 | k = 0;
224 |
225 | // add first order contributions
226 | int p, *Ap, *Ai;
227 | double *Ax;
228 | Ap = A->p;
229 | Ai = A->i;
230 | Ax = A->x;
231 | for (p = Ap[sample_row]; p < Ap[sample_row + 1]; p++)
232 | result += Ax[p] * coef->w->data[Ai[p]];
233 |
234 | // add second order contributions
235 | double sec_order = 0;
236 | for (int f = 0; f < k; f++) {
237 | double late_square = 0; // accumulates sum_l v_fl * x_l, squared below
238 | double square = 0; // accumulates sum_l (v_fl * x_l)^2
239 | for (p = Ap[sample_row]; p < Ap[sample_row + 1]; p++) {
240 | double x_l = Ax[p];
241 | double v = ffm_matrix_get(coef->V, f, Ai[p]);
242 | late_square += v * x_l;
243 | square += (v * v) * (x_l * x_l);
244 | }
245 | sec_order += (late_square * late_square) - square;
246 | }
247 | result += .5 * sec_order;
248 | return result;
249 | }
250 |
251 | ffm_coef *extract_gradient(ffm_coef *coef_t0, ffm_coef *coef_t1,
252 | double stepsize) {
253 | int n_features = coef_t0->w->size;
254 | int k;
255 | if (coef_t0->V)
256 | k = coef_t0->V->size0;
257 | else
258 | k = 0;
259 |
260 | ffm_coef *grad = alloc_fm_coef(n_features, k, 0);
261 |
262 | grad->w_0 = coef_t1->w_0 / stepsize - coef_t0->w_0 / stepsize;
263 |
264 | for (int i = 0; i < n_features; i++)
265 | ffm_vector_set(grad->w, i, -(ffm_vector_get(coef_t1->w, i) / stepsize -
266 | ffm_vector_get(coef_t0->w, i) / stepsize));
267 |
268 | for (int i = 0; i < k; i++)
269 | for (int j = 0; j < n_features; j++)
270 | ffm_matrix_set(grad->V, i, j,
271 | -(ffm_matrix_get(coef_t1->V, i, j) / stepsize -
272 | ffm_matrix_get(coef_t0->V, i, j) / stepsize));
273 | return grad;
274 | }
275 |
276 | double l2_penalty(ffm_coef *coef) {
277 | double loss = 0;
278 | // w l2 penalty
279 | double l2_norm = ffm_blas_dnrm2(coef->w);
280 | loss += coef->lambda_w * (l2_norm * l2_norm);
281 |
282 | if (!coef->V) return loss;
283 | // V l2 penalty
284 | int k = coef->V->size0;
285 | int n_features = coef->V->size1;
286 | for (int i = 0; i < k; i++) {
287 | double lambda_V_i = ffm_vector_get(coef->lambda_V, i);
288 | for (int j = 0; j < n_features; j++) {
289 | double V_ij = ffm_matrix_get(coef->V, i, j);
290 | loss += lambda_V_i * (V_ij * V_ij);
291 | }
292 | }
293 | return loss;
294 | }
295 |
--------------------------------------------------------------------------------
/src/cli.c:
--------------------------------------------------------------------------------
1 | // Author: Immanuel Bayer
2 | // License: BSD 3 clause
3 |
4 | #include "fast_fm.h"
5 | #include <argp.h>
6 | #include <fenv.h>
7 | #include <string.h>
8 | #include <time.h>
9 |
10 | const char *argp_program_version = "fastFM dev";
11 | const char *argp_program_bug_address = "";
12 |
13 | /* Program documentation. */
14 | static char doc[] =
15 | "fastFM -- Provides a range of solvers and loss functions for the "
16 | "FACTORIZATION MACHINE model.";
17 |
18 | /* A description of the arguments we accept. */
19 | static char args_doc[] = "TRAIN_FILE TEST_FILE";
20 |
21 | /* The options we understand. */
22 | static struct argp_option options[] = {
23 | {"rng-seed", 556, "NUM", 0,
24 | "Seed for the random number generator (default: current time(NULL))"},
25 | {"task", 't', "S", 0,
26 | "The tasks 'classification', 'regression' and 'ranking' are supported "
27 | "(default 'regression'). Ranking uses the Bayesian Pairwise Ranking (BPR) "
28 | "loss and needs an additional file (see '--train-pairs')"},
29 | {"train-pairs", 555, "FILE", 0,
30 | "Ranking only! File with the training pairs required for BPR training."},
31 | {0, 0, 0, 0, "Solver:", 1},
32 | {"solver", 's', "S", 0,
33 | "The solvers 'als', 'mcmc' and 'sgd' are available for 'regression' and "
34 | "'classification' (default 'mcmc'). Ranking is only supported by 'sgd'."},
35 | {"n-iter", 'n', "NUM", 0, "Number of iterations (default 50)"},
36 | {"step-size", 444, "NUM", 0, "Step size for 'sgd' updates (default 0.01)"},
37 | {"init-var", 'i', "NUM", 0,
38 | "N(0, var) is used to initialize the coefficients of matrix V (default "
39 | "0.01)"},
40 | {0, 0, 0, 0, "Model Complexity and Regularization:", 3},
41 | {"rank", 'k', "NUM", 0, "Rank of the factorization, matrix V (default 8)."},
42 | {"l2-reg", 'r', "NUM", 0,
43 | "l2 regularization, sets an equal penalty for all coefficients (default 1)"},
44 | {"l2-reg-w", 222, "NUM", 0,
45 | "l2 regularization for the linear coefficients (w)"},
46 | {"l2-reg-V", 333, "NUM", 0,
47 | "l2 regularization for the latent representation (V) of the pairwise "
48 | "coefficients"},
49 | {0, 0, 0, 0, "I/O options:", -5},
50 | {"test-predict", 55, "FILE", 0, "Save predictions for TEST_FILE to FILE."},
51 | {0, 0, 0, 0, "Informational Options:", -3},
52 | {"verbose", 'v', 0, 0, "Produce verbose output"},
53 | {"quiet", 'q', 0, 0, "Don't produce any output"},
54 | {"silent", 's', 0, OPTION_ALIAS},
55 | {0}};
56 |
57 | /* Used by main to communicate with parse_opt. */
58 | struct arguments {
59 | char *args[2]; /* arg1 & arg2 */
60 | int silent, verbose;
61 |
62 | char *test_file;
63 | char *train_pairs;
64 | char *train_file;
65 | char *test_predict_file;
66 |
67 | int *arg_count;
68 |
69 | // fm parameters
70 | int k;
71 | int n_iter;
72 | double init_var;
73 | double step_size;
74 | double l2_reg;
75 | double l2_reg_w;
76 | double l2_reg_V;
77 | char *solver;
78 | char *task;
79 | int rng_seed;
80 | };
81 |
82 | /* Parse a single option.
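   argp calls this function once for every option it parses: 'key' is the
   short option character ('t', 'k', 'n', ...) or the numeric key from the
   options table above (55, 222, 333, 444, 555, 556), and 'arg' is the raw
   string value of the option. For example, "-k 8" arrives here as
   key = 'k' with arg = "8" and is converted with atoi below.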
*/
83 | static error_t parse_opt(int key, char *arg, struct argp_state *state) {
84 | /* Get the input argument from argp_parse, which we
85 | know is a pointer to our arguments structure. */
86 | struct arguments *arguments = state->input;
87 | int *arg_count = arguments->arg_count;
88 |
89 | switch (key) {
90 | case 't':
91 | arguments->task = arg;
92 | break;
93 |
94 | case 'k':
95 | arguments->k = atoi(arg);
96 | break;
97 |
98 | case 'n':
99 | arguments->n_iter = atoi(arg);
100 | break;
101 |
102 | case 's':
103 | arguments->solver = arg;
104 | break;
105 |
106 | case 'r':
107 | arguments->l2_reg = atof(arg);
108 | break;
109 |
110 | case 222:
111 | arguments->l2_reg_w = atof(arg);
112 | break;
113 |
114 | case 333:
115 | arguments->l2_reg_V = atof(arg);
116 | break;
117 |
118 | case 444:
119 | arguments->step_size = atof(arg);
120 | break;
121 |
122 | case 556:
123 | arguments->rng_seed = atoi(arg);
124 | break;
125 |
126 | case 555:
127 | arguments->train_pairs = arg;
128 | break;
129 |
130 | case 55:
131 | arguments->test_predict_file = arg;
132 | break;
133 |
134 | case 'i':
135 | arguments->init_var = atof(arg);
136 | break;
137 |
138 | case 'q':
139 | arguments->silent = 1;
140 | break;
141 |
142 | case 'v':
143 | arguments->verbose = 1;
144 | break;
145 |
146 | case ARGP_KEY_ARG: {
147 | (*arg_count)--;
148 |
149 | if (state->arg_num == 0) {
150 | arguments->train_file = arg;
151 | }
152 |
153 | if (state->arg_num == 1) {
154 | arguments->test_file = arg;
155 | }
156 |
157 | arguments->args[state->arg_num] = arg;
158 | } break;
159 |
160 | case ARGP_KEY_END: {
161 | if (*arg_count > 0)
162 | argp_failure(state, 1, 0, "too few arguments");
163 | else if (*arg_count < 0)
164 | argp_failure(state, 1, 0, "too many arguments");
165 | } break;
166 |
167 | default:
168 | return ARGP_ERR_UNKNOWN;
169 | }
170 |
171 | return 0;
172 | }
173 |
174 | /* Our argp parser. */
175 | static struct argp argp = {options, parse_opt, args_doc, doc};
176 |
177 | int main(int argc, char **argv) {
178 | /*
179 | feenableexcept(FE_INVALID |
180 | FE_DIVBYZERO |
181 | FE_OVERFLOW |
182 | FE_UNDERFLOW);
183 | */
184 |
185 | struct arguments arguments;
186 |
187 | /* Default values. */
188 | arguments.silent = 0;
189 | arguments.verbose = 0;
190 |
191 | // file paths
192 | arguments.test_file = NULL;
193 | arguments.train_file = NULL;
194 | arguments.test_predict_file = NULL;
195 | arguments.train_pairs = NULL;
196 |
197 | // fm default parameters
198 | arguments.k = 8;
199 | arguments.n_iter = 50;
200 | arguments.init_var = 0.01;
201 | arguments.step_size = 0.01;
202 | arguments.l2_reg = 1;
203 | arguments.l2_reg_w = 0;
204 | arguments.l2_reg_V = 0;
205 | arguments.solver = "mcmc";
206 | arguments.task = "regression";
207 | arguments.rng_seed = time(NULL);
208 |
209 | int arg_count = 2;
210 | arguments.arg_count = &arg_count;
211 |
212 | /* Parse our arguments; every option seen by parse_opt will
213 | be reflected in arguments.
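   A typical invocation on the bundled demo data (the binary name 'fastfm'
   is assumed here; it depends on the Makefile):

       ./fastfm demo/data/train_regression demo/data/test_regression \
           --task regression --solver als --n-iter 100 --rank 4 \
           --test-predict predictions.txt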
*/ 214 | argp_parse(&argp, argc, argv, 0, 0, &arguments); 215 | 216 | ffm_param param = {.n_iter = arguments.n_iter, 217 | .init_sigma = arguments.init_var, 218 | .k = arguments.k, 219 | .stepsize = arguments.step_size, 220 | .rng_seed = arguments.rng_seed}; 221 | 222 | // parse solver 223 | if (strcmp(arguments.solver, "mcmc") == 0) 224 | param.SOLVER = SOLVER_MCMC; 225 | else if (strcmp(arguments.solver, "als") == 0) 226 | param.SOLVER = SOLVER_ALS; 227 | else if (strcmp(arguments.solver, "sgd") == 0) 228 | param.SOLVER = SOLVER_SGD; 229 | else 230 | assert(0 && "unknown solver"); 231 | 232 | // parse task 233 | if (strcmp(arguments.task, "regression") == 0) 234 | param.TASK = TASK_REGRESSION; 235 | else if (strcmp(arguments.task, "classification") == 0) 236 | param.TASK = TASK_CLASSIFICATION; 237 | else if (strcmp(arguments.task, "ranking") == 0) 238 | param.TASK = TASK_RANKING; 239 | else 240 | assert(0 && "unknown task"); 241 | 242 | printf( 243 | "TRAIN_FILE = %s\nTEST_FILE = %s\n" 244 | "VERBOSE = %s\nSILENT = %s\n", 245 | arguments.args[0], arguments.args[1], arguments.verbose ? "yes" : "no", 246 | arguments.silent ? "yes" : "no"); 247 | 248 | printf("task=%s", arguments.task); 249 | printf(", init-var=%f", param.init_sigma); 250 | printf(", n-iter=%i", param.n_iter); 251 | if (param.TASK == TASK_RANKING) printf(", step-size=%f", param.stepsize); 252 | printf(", solver=%s", arguments.solver); 253 | printf(", k=%i", param.k); 254 | 255 | // default if no l2_reg_w specified 256 | param.init_lambda_w = arguments.l2_reg; 257 | param.init_lambda_V = arguments.l2_reg; 258 | 259 | if (arguments.l2_reg_w != 0.0) param.init_lambda_w = arguments.l2_reg_w; 260 | if (arguments.l2_reg_V != 0.0) param.init_lambda_V = arguments.l2_reg_V; 261 | 262 | if (strcmp(arguments.solver, "mcmc") != 0) { 263 | printf(", l2-reg-w=%f", param.init_lambda_w); 264 | if (arguments.k > 0) printf(", l2-reg-V=%f", param.init_lambda_V); 265 | printf("\n"); 266 | } 267 | 268 | printf("\nload data\n"); 269 | fm_data train_data = read_svm_light_file(arguments.args[0]); 270 | fm_data test_data = read_svm_light_file(arguments.args[1]); 271 | 272 | int n_features = train_data.X->n; 273 | ffm_vector *y_test_predict = ffm_vector_calloc(test_data.y->size); 274 | ffm_coef *coef = alloc_fm_coef(n_features, arguments.k, false); 275 | 276 | printf("fit model\n"); 277 | if (param.TASK == TASK_RANKING) { 278 | assert(arguments.train_pairs != NULL && 279 | "Ranking requires the option '--train-pairs'"); 280 | ffm_matrix *train_pairs = ffm_matrix_from_file(arguments.train_pairs); 281 | cs *X_t = cs_transpose(train_data.X, 1); 282 | cs_spfree(train_data.X); 283 | train_data.X = X_t; 284 | ffm_fit_sgd_bpr(coef, train_data.X, train_pairs, param); 285 | // printf("c%", arguments.train_pairs); 286 | } else 287 | sparse_fit(coef, train_data.X, test_data.X, train_data.y, y_test_predict, 288 | param); 289 | 290 | // the predictions are calculated during the training phase for mcmc 291 | if (param.SOLVER == SOLVER_ALS) { 292 | sparse_predict(coef, test_data.X, y_test_predict); 293 | if (param.TASK == TASK_CLASSIFICATION) 294 | ffm_vector_normal_cdf(y_test_predict); 295 | } 296 | 297 | if (param.SOLVER == SOLVER_SGD) { 298 | sparse_predict(coef, test_data.X, y_test_predict); 299 | if (param.TASK == TASK_CLASSIFICATION) 300 | ffm_vector_normal_cdf(y_test_predict); 301 | } 302 | 303 | // save predictions 304 | if (arguments.test_predict_file) { 305 | FILE *f = fopen(arguments.test_predict_file, "w"); 306 | for (int i = 0; i < 
y_test_predict->size; i++)
307 | fprintf(f, "%f\n", y_test_predict->data[i]);
308 | fclose(f);
309 | }
310 |
311 | if (param.TASK == TASK_REGRESSION)
312 | printf("\nr2 score: %f \n", ffm_r2_score(test_data.y, y_test_predict));
313 | if (param.TASK == TASK_CLASSIFICATION)
314 | printf("\nacc score: %f \n",
315 | ffm_vector_accuracy(test_data.y, y_test_predict));
316 |
317 | /*
318 | printf("calculate kendall tau\n");
319 | if (param.TASK == TASK_RANKING)
320 | {
321 | ffm_vector * true_order = ffm_vector_get_order(test_data.y);
322 | ffm_vector * pred_order = ffm_vector_get_order(y_test_predict);
323 | double kendall_tau = \
324 | ffm_vector_kendall_tau(true_order, pred_order);
325 | printf("\nkendall tau: %f \n", kendall_tau);
326 | }
327 | */
328 |
329 | exit(0);
330 | }
331 |
--------------------------------------------------------------------------------
/src/tests/test_random.c:
--------------------------------------------------------------------------------
1 | #include "fast_fm.h"
2 | #include <glib.h>
3 |
4 | void test_rng_seed(void) {
5 | ffm_rng *rng1 = ffm_rng_seed(123);
6 | ffm_rng *rng2 = ffm_rng_seed(123);
7 | ffm_rng *rng3 = ffm_rng_seed(123 + 10);
8 |
9 | g_assert_cmpfloat(ffm_rand_normal(rng1, 0, 1), ==,
10 | ffm_rand_normal(rng2, 0, 1));
11 | g_assert_cmpfloat(ffm_rand_normal(rng1, 0, 1), !=,
12 | ffm_rand_normal(rng3, 0, 1));
13 | }
14 |
15 | void test_uniform_mean(void) {
16 | ffm_vector *x = ffm_vector_alloc(100000);
17 | ffm_rng *kr;
18 | kr = ffm_rng_seed(1200);
19 |
20 | double normal_mean = 0.5;
21 | double normal_sigma = sqrt((1. / 12));
22 | int n = x->size;
23 |
24 | for (int i = 0; i < x->size; i++) ffm_vector_set(x, i, ffm_rand_uniform(kr));
25 | double mean = ffm_vector_mean(x);
26 |
27 | /*printf("test %f < %f < %f (n=%d)\n",
28 | normal_mean - (3. * normal_sigma) / sqrt(n), mean,
29 | normal_mean + (3. * normal_sigma) / sqrt(n), n);*/
30 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n), <, mean);
31 | g_assert_cmpfloat(mean, <, normal_mean + (3. * normal_sigma) / sqrt(n));
32 | }
33 |
34 | void test_uniform_var(void) {
35 | ffm_vector *x = ffm_vector_alloc(100000);
36 | int n = x->size;
37 | ffm_rng *kr;
38 | kr = ffm_rng_seed(1200);
39 |
40 | double uniform_sigma = sqrt((1. / 12));
41 | double uniform_var = (1. / 12);
42 |
43 | double normal_mean = uniform_var;
44 | double tmp = uniform_sigma;
45 | double normal_sigma = sqrt(2 * (tmp * tmp * tmp * tmp) / (n - 1));
46 |
47 | for (int i = 0; i < n; i++) ffm_vector_set(x, i, ffm_rand_uniform(kr));
48 | double var = ffm_vector_variance(x);
49 | int n_var = 1;
50 |
51 | /*printf("test %f < %f < %f (n=%d)\n",
52 | normal_mean - (3. * normal_sigma) / sqrt(n_var), var,
53 | normal_mean + (3. * normal_sigma) / sqrt(n_var), n);*/
54 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n_var), <, var);
55 | g_assert_cmpfloat(var, <, normal_mean + (3. * normal_sigma) / sqrt(n_var));
56 | }
57 |
58 | void test_normal_mean(void) {
59 | ffm_vector *x = ffm_vector_alloc(100000);
60 | ffm_rng *kr;
61 | kr = ffm_rng_seed(1200);
62 |
63 | double normal_mean = 20.;
64 | double normal_sigma = 4.;
65 | int n = x->size;
66 |
67 | for (int i = 0; i < x->size; i++)
68 | ffm_vector_set(x, i, ffm_rand_normal(kr, normal_mean, normal_sigma));
69 | double mean = ffm_vector_mean(x);
70 |
71 | /*printf("test %f < %f < %f (n=%d)\n",
72 | normal_mean - (3. * normal_sigma) / sqrt(n), mean,
73 | normal_mean + (3. * normal_sigma) / sqrt(n), n);*/
74 | g_assert_cmpfloat(normal_mean - (3.
* normal_sigma) / sqrt(n), <, mean); 75 | g_assert_cmpfloat(mean, <, normal_mean + (3. * normal_sigma) / sqrt(n)); 76 | } 77 | 78 | void test_normal_var(void) { 79 | ffm_vector *x = ffm_vector_alloc(100000); 80 | int n = x->size; 81 | ffm_rng *kr; 82 | kr = ffm_rng_seed(123); 83 | 84 | double org_normal_sigma = sqrt(4.); 85 | double org_normal_var = org_normal_sigma * org_normal_sigma; 86 | 87 | double normal_mean = org_normal_var; 88 | double tmp = org_normal_sigma; 89 | double normal_sigma = sqrt(2. * (tmp * tmp * tmp * tmp) / (n - 1.0)); 90 | 91 | for (int i = 0; i < n; i++) 92 | ffm_vector_set(x, i, ffm_rand_normal(kr, 10, org_normal_sigma)); 93 | double var = ffm_vector_variance(x); 94 | int n_var = 1; 95 | 96 | /*printf("test %f < %f < %f (n=%d)\n", 97 | normal_mean - (3. * normal_sigma) / sqrt(n_var), var, 98 | normal_mean + (3. * normal_sigma) / sqrt(n_var), n);*/ 99 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n_var), <, var); 100 | g_assert_cmpfloat(var, <, normal_mean + (3. * normal_sigma) / sqrt(n_var)); 101 | } 102 | 103 | void test_gamma_mean(void) { 104 | ffm_vector *x = ffm_vector_alloc(100000); 105 | ffm_rng *kr; 106 | kr = ffm_rng_seed(1200); 107 | 108 | double shape = 1; 109 | double scale = 5; 110 | double normal_mean = scale * shape; 111 | double normal_sigma = sqrt(shape * scale * scale); 112 | int n = x->size; 113 | 114 | for (int i = 0; i < x->size; i++) 115 | ffm_vector_set(x, i, ffm_rand_gamma(kr, shape, scale)); 116 | double mean = ffm_vector_mean(x); 117 | 118 | /*printf("test %f < %f < %f (n=%d)\n", 119 | normal_mean - (3. * normal_sigma) / sqrt(n), mean, 120 | normal_mean + (3. * normal_sigma) / sqrt(n), n);*/ 121 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n), <, mean); 122 | g_assert_cmpfloat(mean, <, normal_mean + (3. * normal_sigma) / sqrt(n)); 123 | } 124 | 125 | void test_gamma_mean_small_scale(void) { 126 | ffm_vector *x = ffm_vector_alloc(100000); 127 | ffm_rng *kr; 128 | kr = ffm_rng_seed(1200); 129 | 130 | double shape = 1; 131 | double scale = .5; 132 | double normal_mean = scale * shape; 133 | double normal_sigma = sqrt(shape * scale * scale); 134 | int n = x->size; 135 | 136 | for (int i = 0; i < x->size; i++) 137 | ffm_vector_set(x, i, ffm_rand_gamma(kr, shape, scale)); 138 | double mean = ffm_vector_mean(x); 139 | 140 | /*printf("test %f < %f < %f (n=%d)\n", 141 | normal_mean - (3. * normal_sigma) / sqrt(n), mean, 142 | normal_mean + (3. * normal_sigma) / sqrt(n), n);*/ 143 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n), <, mean); 144 | g_assert_cmpfloat(mean, <, normal_mean + (3. * normal_sigma) / sqrt(n)); 145 | } 146 | 147 | void test_gamma_var(void) { 148 | ffm_vector *x = ffm_vector_alloc(100000); 149 | int n = x->size; 150 | ffm_rng *kr; 151 | kr = ffm_rng_seed(123); 152 | 153 | double shape = 1; 154 | double scale = 5; 155 | double org_normal_sigma = sqrt(shape * scale * scale); 156 | double org_normal_var = shape * scale * scale; 157 | 158 | double normal_mean = org_normal_var; 159 | double tmp = org_normal_sigma; 160 | double normal_sigma = sqrt(2. * (tmp * tmp * tmp * tmp) / (n - 1.0)); 161 | 162 | for (int i = 0; i < n; i++) 163 | ffm_vector_set(x, i, ffm_rand_gamma(kr, shape, scale)); 164 | double var = ffm_vector_variance(x); 165 | int n_var = 1; 166 | 167 | /*printf("test %f < %f < %f (n=%d)\n", 168 | normal_mean - (3. * normal_sigma) / sqrt(n_var), var, 169 | normal_mean + (3. * normal_sigma) / sqrt(n_var), n);*/ 170 | g_assert_cmpfloat(normal_mean - (3. 
* normal_sigma) / sqrt(n_var), <, var); 171 | g_assert_cmpfloat(var, <, normal_mean + (3. * normal_sigma) / sqrt(n_var)); 172 | } 173 | 174 | void test_exp_mean(void) { 175 | ffm_vector *x = ffm_vector_alloc(100000); 176 | ffm_rng *kr; 177 | kr = ffm_rng_seed(1200); 178 | 179 | double scale = 5; 180 | double normal_mean = 1.0 / scale; 181 | double normal_sigma = sqrt(1.0 / (scale * scale)); 182 | int n = x->size; 183 | 184 | for (int i = 0; i < x->size; i++) 185 | ffm_vector_set(x, i, ffm_rand_exp(kr, scale)); 186 | double mean = ffm_vector_mean(x); 187 | 188 | /*printf("test %f < %f < %f (n=%d)\n", 189 | normal_mean - (3. * normal_sigma) / sqrt(n), mean, 190 | normal_mean + (3. * normal_sigma) / sqrt(n), n);*/ 191 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n), <, mean); 192 | g_assert_cmpfloat(mean, <, normal_mean + (3. * normal_sigma) / sqrt(n)); 193 | } 194 | 195 | void test_exp_var(void) { 196 | ffm_vector *x = ffm_vector_alloc(100000); 197 | int n = x->size; 198 | ffm_rng *kr; 199 | kr = ffm_rng_seed(1234); 200 | 201 | double scale = 5; 202 | double org_normal_sigma = sqrt(1.0 / (scale * scale)); 203 | double org_normal_var = 1.0 / (scale * scale); 204 | 205 | double normal_mean = org_normal_var; 206 | double tmp = org_normal_sigma; 207 | double normal_sigma = sqrt(2. * (tmp * tmp * tmp * tmp) / (n - 1.0)); 208 | 209 | for (int i = 0; i < n; i++) ffm_vector_set(x, i, ffm_rand_exp(kr, scale)); 210 | double var = ffm_vector_variance(x); 211 | int n_var = 1; 212 | 213 | /*printf("test %f < %f < %f (n=%d)\n", 214 | normal_mean - (3. * normal_sigma) / sqrt(n_var), var, 215 | normal_mean + (3. * normal_sigma) / sqrt(n_var), n);*/ 216 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n_var), <, var); 217 | g_assert_cmpfloat(var, <, normal_mean + (3. * normal_sigma) / sqrt(n_var)); 218 | } 219 | 220 | void test_left_trunc_normal_mean(void) { 221 | ffm_vector *x = ffm_vector_alloc(100000); 222 | ffm_rng *kr; 223 | kr = ffm_rng_seed(1202); 224 | 225 | double trunc = 5; 226 | double trunc_mean = 0; 227 | double trunc_sigma = 3; 228 | 229 | // formulas from http://en.wikipedia.org/wiki/Truncated_normal_distribution 230 | double alpha = (trunc - trunc_mean) / trunc_sigma; 231 | double lambda_alpha = ffm_normal_pdf(alpha) / (1.0 - ffm_normal_cdf(alpha)); 232 | double delta = lambda_alpha * (lambda_alpha - alpha); 233 | 234 | double normal_mean = trunc_mean + trunc_sigma * lambda_alpha; 235 | double normal_sigma = sqrt(trunc_sigma * trunc_sigma * (1.0 - delta)); 236 | int n = x->size; 237 | 238 | // for transformation to Nonstandart Normal Population (Seq. 3.1 & 3.2) 239 | // Barr & Sherrill: Mean and Variance of Truncated Normal Distributions 240 | trunc = (trunc + trunc_mean) / trunc_sigma; 241 | for (int i = 0; i < x->size; i++) 242 | ffm_vector_set( 243 | x, i, trunc_mean + ffm_rand_left_trunc_normal(kr, trunc) * trunc_sigma); 244 | double mean = ffm_vector_mean(x); 245 | 246 | /*printf("test %f < %f < %f (n=%d)\n", 247 | normal_mean - (3. * normal_sigma) / sqrt(n), mean, 248 | normal_mean + (3. * normal_sigma) / sqrt(n), n);*/ 249 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n), <, mean); 250 | g_assert_cmpfloat(mean, <, normal_mean + (3. 
* normal_sigma) / sqrt(n)); 251 | } 252 | 253 | void test_left_trunc_normal_mean_neg_trunc(void) { 254 | ffm_vector *x = ffm_vector_alloc(100000); 255 | ffm_rng *kr; 256 | kr = ffm_rng_seed(1202); 257 | 258 | double trunc = -2; 259 | double trunc_mean = 0; 260 | double trunc_sigma = 1; 261 | 262 | // formulas from http://en.wikipedia.org/wiki/Truncated_normal_distribution 263 | double alpha = (trunc - trunc_mean) / trunc_sigma; 264 | double lambda_alpha = ffm_normal_pdf(alpha) / (1.0 - ffm_normal_cdf(alpha)); 265 | double delta = lambda_alpha * (lambda_alpha - alpha); 266 | 267 | double normal_mean = trunc_mean + trunc_sigma * lambda_alpha; 268 | double normal_sigma = sqrt(trunc_sigma * trunc_sigma * (1.0 - delta)); 269 | int n = x->size; 270 | 271 | // for transformation to Nonstandart Normal Population (Seq. 3.1 & 3.2) 272 | // Barr & Sherrill: Mean and Variance of Truncated Normal Distributions 273 | trunc = (trunc + trunc_mean) / trunc_sigma; 274 | for (int i = 0; i < x->size; i++) 275 | ffm_vector_set( 276 | x, i, trunc_mean + ffm_rand_left_trunc_normal(kr, trunc) * trunc_sigma); 277 | double mean = ffm_vector_mean(x); 278 | 279 | /*printf("test %f < %f < %f (n=%d)\n", 280 | normal_mean - (3. * normal_sigma) / sqrt(n), mean, 281 | normal_mean + (3. * normal_sigma) / sqrt(n), n); */ 282 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n), <, mean); 283 | g_assert_cmpfloat(mean, <, normal_mean + (3. * normal_sigma) / sqrt(n)); 284 | } 285 | 286 | void test_left_trunc_normal_var(void) { 287 | ffm_vector *x = ffm_vector_alloc(100000); 288 | int n = x->size; 289 | ffm_rng *kr; 290 | kr = ffm_rng_seed(123); 291 | 292 | double trunc = 5; 293 | double trunc_mean = 0; 294 | double trunc_sigma = 3; 295 | 296 | double alpha = (trunc - trunc_mean) / trunc_sigma; 297 | double lambda_alpha = ffm_normal_pdf(alpha) / (1.0 - ffm_normal_cdf(alpha)); 298 | double delta = lambda_alpha * (lambda_alpha - alpha); 299 | 300 | double org_normal_sigma = sqrt(trunc_sigma * trunc_sigma * (1.0 - delta)); 301 | double org_normal_var = org_normal_sigma * org_normal_sigma; 302 | 303 | double normal_mean = org_normal_var; 304 | double tmp = org_normal_sigma; 305 | double normal_sigma = sqrt(2. * (tmp * tmp * tmp * tmp) / (n - 1.0)); 306 | 307 | // for transformation to Nonstandart Normal Population (Seq. 3.1 & 3.2) 308 | // Barr & Sherrill: Mean and Variance of Truncated Normal Distributions 309 | trunc = (trunc + trunc_mean) / trunc_sigma; 310 | for (int i = 0; i < x->size; i++) 311 | ffm_vector_set( 312 | x, i, trunc_mean + ffm_rand_left_trunc_normal(kr, trunc) * trunc_sigma); 313 | double var = ffm_vector_variance(x); 314 | int n_var = 1; 315 | 316 | /*printf("test %f < %f < %f (n=%d)\n", 317 | normal_mean - (3. * normal_sigma) / sqrt(n_var), var, 318 | normal_mean + (3. * normal_sigma) / sqrt(n_var), n);*/ 319 | g_assert_cmpfloat(normal_mean - (3. * normal_sigma) / sqrt(n_var), <, var); 320 | g_assert_cmpfloat(var, <, normal_mean + (3. 
* normal_sigma) / sqrt(n_var)); 321 | } 322 | 323 | int main(int argc, char **argv) { 324 | g_test_init(&argc, &argv, NULL); 325 | g_test_add_func("/random/rng/ seed", test_rng_seed); 326 | g_test_add_func("/random/uniform/ mean-test", test_uniform_mean); 327 | g_test_add_func("/random/uniform/ var-test", test_uniform_var); 328 | 329 | g_test_add_func("/random/normal/ mean-test", test_normal_mean); 330 | g_test_add_func("/random/normal/ var-test", test_normal_var); 331 | 332 | g_test_add_func("/random/gamma/ mean-test", test_gamma_mean); 333 | g_test_add_func("/random/gamma/ mean-test (scale < 1)", 334 | test_gamma_mean_small_scale); 335 | g_test_add_func("/random/gamma/ var-test", test_gamma_var); 336 | 337 | g_test_add_func("/random/exp/ mean-test", test_exp_mean); 338 | g_test_add_func("/random/exp/ var-test", test_exp_var); 339 | 340 | g_test_add_func("/random/left trunc normal/ mean-test", 341 | test_left_trunc_normal_mean); 342 | g_test_add_func("/random/left trunc normal/ mean-test (neg trunc)", 343 | test_left_trunc_normal_mean_neg_trunc); 344 | g_test_add_func("/random/left trunc normal/ var-test", 345 | test_left_trunc_normal_var); 346 | 347 | return g_test_run(); 348 | } 349 | -------------------------------------------------------------------------------- /src/ffm_als_mcmc.c: -------------------------------------------------------------------------------- 1 | // Author: Immanuel Bayer 2 | // License: BSD 3 clause 3 | 4 | #include "fast_fm.h" 5 | 6 | 7 | void sparse_predict(ffm_coef *coef, cs *A, ffm_vector *y_pred) { 8 | cs * X = cs_transpose (A, 1); 9 | row_predict(coef, X, y_pred); 10 | cs_spfree (X) ; 11 | } 12 | 13 | void col_predict(ffm_coef *coef, cs *A, ffm_vector *y_pred) { 14 | 15 | // y[:] = w_0 16 | ffm_vector_set_all(y_pred, coef->w_0); 17 | // y += Xw 18 | if (coef->w) cs_gaxpy(A, coef->w->data, y_pred->data); 19 | 20 | // check if second order interactions are used 21 | if (!coef->V) return; 22 | 23 | ffm_matrix *V = coef->V; 24 | int k = V->size0; 25 | 26 | int p, j, f, n, *Ap, *Ai; 27 | double *Ax; 28 | n = A->n; 29 | Ap = A->p; 30 | Ai = A->i; 31 | Ax = A->x; 32 | 33 | ffm_vector *tmp = ffm_vector_alloc(A->m); 34 | // over all k 35 | for (f = 0; f < k; f++) { 36 | ffm_vector_set_all(tmp, 0); 37 | // over all cols 38 | for (j = 0; j < n; j++) { 39 | // all nz in this column 40 | // Ai[p] is the (row) position in the original matrix 41 | // Ax[p] is the value at position Ai[p] 42 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 43 | const int row = Ai[p]; 44 | const double tmp_v = ffm_matrix_get(V, f, j); 45 | const double tmp_x = Ax[p]; 46 | 47 | tmp->data[row] += tmp_v * tmp_x; 48 | y_pred->data[row] -= 0.5 * (tmp_x * tmp_x) * (tmp_v * tmp_v); 49 | } 50 | } 51 | ffm_vector_mul(tmp, tmp); 52 | ffm_blas_daxpy(0.5, tmp, y_pred); 53 | } 54 | ffm_vector_free(tmp); 55 | } 56 | 57 | void row_predict(ffm_coef *coef, cs *X, ffm_vector *y_pred) { 58 | // y[:] = w_0 59 | ffm_vector_set_all(y_pred, coef->w_0); 60 | 61 | // y += Xw 62 | if (coef->w) Cs_row_gaxpy(X, coef->w->data, y_pred->data); 63 | 64 | // check if second order interactions are used 65 | if (!coef->V) return; 66 | eval_second_order_term(coef->V, X, y_pred); 67 | } 68 | 69 | int eval_second_order_term(ffm_matrix *V, cs *A, ffm_vector *y) { 70 | // operate on X.T 71 | int k = V->size0; 72 | 73 | int p, j, f, n, *Ap, *Ai; 74 | double *Ax; 75 | if (!CS_CSC(A)) return (0); /* check inputs */ 76 | n = A->n; 77 | Ap = A->p; 78 | Ai = A->i; 79 | Ax = A->x; 80 | // over all k 81 | for (f = 0; f < k; f++) { 82 | // over all 
rows 83 | for (j = 0; j < n; j++) { 84 | double tmp_sum = 0; 85 | // all nz in this column 86 | // Ai[p] is the (col) position in the original matrix 87 | // Ax[p] is the value at position Ai[p] 88 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 89 | // V 90 | double tmp_v = ffm_matrix_get(V, f, Ai[p]); 91 | double tmp_x = Ax[p]; 92 | tmp_sum += tmp_x * tmp_v; 93 | y->data[j] -= 0.5 * (tmp_x * tmp_x) * (tmp_v * tmp_v); 94 | } 95 | y->data[j] += 0.5 * (tmp_sum * tmp_sum); 96 | } 97 | } 98 | return (1); 99 | } 100 | 101 | void sparse_fit(ffm_coef *coef, cs *X_train, cs *X_test, ffm_vector *y, 102 | ffm_vector *y_pred_test, ffm_param param) { 103 | int n_features = X_train->n; 104 | int n_samples = X_train->m; 105 | int k = coef->V ? coef->V->size0 : 0; 106 | int n_iter = param.n_iter; 107 | ffm_vector *w = coef->w; 108 | ffm_matrix *V = coef->V; 109 | double *w_0 = &coef->w_0; 110 | 111 | ffm_rng *rng; 112 | if (param.warm_start) { 113 | // The rng seed needs to be different for each warm start to ensure 114 | // proper mixing of the mcmc chain. 115 | rng= ffm_rng_seed(param.rng_seed * param.n_iter % 31); 116 | } else { 117 | rng= ffm_rng_seed(param.rng_seed); 118 | } 119 | 120 | if (!param.warm_start) init_ffm_coef(coef, param); 121 | 122 | // init err = predict - y 123 | ffm_vector *err = ffm_vector_alloc(n_samples); 124 | sparse_predict(coef, X_train, err); 125 | 126 | ffm_vector *z_target = NULL; 127 | if (param.TASK == TASK_CLASSIFICATION) { 128 | z_target = ffm_vector_calloc(n_samples); 129 | 130 | // ffm_vector_normal_cdf(err); 131 | 132 | if (param.SOLVER == SOLVER_MCMC) 133 | sample_target(rng, z_target, z_target, y); 134 | else 135 | map_update_target(z_target, z_target, y); 136 | 137 | // update class err 138 | ffm_blas_daxpy(-1, z_target, err); 139 | } else 140 | ffm_blas_daxpy(-1, y, err); 141 | 142 | // allocate memory for caches 143 | ffm_vector *column_norms = ffm_vector_alloc(n_features); 144 | Cs_col_norm(X_train, column_norms); 145 | ffm_vector *a_theta_v = ffm_vector_calloc(n_samples); 146 | ffm_vector *XV_f = ffm_vector_calloc(n_samples); 147 | ffm_vector *V_f = ffm_vector_calloc(n_features); 148 | // caches that are not always needed 149 | ffm_vector *tmp_predict_test = NULL; 150 | if (param.SOLVER == SOLVER_MCMC) { 151 | tmp_predict_test = ffm_vector_calloc(y_pred_test->size); 152 | if (!param.warm_start) ffm_vector_set_all(y_pred_test, 0); 153 | } 154 | 155 | int n; 156 | for (n = param.iter_count; n < n_iter; n++) { 157 | if (param.SOLVER == SOLVER_MCMC) sample_hyper_parameter(coef, err, rng); 158 | 159 | double tmp_sigma2 = 0; 160 | double tmp_mu = 0; 161 | // learn bias 162 | if (!param.ignore_w_0) { 163 | double w_0_old = coef->w_0; 164 | if (param.SOLVER == SOLVER_MCMC) { 165 | double tmp_sigma2 = 1. 
/ (coef->alpha * n_samples); 166 | double tmp_mu = 167 | tmp_sigma2 * 168 | (coef->alpha * (-ffm_vector_sum(err) + *w_0 * n_samples)); 169 | *w_0 = ffm_rand_normal(rng, tmp_mu, sqrt(tmp_sigma2)); 170 | } else 171 | *w_0 = (-ffm_vector_sum(err) + *w_0 * n_samples) / ((double)n_samples); 172 | assert(isfinite(*w_0) && "w_0 not finite"); 173 | ffm_vector_add_constant(err, +(*w_0 - w_0_old)); // update error 174 | } 175 | 176 | // first order interactions 177 | if (!param.ignore_w) 178 | for (int f = 0; f < n_features; f++) { 179 | double w_f = ffm_vector_get(w, f); 180 | // w[f] = (err.dot(X_f) + w[f] * norm_rows_X[f]) / (norm_rows_X[f] + 181 | // lambda_) 182 | double tmp = Cs_ddot(X_train, f, err->data); 183 | double c_norm = ffm_vector_get(column_norms, f); 184 | double new_w = 0; 185 | if (param.SOLVER == SOLVER_MCMC) { 186 | tmp_sigma2 = 1. / (coef->alpha * c_norm + coef->lambda_w); 187 | tmp_mu = tmp_sigma2 * (coef->alpha * (w_f * c_norm - tmp) + 188 | coef->mu_w * coef->lambda_w); 189 | new_w = ffm_rand_normal(rng, tmp_mu, sqrt(tmp_sigma2)); 190 | } else 191 | new_w = (-tmp + w_f * c_norm) / (c_norm + coef->lambda_w); 192 | assert(isfinite(new_w) && "w not finite"); 193 | ffm_vector_set(w, f, new_w); 194 | Cs_scal_apy(X_train, f, ffm_vector_get(w, f) - w_f, 195 | err->data); // update error 196 | } 197 | 198 | // second order interactions 199 | if (k > 0) { 200 | for (int f = 0; f < k; f++) { 201 | // XV_f = X.dot(V[:,f]) 202 | ffm_vector_set_all(XV_f, 0); 203 | // ffm_matrix_get_row(V_f, V, f); 204 | // cs_gaxpy(X_train, V_f->data, XV_f->data); 205 | double *V_f_ptr = ffm_matrix_get_row_ptr(V, f); 206 | // cache 207 | cs_gaxpy(X_train, V_f_ptr, XV_f->data); 208 | double lambda_V_k = ffm_vector_get(coef->lambda_V, f); 209 | double mu_V_k = ffm_vector_get(coef->mu_V, f); 210 | 211 | for (int l = 0; l < n_features; l++) { 212 | double V_fl = ffm_matrix_get(V, f, l); 213 | double sum_denominator, sum_nominator; 214 | sum_nominator = sum_denominator = 0; 215 | sparse_v_lf_frac(&sum_denominator, &sum_nominator, X_train, l, err, 216 | XV_f, a_theta_v, V_fl); 217 | double new_V_fl = 0; 218 | if (param.SOLVER == SOLVER_MCMC) { 219 | tmp_sigma2 = 1. 
/ (coef->alpha * sum_denominator + lambda_V_k); 220 | tmp_mu = tmp_sigma2 * 221 | (coef->alpha * sum_nominator + mu_V_k * lambda_V_k); 222 | new_V_fl = ffm_rand_normal(rng, tmp_mu, sqrt(tmp_sigma2)); 223 | } else 224 | new_V_fl = sum_nominator / (sum_denominator + lambda_V_k); 225 | assert(isfinite(new_V_fl) && "V not finite"); 226 | ffm_matrix_set(V, f, l, new_V_fl); 227 | // err = err - a_theta * (V[l, f] - V_fl) # update residual 228 | update_second_order_error(l, X_train, a_theta_v, new_V_fl - V_fl, 229 | err); 230 | // update cache 231 | /* y = alpha*A[:,j]*x+y */ 232 | Cs_scal_apy(X_train, l, new_V_fl - V_fl, XV_f->data); 233 | } 234 | } 235 | } 236 | 237 | // recalculate error in order to stop error amplification 238 | // from numerical inexact error and cache updates 239 | 240 | sparse_predict(coef, X_train, err); 241 | if (param.TASK == TASK_CLASSIFICATION) { 242 | // printf("pred\n"); 243 | // ffm_vector_printf(err); 244 | // approximate target 245 | if (param.SOLVER == SOLVER_MCMC) 246 | sample_target(rng, err, z_target, y); 247 | else 248 | map_update_target(err, z_target, y); 249 | // ffm_vector_normal_cdf(err); 250 | ffm_blas_daxpy(-1, z_target, err); 251 | // printf("z_target\n"); 252 | // ffm_vector_printf(z_target); 253 | } else 254 | ffm_blas_daxpy(-1, y, err); 255 | 256 | // save test predictions for posterior mean 257 | if (param.SOLVER == SOLVER_MCMC) { 258 | sparse_predict(coef, X_test, tmp_predict_test); 259 | 260 | if (param.TASK == TASK_CLASSIFICATION) 261 | ffm_vector_normal_cdf(tmp_predict_test); 262 | ffm_vector_update_mean(y_pred_test, n, tmp_predict_test); 263 | } 264 | } 265 | if (param.TASK == TASK_CLASSIFICATION) ffm_vector_free(z_target); 266 | param.iter_count = n; // TODO this gets lost when returning param 267 | ffm_vector_free_all(column_norms, err, a_theta_v, XV_f, V_f); 268 | ffm_rng_free(rng); 269 | } 270 | 271 | void sample_target(ffm_rng *rng, ffm_vector *y_pred, ffm_vector *z_target, 272 | ffm_vector *y_true) { 273 | assert((y_pred->size == z_target->size && y_pred->size == y_true->size) && 274 | "vectors have different length"); 275 | for (int i = 0; i < y_pred->size; i++) { 276 | double mu = fabs(y_pred->data[i]); 277 | double t_gaussian = ffm_rand_left_trunc_normal(rng, mu); 278 | if (y_true->data[i] > 0) // left truncated 279 | z_target->data[i] = +t_gaussian; 280 | else // right truncated 281 | z_target->data[i] = -t_gaussian; 282 | } 283 | } 284 | 285 | void map_update_target(ffm_vector *y_pred, ffm_vector *z_target, 286 | ffm_vector *y_true) { 287 | assert((y_pred->size == z_target->size && y_pred->size == y_true->size) && 288 | "vectors have different length"); 289 | for (int i = 0; i < y_pred->size; i++) { 290 | double mu = y_pred->data[i]; 291 | if (y_true->data[i] > 0) // left truncated 292 | z_target->data[i] = ffm_normal_pdf(-mu) / (1.0 - ffm_normal_cdf(-mu)); 293 | else // right truncated 294 | z_target->data[i] = -(ffm_normal_pdf(-mu) / ffm_normal_cdf(-mu)); 295 | } 296 | } 297 | 298 | void sample_hyper_parameter(ffm_coef *coef, ffm_vector *err, ffm_rng *rng) { 299 | int n_features = coef->w->size; 300 | int n_samples = err->size; 301 | int k = coef->V ? coef->V->size0 : 0; 302 | 303 | /* 304 | printf("alpah%f, lambda_w%f, mu_w%f", 305 | coef->alpha, coef->lambda_w, coef->mu_w); 306 | if (k> 0) 307 | { 308 | ffm_vector_printf(coef->mu_V); 309 | ffm_vector_printf(coef->lambda_V); 310 | } 311 | */ 312 | 313 | ffm_vector *w = coef->w; 314 | ffm_matrix *V = coef->V; 315 | 316 | // sample alpha 317 | double alpha_n = .5 * (1. 
+ n_samples);
318 | double l2_norm = ffm_blas_dnrm2(err);
319 | double beta_n = .5 * (l2_norm * l2_norm + 1.);
320 | coef->alpha = ffm_rand_gamma(rng, alpha_n, 1. / beta_n);
321 |
322 | // sample the lambdas
323 | double alpha_w = 0.5 * (1 + n_features + 1);
324 | double beta_w = 0;
325 | for (int i = 0; i < n_features; i++)
326 | beta_w += ffm_pow_2(ffm_vector_get(w, i) - coef->mu_w);
327 | beta_w += ffm_pow_2(coef->mu_w) + 1;
328 | beta_w *= 0.5;
329 | coef->lambda_w = ffm_rand_gamma(rng, alpha_w, 1. / beta_w);
330 |
331 | double alpha_V = 0.5 * (1 + n_features + 1);
332 | for (int j = 0; j < k; j++) {
333 | double beta_V_fl = 0;
334 | double mu_V_j = ffm_vector_get(coef->mu_V, j);
335 | for (int i = 0; i < n_features; i++)
336 | beta_V_fl += ffm_pow_2(ffm_matrix_get(V, j, i) - mu_V_j);
337 | beta_V_fl += ffm_pow_2(mu_V_j) + 1;
338 | beta_V_fl *= 0.5;
339 | ffm_vector_set(coef->lambda_V, j,
340 | ffm_rand_gamma(rng, alpha_V, 1. / beta_V_fl));
341 | }
342 |
343 | // sample the mus
344 | double sigma2_mu_w = 1.0 / ((n_features + 1) * coef->lambda_w);
345 | double w_sum = 0;
346 | for (int i = 0; i < n_features; i++) w_sum += ffm_vector_get(w, i);
347 | double mu_mu_w = w_sum / (n_features + 1);
348 | coef->mu_w = ffm_rand_normal(rng, mu_mu_w, sqrt(sigma2_mu_w));
349 |
350 | for (int j = 0; j < k; j++) {
351 | double sigma2_mu_v =
352 | 1.0 / ((n_features + 1) * ffm_vector_get(coef->lambda_V, j));
353 | double v_sum = 0;
354 | for (int i = 0; i < n_features; i++) v_sum += ffm_matrix_get(V, j, i);
355 | double mu_mu_v = v_sum / (n_features + 1);
356 | ffm_vector_set(coef->mu_V, j,
357 | ffm_rand_normal(rng, mu_mu_v, sqrt(sigma2_mu_v)));
358 | }
359 | }
360 |
361 | void update_second_order_error(int j_column, cs *A, ffm_vector *a_theta_v,
362 | double delta, ffm_vector *error) {
363 | int p, *Ap, *Ai;
364 | Ap = A->p;
365 | Ai = A->i;
366 |
367 | // iterate over all nz elements of column j
368 | // Ai[p] is the original row position
369 | for (p = Ap[j_column]; p < Ap[j_column + 1]; p++)
370 | error->data[Ai[p]] += delta * a_theta_v->data[Ai[p]];
371 | }
372 |
373 | void sparse_v_lf_frac(double *sum_denominator, double *sum_nominator, cs *A,
374 | int col_index, ffm_vector *err, ffm_vector *cache,
375 | ffm_vector *a_theta_v, double v_lf) {
376 | int p, j, *Ap, *Ai;
377 | double *Ax;
378 | // if (!CS_CSC (A)) return (0) ; /* check inputs */
379 | Ap = A->p;
380 | Ai = A->i;
381 | Ax = A->x;
382 | j = col_index;
383 | // for (j = 0 ; j < n ; j++)
384 | //{
385 | for (p = Ap[j]; p < Ap[j + 1]; p++) {
386 | double A_jp = Ax[p];
387 | double a_theta = A_jp * cache->data[Ai[p]] - (v_lf * A_jp * A_jp);
388 | a_theta_v->data[Ai[p]] = a_theta;
389 | *sum_denominator += a_theta * a_theta;
390 | *sum_nominator += (v_lf * a_theta - err->data[Ai[p]]) * a_theta;
391 | }
392 | // }
393 | }
394 |
--------------------------------------------------------------------------------
/src/kmath.c:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | #include <string.h>
3 | #include <math.h>
4 | #include "kmath.h"
5 |
6 | /**************************************
7 | *** Pseudo-random number generator ***
8 | **************************************/
9 |
10 | /*
11 | 64-bit Mersenne Twister pseudorandom number generator. Adapted from:
12 |
13 | http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
14 |
15 | which was written by Takuji Nishimura and Makoto Matsumoto and released
16 | under the 3-clause BSD license.
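   Usage within this codebase goes through kr_srand()/kr_rand() below:
   kr_srand(seed) allocates and seeds a generator state, and kr_rand(kr)
   returns raw 64-bit integers. A common way to map those to doubles in
   [0, 1) is to keep the top 53 bits, e.g.

       double u = (kr_rand(kr) >> 11) * (1.0 / 9007199254740992.0);

   (whether ffm_rand_uniform() in ffm_random.c uses exactly this mapping
   is not shown here).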
17 | */ 18 | 19 | #define KR_NN 312 20 | #define KR_MM 156 21 | #define KR_UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */ 22 | #define KR_LM 0x7FFFFFFFULL /* Least significant 31 bits */ 23 | #define M_SQRT2 1.41421356237309504880 /*-- sqrt(2) */ 24 | 25 | struct _krand_t { 26 | int mti; 27 | krint64_t mt[KR_NN]; 28 | }; 29 | 30 | static void kr_srand0(krint64_t seed, krand_t *kr) 31 | { 32 | kr->mt[0] = seed; 33 | for (kr->mti = 1; kr->mti < KR_NN; ++kr->mti) 34 | kr->mt[kr->mti] = 6364136223846793005ULL * (kr->mt[kr->mti - 1] ^ (kr->mt[kr->mti - 1] >> 62)) + kr->mti; 35 | } 36 | 37 | krand_t *kr_srand(krint64_t seed) 38 | { 39 | krand_t *kr; 40 | kr = malloc(sizeof(krand_t)); 41 | kr_srand0(seed, kr); 42 | return kr; 43 | } 44 | 45 | krint64_t kr_rand(krand_t *kr) 46 | { 47 | krint64_t x; 48 | static const krint64_t mag01[2] = { 0, 0xB5026F5AA96619E9ULL }; 49 | if (kr->mti >= KR_NN) { 50 | int i; 51 | if (kr->mti == KR_NN + 1) kr_srand0(5489ULL, kr); 52 | for (i = 0; i < KR_NN - KR_MM; ++i) { 53 | x = (kr->mt[i] & KR_UM) | (kr->mt[i+1] & KR_LM); 54 | kr->mt[i] = kr->mt[i + KR_MM] ^ (x>>1) ^ mag01[(int)(x&1)]; 55 | } 56 | for (; i < KR_NN - 1; ++i) { 57 | x = (kr->mt[i] & KR_UM) | (kr->mt[i+1] & KR_LM); 58 | kr->mt[i] = kr->mt[i + (KR_MM - KR_NN)] ^ (x>>1) ^ mag01[(int)(x&1)]; 59 | } 60 | x = (kr->mt[KR_NN - 1] & KR_UM) | (kr->mt[0] & KR_LM); 61 | kr->mt[KR_NN - 1] = kr->mt[KR_MM - 1] ^ (x>>1) ^ mag01[(int)(x&1)]; 62 | kr->mti = 0; 63 | } 64 | x = kr->mt[kr->mti++]; 65 | x ^= (x >> 29) & 0x5555555555555555ULL; 66 | x ^= (x << 17) & 0x71D67FFFEDA60000ULL; 67 | x ^= (x << 37) & 0xFFF7EEE000000000ULL; 68 | x ^= (x >> 43); 69 | return x; 70 | } 71 | 72 | #ifdef _KR_MAIN 73 | int main(int argc, char *argv[]) 74 | { 75 | long i, N = 200000000; 76 | krand_t *kr; 77 | if (argc > 1) N = atol(argv[1]); 78 | kr = kr_srand(11); 79 | for (i = 0; i < N; ++i) kr_rand(kr); 80 | // for (i = 0; i < N; ++i) lrand48(); 81 | free(kr); 82 | return 0; 83 | } 84 | #endif 85 | 86 | /* ***************************** 87 | *** Non-linear programming *** 88 | ******************************/ 89 | 90 | /* Hooke-Jeeves algorithm for nonlinear minimization 91 | 92 | Based on the pseudocodes by Bell and Pike (CACM 9(9):684-685), and 93 | the revision by Tomlin and Smith (CACM 12(11):637-638). Both of the 94 | papers are comments on Kaupe's Algorithm 178 "Direct Search" (ACM 95 | 6(6):313-314). The original algorithm was designed by Hooke and 96 | Jeeves (ACM 8:212-229). This program is further revised according to 97 | Johnson's implementation at Netlib (opt/hooke.c). 98 | 99 | Hooke-Jeeves algorithm is very simple and it works quite well on a 100 | few examples. However, it might fail to converge due to its heuristic 101 | nature. A possible improvement, as is suggested by Johnson, may be to 102 | choose a small r at the beginning to quickly approach to the minimum 103 | and a large r at later step to hit the minimum. 
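   A usage sketch (kmin_f is assumed to be typedef'd in kmath.h as
   double (*)(int n, double *x, void *data), matching how 'func' is
   invoked below):

       static double sphere(int n, double *x, void *data)
       {
           double s = 0.0;
           int i;
           for (i = 0; i < n; ++i) s += x[i] * x[i];
           return s;
       }

       double x[2] = { 3.0, -2.0 };
       double fmin = kmin_hj(sphere, 2, x, 0, 0.5, 1e-7, 50000);

   Here 0.5 is the step-reduction factor r, 1e-7 the convergence radius
   eps and 50000 the maximum number of function evaluations; on return,
   x holds the minimizer found.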
104 | */ 105 | 106 | static double __kmin_hj_aux(kmin_f func, int n, double *x1, void *data, double fx1, double *dx, int *n_calls) 107 | { 108 | int k, j = *n_calls; 109 | double ftmp; 110 | for (k = 0; k != n; ++k) { 111 | x1[k] += dx[k]; 112 | ftmp = func(n, x1, data); ++j; 113 | if (ftmp < fx1) fx1 = ftmp; 114 | else { /* search the opposite direction */ 115 | dx[k] = 0.0 - dx[k]; 116 | x1[k] += dx[k] + dx[k]; 117 | ftmp = func(n, x1, data); ++j; 118 | if (ftmp < fx1) fx1 = ftmp; 119 | else x1[k] -= dx[k]; /* back to the original x[k] */ 120 | } 121 | } 122 | *n_calls = j; 123 | return fx1; /* here: fx1=f(n,x1) */ 124 | } 125 | 126 | double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls) 127 | { 128 | double fx, fx1, *x1, *dx, radius; 129 | int k, n_calls = 0; 130 | x1 = (double*)calloc(n, sizeof(double)); 131 | dx = (double*)calloc(n, sizeof(double)); 132 | for (k = 0; k != n; ++k) { /* initial directions, based on MGJ */ 133 | dx[k] = fabs(x[k]) * r; 134 | if (dx[k] == 0) dx[k] = r; 135 | } 136 | radius = r; 137 | fx1 = fx = func(n, x, data); ++n_calls; 138 | for (;;) { 139 | memcpy(x1, x, n * sizeof(double)); /* x1 = x */ 140 | fx1 = __kmin_hj_aux(func, n, x1, data, fx, dx, &n_calls); 141 | while (fx1 < fx) { 142 | for (k = 0; k != n; ++k) { 143 | double t = x[k]; 144 | dx[k] = x1[k] > x[k]? fabs(dx[k]) : 0.0 - fabs(dx[k]); 145 | x[k] = x1[k]; 146 | x1[k] = x1[k] + x1[k] - t; 147 | } 148 | fx = fx1; 149 | if (n_calls >= max_calls) break; 150 | fx1 = func(n, x1, data); ++n_calls; 151 | fx1 = __kmin_hj_aux(func, n, x1, data, fx1, dx, &n_calls); 152 | if (fx1 >= fx) break; 153 | for (k = 0; k != n; ++k) 154 | if (fabs(x1[k] - x[k]) > .5 * fabs(dx[k])) break; 155 | if (k == n) break; 156 | } 157 | if (radius >= eps) { 158 | if (n_calls >= max_calls) break; 159 | radius *= r; 160 | for (k = 0; k != n; ++k) dx[k] *= r; 161 | } else break; /* converge */ 162 | } 163 | free(x1); free(dx); 164 | return fx1; 165 | } 166 | 167 | // I copied this function somewhere several years ago with some of my modifications, but I forgot the source. 168 | double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin) 169 | { 170 | double bound, u, r, q, fu, tmp, fa, fb, fc, c; 171 | const double gold1 = 1.6180339887; 172 | const double gold2 = 0.3819660113; 173 | const double tiny = 1e-20; 174 | const int max_iter = 100; 175 | 176 | double e, d, w, v, mid, tol1, tol2, p, eold, fv, fw; 177 | int iter; 178 | 179 | fa = func(a, data); fb = func(b, data); 180 | if (fb > fa) { // swap, such that f(a) > f(b) 181 | tmp = a; a = b; b = tmp; 182 | tmp = fa; fa = fb; fb = tmp; 183 | } 184 | c = b + gold1 * (b - a), fc = func(c, data); // golden section extrapolation 185 | while (fb > fc) { 186 | bound = b + 100.0 * (c - b); // the farthest point where we want to go 187 | r = (b - a) * (fb - fc); 188 | q = (b - c) * (fb - fa); 189 | if (fabs(q - r) < tiny) { // avoid 0 denominator 190 | tmp = q > r? 
tiny : 0.0 - tiny;
191 | } else tmp = q - r;
192 | u = b - ((b - c) * q - (b - a) * r) / (2.0 * tmp); // u is the parabolic extrapolation point
193 | if ((b > u && u > c) || (b < u && u < c)) { // u lies between b and c
194 | fu = func(u, data);
195 | if (fu < fc) { // (b,u,c) bracket the minimum
196 | a = b; b = u; fa = fb; fb = fu;
197 | break;
198 | } else if (fu > fb) { // (a,b,u) bracket the minimum
199 | c = u; fc = fu;
200 | break;
201 | }
202 | u = c + gold1 * (c - b); fu = func(u, data); // golden section extrapolation
203 | } else if ((c > u && u > bound) || (c < u && u < bound)) { // u lies between c and bound
204 | fu = func(u, data);
205 | if (fu < fc) { // fb > fc > fu
206 | b = c; c = u; u = c + gold1 * (c - b);
207 | fb = fc; fc = fu; fu = func(u, data);
208 | } else { // (b,c,u) bracket the minimum
209 | a = b; b = c; c = u;
210 | fa = fb; fb = fc; fc = fu;
211 | break;
212 | }
213 | } else if ((u > bound && bound > c) || (u < bound && bound < c)) { // u goes beyond the bound
214 | u = bound; fu = func(u, data);
215 | } else { // u goes the other way around, use golden section extrapolation
216 | u = c + gold1 * (c - b); fu = func(u, data);
217 | }
218 | a = b; b = c; c = u;
219 | fa = fb; fb = fc; fc = fu;
220 | }
221 | if (a > c) u = a, a = c, c = u; // swap
222 |
223 | // now, a<b<c with fa>fb and fb<fc; move on to Brent's method proper
224 | e = d = 0.0; w = v = b; fv = fw = fb;
225 | for (iter = 0; iter < max_iter; ++iter) {
226 | mid = 0.5 * (a + c);
227 | tol2 = 2.0 * (tol1 = tol * fabs(b) + tiny);
228 | if (fabs(b - mid) <= tol2 - 0.5 * (c - a)) {
229 | *xmin = b; return fb; // converged
230 | }
231 |
232 | if (fabs(e) > tol1) {
233 | // related to parabolic interpolation
234 | r = (b - w) * (fb - fv);
235 | q = (b - v) * (fb - fw);
236 | p = (b - v) * q - (b - w) * r;
237 | q = 2.0 * (q - r);
238 | if (q > 0.0) p = 0.0 - p;
239 | else q = 0.0 - q;
240 | eold = e; e = d;
241 | if (fabs(p) >= fabs(0.5 * q * eold) || p <= q * (a - b) || p >= q * (c - b)) {
242 | d = gold2 * (e = (b >= mid ? a - b : c - b));
243 | } else {
244 | d = p / q; u = b + d; // actual parabolic interpolation happens here
245 | if (u - a < tol2 || c - u < tol2)
246 | d = (mid > b)? tol1 : 0.0 - tol1;
247 | }
248 | } else d = gold2 * (e = (b >= mid ? a - b : c - b)); // golden section interpolation
249 | u = fabs(d) >= tol1 ? b + d : b + (d > 0.0?
tol1 : -tol1); 250 | fu = func(u, data); 251 | if (fu <= fb) { // u is the minimum point so far 252 | if (u >= b) a = b; 253 | else c = b; 254 | v = w; w = b; b = u; fv = fw; fw = fb; fb = fu; 255 | } else { // adjust (a,c) and (u,v,w) 256 | if (u < b) a = u; 257 | else c = u; 258 | if (fu <= fw || w == b) { 259 | v = w; w = u; 260 | fv = fw; fw = fu; 261 | } else if (fu <= fv || v == b || v == w) { 262 | v = u; fv = fu; 263 | } 264 | } 265 | } 266 | *xmin = b; 267 | return fb; 268 | } 269 | 270 | /************************* 271 | *** Special functions *** 272 | *************************/ 273 | 274 | /* Log gamma function 275 | * \log{\Gamma(z)} 276 | * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 277 | */ 278 | double kf_lgamma(double z) 279 | { 280 | double x = 0; 281 | x += 0.1659470187408462e-06 / (z+7); 282 | x += 0.9934937113930748e-05 / (z+6); 283 | x -= 0.1385710331296526 / (z+5); 284 | x += 12.50734324009056 / (z+4); 285 | x -= 176.6150291498386 / (z+3); 286 | x += 771.3234287757674 / (z+2); 287 | x -= 1259.139216722289 / (z+1); 288 | x += 676.5203681218835 / z; 289 | x += 0.9999999999995183; 290 | return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5); 291 | } 292 | 293 | /* complementary error function 294 | * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt 295 | * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 296 | */ 297 | double kf_erfc(double x) 298 | { 299 | const double p0 = 220.2068679123761; 300 | const double p1 = 221.2135961699311; 301 | const double p2 = 112.0792914978709; 302 | const double p3 = 33.912866078383; 303 | const double p4 = 6.37396220353165; 304 | const double p5 = .7003830644436881; 305 | const double p6 = .03526249659989109; 306 | const double q0 = 440.4137358247522; 307 | const double q1 = 793.8265125199484; 308 | const double q2 = 637.3336333788311; 309 | const double q3 = 296.5642487796737; 310 | const double q4 = 86.78073220294608; 311 | const double q5 = 16.06417757920695; 312 | const double q6 = 1.755667163182642; 313 | const double q7 = .08838834764831844; 314 | double expntl, z, p; 315 | z = fabs(x) * M_SQRT2; 316 | if (z > 37.) return x > 0.? 0. : 2.; 317 | expntl = exp(z * z * - .5); 318 | if (z < 10. / M_SQRT2) // for small z 319 | p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0) 320 | / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0); 321 | else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65))))); 322 | return x > 0.? 2. * p : 2. * (1. - p); 323 | } 324 | 325 | /* The following computes regularized incomplete gamma functions. 326 | * Formulas are taken from Wiki, with additional input from Numerical 327 | * Recipes in C (for modified Lentz's algorithm) and AS245 328 | * (http://lib.stat.cmu.edu/apstat/245). 329 | * 330 | * A good online calculator is available at: 331 | * 332 | * http://www.danielsoper.com/statcalc/calc23.aspx 333 | * 334 | * It calculates upper incomplete gamma function, which equals 335 | * kf_gammaq(s,z)*tgamma(s). 336 | */ 337 | 338 | #define KF_GAMMA_EPS 1e-14 339 | #define KF_TINY 1e-290 340 | 341 | // regularized lower incomplete gamma function, by series expansion 342 | static double _kf_gammap(double s, double z) 343 | { 344 | double sum, x; 345 | int k; 346 | for (k = 1, sum = x = 1.; k < 100; ++k) { 347 | sum += (x *= z / (s + k)); 348 | if (x / sum < KF_GAMMA_EPS) break; 349 | } 350 | return exp(s * log(z) - z - kf_lgamma(s + 1.) 
+ log(sum)); 351 | } 352 | // regularized upper incomplete gamma function, by continued fraction 353 | static double _kf_gammaq(double s, double z) 354 | { 355 | int j; 356 | double C, D, f; 357 | f = 1. + z - s; C = f; D = 0.; 358 | // Modified Lentz's algorithm for computing continued fraction 359 | // See Numerical Recipes in C, 2nd edition, section 5.2 360 | for (j = 1; j < 100; ++j) { 361 | double a = j * (s - j), b = (j<<1) + 1 + z - s, d; 362 | D = b + a * D; 363 | if (D < KF_TINY) D = KF_TINY; 364 | C = b + a / C; 365 | if (C < KF_TINY) C = KF_TINY; 366 | D = 1. / D; 367 | d = C * D; 368 | f *= d; 369 | if (fabs(d - 1.) < KF_GAMMA_EPS) break; 370 | } 371 | return exp(s * log(z) - z - kf_lgamma(s) - log(f)); 372 | } 373 | 374 | double kf_gammap(double s, double z) 375 | { 376 | return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z); 377 | } 378 | 379 | double kf_gammaq(double s, double z) 380 | { 381 | return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z); 382 | } 383 | 384 | /* Regularized incomplete beta function. The method is taken from 385 | * Numerical Recipe in C, 2nd edition, section 6.4. The following web 386 | * page calculates the incomplete beta function, which equals 387 | * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): 388 | * 389 | * http://www.danielsoper.com/statcalc/calc36.aspx 390 | */ 391 | static double kf_betai_aux(double a, double b, double x) 392 | { 393 | double C, D, f; 394 | int j; 395 | if (x == 0.) return 0.; 396 | if (x == 1.) return 1.; 397 | f = 1.; C = f; D = 0.; 398 | // Modified Lentz's algorithm for computing continued fraction 399 | for (j = 1; j < 200; ++j) { 400 | double aa, d; 401 | int m = j>>1; 402 | aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1)) 403 | : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m)); 404 | D = 1. + aa * D; 405 | if (D < KF_TINY) D = KF_TINY; 406 | C = 1. + aa / C; 407 | if (C < KF_TINY) C = KF_TINY; 408 | D = 1. / D; 409 | d = C * D; 410 | f *= d; 411 | if (fabs(d - 1.) < KF_GAMMA_EPS) break; 412 | } 413 | return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f; 414 | } 415 | double kf_betai(double a, double b, double x) 416 | { 417 | return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x); 418 | } 419 | 420 | /****************** 421 | *** Statistics *** 422 | ******************/ 423 | 424 | double km_ks_dist(int na, const double a[], int nb, const double b[]) // a[] and b[] MUST BE sorted 425 | { 426 | int ia = 0, ib = 0; 427 | double fa = 0, fb = 0, sup = 0, na1 = 1. / na, nb1 = 1. 
/ nb; 428 | while (ia < na || ib < nb) { 429 | if (ia == na) fb += nb1, ++ib; 430 | else if (ib == nb) fa += na1, ++ia; 431 | else if (a[ia] < b[ib]) fa += na1, ++ia; 432 | else if (a[ia] > b[ib]) fb += nb1, ++ib; 433 | else fa += na1, fb += nb1, ++ia, ++ib; 434 | if (sup < fabs(fa - fb)) sup = fabs(fa - fb); 435 | } 436 | return sup; 437 | } 438 | 439 | #ifdef KF_MAIN 440 | #include <stdio.h> 441 | #include "ksort.h" 442 | KSORT_INIT_GENERIC(double) 443 | int main(int argc, char *argv[]) 444 | { 445 | double x = 5.5, y = 3; 446 | double a, b; 447 | double xx[] = {0.22, -0.87, -2.39, -1.79, 0.37, -1.54, 1.28, -0.31, -0.74, 1.72, 0.38, -0.17, -0.62, -1.10, 0.30, 0.15, 2.30, 0.19, -0.50, -0.09}; 448 | double yy[] = {-5.13, -2.19, -2.43, -3.83, 0.50, -3.25, 4.32, 1.63, 5.18, -0.43, 7.11, 4.87, -3.10, -5.81, 3.76, 6.31, 2.58, 0.07, 5.76, 3.50}; 449 | ks_introsort(double, 20, xx); ks_introsort(double, 20, yy); 450 | printf("K-S distance: %f\n", km_ks_dist(20, xx, 20, yy)); 451 | printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x)); 452 | printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y)); 453 | a = 2; b = 2; x = 0.5; 454 | printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b))); 455 | return 0; 456 | } 457 | #endif 458 | -------------------------------------------------------------------------------- /src/tests/test_ffm_utils.c: -------------------------------------------------------------------------------- 1 | #include "fast_fm.h" 2 | #include <glib.h> 3 | 4 | void test_ffm_vector_mean(void) { 5 | double data[] = {1, 2, -3, 4, 5}; 6 | ffm_vector x = {.data = data, .size = 5}; 7 | double mean = ffm_vector_mean(&x); 8 | double ref_mean = 1.8; 9 | g_assert_cmpfloat(fabs(ref_mean - mean), <=, 1e-15); 10 | } 11 | 12 | void test_ffm_vector_variance(void) { 13 | double data[] = {-1, 2, 3, 4, 5}; 14 | ffm_vector x = {.data = data, .size = 5}; 15 | double var = ffm_vector_variance(&x); 16 | double ref_var = 4.24; 17 | g_assert_cmpfloat(fabs(ref_var - var), <=, 1e-15); 18 | } 19 | 20 | void test_ffm_normal_cdf(void) { 21 | double x[] = {0.1, 1.2, 2.3, -1.1, -3.3, -7.7}; 22 | double Phi[] = {.539827837277028981, .884930329778291731, 23 | .989275889978324194, .1356660609463826751, 24 | .0004834241423837772, .0000000000000068033}; 25 | for (int i = 0; i < 6; i++) 26 | g_assert_cmpfloat(fabs(ffm_normal_cdf(x[i]) - Phi[i]), <=, 1e-15); 27 | } 28 | void test_ffm_normal_pdf(void) { 29 | double x[] = {0.1, 1.2, 2.3, -1.1, -3.3, -7.7}; 30 | double phi[] = {3.96952547477011808e-01, 1.94186054983212952e-01, 31 | 2.83270377416011861e-02, 2.17852177032550526e-01, 32 | 1.72256893905368123e-03, 5.32414837225295172e-14}; 33 | for (int i = 0; i < 6; i++) 34 | g_assert_cmpfloat(fabs(ffm_normal_pdf(x[i]) - phi[i]), <=, 1e-15); 35 | } 36 | void test_ffm_vector_update_mean(void) { 37 | double true_mean[] = {5, -100, 1}; 38 | double update0[] = {5, 100, 10}; 39 | double update1[] = {7, 0, -7}; 40 | double update2[] = {3, -400, 0}; 41 | ffm_vector y_true_mean = {.data = true_mean, .size = 3}; 42 | ffm_vector y_update0 = {.data = update0, .size = 3}; 43 | ffm_vector y_update1 = {.data = update1, .size = 3}; 44 | ffm_vector y_update2 = {.data = update2, .size = 3}; 45 | ffm_vector *y_running_mean = ffm_vector_calloc(3); 46 | 47 | ffm_vector_update_mean(y_running_mean, 0, &y_update0); 48 | ffm_vector_update_mean(y_running_mean, 1, &y_update1); 49 | ffm_vector_update_mean(y_running_mean, 2, &y_update2); 50 | 51 | for (int i = 0; i < 3; i++) 52 |
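/* the running mean after the three updates must equal the exact per-element mean: (5+7+3)/3 = 5, (100+0-400)/3 = -100, (10-7+0)/3 = 1 */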
g_assert_cmpfloat(ffm_vector_get(y_running_mean, i), ==, 53 | ffm_vector_get(&y_true_mean, i)); 54 | ffm_vector_free(y_running_mean); 55 | } 56 | 57 | void test_ffm_vector_kendall_tau(void) { 58 | double order[] = {1, 2, 3, 4, 5}; 59 | double order_wrong[] = {5, 3, 4, 2, 1}; 60 | double order_inv[] = {5, 4, 3, 2, 1}; 61 | ffm_vector y_order = {.data = order, .size = 5}; 62 | ffm_vector y_inv = {.data = order_inv, .size = 5}; 63 | ffm_vector y_wrong = {.data = order_wrong, .size = 5}; 64 | 65 | g_assert_cmpfloat(ffm_vector_kendall_tau(&y_order, &y_order), ==, 1); 66 | g_assert_cmpfloat(ffm_vector_kendall_tau(&y_order, &y_inv), ==, -1); 67 | g_assert_cmpfloat(ffm_vector_kendall_tau(&y_order, &y_wrong), !=, -1); 68 | } 69 | 70 | void test_ffm_vector_get_order(void) { 71 | double values[] = {1, 2, 5.5, 20, 3}; 72 | double order[] = {0, 1, 4, 2, 3}; 73 | ffm_vector y_values = {.data = values, .size = 5}; 74 | ffm_vector *y_order = ffm_vector_get_order(&y_values); 75 | for (int i = 0; i < y_order->size; i++) 76 | assert(ffm_vector_get(y_order, i) == order[i]); 77 | } 78 | 79 | void test_ffm_vector_to_rank_comparision(void) { 80 | double y_inc[] = {1, 2, 3}; 81 | ffm_vector y = {.data = y_inc, .size = 3}; 82 | ffm_matrix *comparison_inc = ffm_vector_to_rank_comparision(&y); 83 | for (int i = 0; i < comparison_inc->size0; i++) 84 | assert(ffm_matrix_get(comparison_inc, i, 0) > 85 | ffm_matrix_get(comparison_inc, i, 1)); 86 | 87 | double y_dec[] = {3, 2, 1}; 88 | y.data = y_dec; 89 | ffm_matrix *comparison_dec = ffm_vector_to_rank_comparision(&y); 90 | for (int i = 0; i < comparison_dec->size0; i++) 91 | assert(ffm_matrix_get(comparison_dec, i, 0) < 92 | ffm_matrix_get(comparison_dec, i, 1)); 93 | ffm_matrix_free(comparison_dec); 94 | ffm_matrix_free(comparison_inc); 95 | } 96 | 97 | void test_ffm_average_precision_at_cutoff(void) { 98 | double org_d[] = {1, 2, 3, 4, 5}; 99 | ffm_vector org = {.data = org_d, .size = 5}; 100 | double pred_d[] = {6, 4, 7, 1, 2}; 101 | ffm_vector pred = {.data = pred_d, .size = 5}; 102 | g_assert_cmpfloat(ffm_average_precision_at_cutoff(&org, &pred, 2), ==, 0.25); 103 | double pred_d2[] = {1, 1, 1, 1, 1, 1}; 104 | pred.data = pred_d2; 105 | g_assert_cmpfloat(ffm_average_precision_at_cutoff(&org, &pred, 5), ==, 0.2); 106 | double pred_d3[] = {1, 2, 3, 1, 1, 1}; 107 | pred.data = pred_d3; 108 | g_assert_cmpfloat(ffm_average_precision_at_cutoff(&org, &pred, 3), ==, 1.0); 109 | } 110 | 111 | void test_ffm_sigmoid(void) { 112 | g_assert_cmpfloat(ffm_sigmoid(-100000), <, 1e-16); 113 | g_assert_cmpfloat(fabs(ffm_sigmoid(100000) - 1), <, 1e-16); 114 | g_assert_cmpfloat(fabs(ffm_sigmoid(0) - .5), <, 1e-16); 115 | } 116 | 117 | void test_ffm_vector_accuracy(void) { 118 | double labels[] = {1, 1, -1, -1}; 119 | double labels_wrong[] = {-1, -1, 1, 1}; 120 | double labels_half[] = {-1, 1, 1, -1}; 121 | double labels_probas[] = {0.55, 0.9, .01, .48}; 122 | ffm_vector org = {.data = labels, .size = 4}; 123 | ffm_vector wrong = {.data = labels_wrong, .size = 4}; 124 | ffm_vector half = {.data = labels_half, .size = 4}; 125 | ffm_vector probas = {.data = labels_probas, .size = 4}; 126 | 127 | g_assert_cmpfloat(ffm_vector_accuracy(&org, &wrong), ==, 0); 128 | g_assert_cmpfloat(ffm_vector_accuracy(&org, &org), ==, 1); 129 | g_assert_cmpfloat(ffm_vector_accuracy(&org, &half), ==, 0.5); 130 | g_assert_cmpfloat(ffm_vector_accuracy(&org, &probas), ==, 1); 131 | } 132 | 133 | void test_ffm_vector_median(void) { 134 | double values[] = {5.1, 10.0, 1.1, 2.2, -2}; 135 | ffm_vector v = {.data = 
values, .size = 5}; 136 | double median = ffm_vector_median(&v); 137 | assert(median == 2.2); 138 | 139 | double values_even[] = {5.1, 10.0, 1.1, -2}; 140 | ffm_vector v_even = {.data = values_even, .size = 4}; 141 | double median_even = ffm_vector_median(&v_even); 142 | g_assert_cmpfloat(fabs(median_even - 3.1), <, 1e-9); 143 | } 144 | void test_ffm_vector_make_labels(void) { 145 | double values[] = {5.1, 10.0, 1.1, -2}; 146 | ffm_vector v = {.data = values, .size = 4}; 147 | double labels[] = {1, 1, -1, -1}; 148 | ffm_vector_make_labels(&v); 149 | for (int i = 0; i < v.size; i++) 150 | g_assert_cmpfloat(labels[i], ==, ffm_vector_get(&v, i)); 151 | } 152 | void test_ffm_vector_sort(void) { 153 | double values[] = {5.1, 10.0, 1.1, 2.2, -2}; 154 | ffm_vector v = {.data = values, .size = 5}; 155 | double values_sorted[] = {-2, 1.1, 2.2, 5.1, 10.0}; 156 | ffm_vector_sort(&v); 157 | for (int i = 0; i < 5; i++) assert(ffm_vector_get(&v, i) == values_sorted[i]); 158 | } 159 | 160 | void test_ffm_r2_score(void) { 161 | double y_true_d[] = {3, -.5, 2, 7}; 162 | ffm_vector y_true = {.data = y_true_d, .size = 4}; 163 | double y_pred_d[] = {2.5, 0, 2, 8}; 164 | ffm_vector y_pred = {.data = y_pred_d, .size = 4}; 165 | 166 | g_assert_cmpfloat(fabs(0.94860 - ffm_r2_score(&y_true, &y_pred)), <, 1e-3); 167 | } 168 | 169 | void test_ffm_vector_sum(void) { 170 | ffm_vector *v = ffm_vector_alloc(5); 171 | ffm_vector_set_all(v, 2); 172 | 173 | g_assert_cmpfloat(10, ==, ffm_vector_sum(v)); 174 | } 175 | 176 | void test_Cs_daxpy(void) { 177 | // init X 178 | int m = 5; 179 | int n = 2; 180 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 181 | cs_entry(X, 0, 0, 6); 182 | cs_entry(X, 0, 1, 1); 183 | cs_entry(X, 1, 0, 2); 184 | cs_entry(X, 1, 1, 3); 185 | cs_entry(X, 2, 0, 3); 186 | cs_entry(X, 3, 0, 6); 187 | cs_entry(X, 3, 1, 1); 188 | cs_entry(X, 4, 0, 4); 189 | cs_entry(X, 4, 1, 5); 190 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 191 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 192 | 193 | ffm_vector *v = ffm_vector_alloc(5); 194 | ffm_vector_set_all(v, 10); 195 | ffm_vector_set(v, 4, 0); 196 | 197 | ffm_vector *res = ffm_vector_calloc(5); 198 | 199 | // test multiplying second column 200 | Cs_daxpy(X_csc, 1, .5, v->data, res->data); 201 | g_assert_cmpfloat(5, ==, ffm_vector_get(res, 0)); 202 | g_assert_cmpfloat(0, ==, ffm_vector_get(res, 4)); 203 | 204 | // test multiplying first column 205 | ffm_vector_set_all(res, 0); 206 | Cs_daxpy(X_csc, 0, 3, v->data, res->data); 207 | g_assert_cmpfloat(180, ==, ffm_vector_get(res, 0)); 208 | g_assert_cmpfloat(0, ==, ffm_vector_get(res, 4)); 209 | } 210 | 211 | void test_Cs_row_gaxpy(void) { 212 | // init X 213 | int m = 5; 214 | int n = 2; 215 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 216 | cs_entry(X, 0, 0, 6); 217 | cs_entry(X, 0, 1, 1); 218 | cs_entry(X, 1, 0, 2); 219 | cs_entry(X, 1, 1, 3); 220 | cs_entry(X, 2, 0, 3); 221 | cs_entry(X, 3, 0, 6); 222 | cs_entry(X, 3, 1, 1); 223 | cs_entry(X, 4, 0, 4); 224 | cs_entry(X, 4, 1, 5); 225 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 226 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 227 | cs *X_csr = cs_transpose(X_csc, 1); 228 | 229 | ffm_vector *v = ffm_vector_alloc(2); 230 | ffm_vector_set(v, 0, 2); 231 | ffm_vector_set(v, 1, 3); 232 | 233 | ffm_vector *res = ffm_vector_calloc(5); 234 | ffm_vector_set(res, 1, 3); 235 | 236 | ffm_vector *res_row = ffm_vector_calloc(5); 237 | 
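/* seed both result vectors with the same non-zero entry so that the CSR product (Cs_row_gaxpy) and the reference CSC product (cs_gaxpy) can be compared elementwise below */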
ffm_vector_set(res_row, 1, 3); 238 | 239 | Cs_row_gaxpy(X_csr, v->data, res->data); 240 | cs_gaxpy(X_csc, v->data, res_row->data); 241 | 242 | g_assert_cmpfloat(ffm_vector_get(res_row, 0), ==, ffm_vector_get(res, 0)); 243 | g_assert_cmpfloat(ffm_vector_get(res_row, 1), ==, ffm_vector_get(res, 1)); 244 | 245 | cs_spfree(X_csc); 246 | cs_spfree(X_csr); 247 | } 248 | 249 | void test_Cs_scal_apy(void) { 250 | // init X 251 | int m = 5; 252 | int n = 2; 253 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 254 | cs_entry(X, 0, 0, 6); 255 | cs_entry(X, 0, 1, 1); 256 | cs_entry(X, 1, 0, 2); 257 | cs_entry(X, 1, 1, 3); 258 | cs_entry(X, 2, 0, 3); 259 | cs_entry(X, 3, 0, 6); 260 | cs_entry(X, 3, 1, 1); 261 | cs_entry(X, 4, 0, 4); 262 | cs_entry(X, 4, 1, 5); 263 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 264 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 265 | 266 | ffm_vector *res = ffm_vector_alloc(5); 267 | ffm_vector_set_all(res, 1); 268 | 269 | // test multiplying second column 270 | Cs_scal_apy(X_csc, 0, 2.0, res->data); 271 | g_assert_cmpfloat(13, ==, ffm_vector_get(res, 0)); 272 | g_assert_cmpfloat(9, ==, ffm_vector_get(res, 4)); 273 | } 274 | 275 | void test_Cs_scal_a2py(void) { 276 | // init X 277 | int m = 5; 278 | int n = 2; 279 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 280 | cs_entry(X, 0, 0, 6); 281 | cs_entry(X, 0, 1, 1); 282 | cs_entry(X, 1, 0, 2); 283 | cs_entry(X, 1, 1, 3); 284 | cs_entry(X, 2, 0, 3); 285 | cs_entry(X, 3, 0, 6); 286 | cs_entry(X, 3, 1, 1); 287 | cs_entry(X, 4, 0, 4); 288 | cs_entry(X, 4, 1, 5); 289 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 290 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 291 | 292 | ffm_vector *res = ffm_vector_alloc(5); 293 | ffm_vector_set_all(res, 1); 294 | 295 | // test multiplying second column 296 | Cs_scal_a2py(X_csc, 0, 2.0, res->data); 297 | g_assert_cmpfloat(73, ==, ffm_vector_get(res, 0)); 298 | g_assert_cmpfloat(33, ==, ffm_vector_get(res, 4)); 299 | } 300 | 301 | void test_Cs_col_norm(void) { 302 | // init X 303 | int m = 5; 304 | int n = 2; 305 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 306 | cs_entry(X, 0, 0, 6); 307 | cs_entry(X, 0, 1, 1); 308 | cs_entry(X, 1, 0, 2); 309 | cs_entry(X, 1, 1, 3); 310 | cs_entry(X, 2, 0, 3); 311 | cs_entry(X, 3, 0, 6); 312 | cs_entry(X, 3, 1, 1); 313 | cs_entry(X, 4, 0, 4); 314 | cs_entry(X, 4, 1, 5); 315 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 316 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 317 | 318 | ffm_vector *res = ffm_vector_calloc(n); 319 | 320 | // test multiplying second column 321 | Cs_col_norm(X_csc, res); 322 | g_assert_cmpfloat(101, ==, ffm_vector_get(res, 0)); 323 | g_assert_cmpfloat(36, ==, ffm_vector_get(res, 1)); 324 | } 325 | 326 | void test_Cs_ddot(void) { 327 | // init X 328 | int m = 5; 329 | int n = 2; 330 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 331 | cs_entry(X, 0, 0, 6); 332 | cs_entry(X, 0, 1, 1); 333 | cs_entry(X, 1, 0, 2); 334 | cs_entry(X, 1, 1, 3); 335 | cs_entry(X, 2, 0, 3); 336 | cs_entry(X, 3, 0, 6); 337 | cs_entry(X, 3, 1, 1); 338 | cs_entry(X, 4, 0, 4); 339 | cs_entry(X, 4, 1, 5); 340 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 341 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 342 | 343 | ffm_vector *y = ffm_vector_calloc(m); 344 | ffm_vector_set_all(y, 2); 345 | 346 | g_assert_cmpfloat(42, ==, 
Cs_ddot(X_csc, 0, y->data)); 347 | g_assert_cmpfloat(20, ==, Cs_ddot(X_csc, 1, y->data)); 348 | } 349 | 350 | void test_Cs_write(void) { 351 | // init X 352 | int m = 5; 353 | int n = 2; 354 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 355 | cs_entry(X, 0, 0, 6); 356 | cs_entry(X, 0, 1, 1); 357 | cs_entry(X, 1, 0, 2.22); 358 | cs_entry(X, 1, 1, 3); 359 | cs_entry(X, 2, 0, 3.333); 360 | cs_entry(X, 3, 0, 6); 361 | cs_entry(X, 3, 1, 1); 362 | cs_entry(X, 4, 0, 4.4444); 363 | cs_entry(X, 4, 1, 5); 364 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 365 | 366 | FILE *f = fopen("data/x_dummy.txt", "w"); // open for writing so that cs_load reads back what Cs_write produced 367 | Cs_write(f, X); 368 | fclose(f); 369 | 370 | f = fopen("data/x_dummy.txt", "r"); 371 | cs *X_from_file = cs_load(f); 372 | fclose(f); 373 | 374 | // test that matrix is correct 375 | for (int j = 0; j < X->nz; j++) 376 | g_assert_cmpfloat(X_from_file->x[j], ==, X->x[j]); 377 | } 378 | 379 | void test_read_ffm_matrix_from_file(void) { 380 | ffm_matrix *X = ffm_matrix_from_file("data/matrix"); 381 | assert(ffm_matrix_get(X, 0, 0) == 1); 382 | assert(ffm_matrix_get(X, 1, 0) == 2); 383 | assert(ffm_matrix_get(X, 2, 0) == 4); 384 | 385 | assert(ffm_matrix_get(X, 0, 1) == 2); 386 | assert(ffm_matrix_get(X, 1, 1) == 3); 387 | assert(ffm_matrix_get(X, 2, 1) == 5); 388 | assert(X->size0 == 3); 389 | assert(X->size1 == 2); 390 | } 391 | 392 | void test_read_svm_light_file(void) { 393 | int m = 5; 394 | int n = 2; 395 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 396 | cs_entry(X, 0, 0, 6); 397 | cs_entry(X, 0, 1, 1); 398 | cs_entry(X, 1, 0, 2); 399 | cs_entry(X, 1, 1, 3); 400 | //cs_entry(X, 2, 0, 3); 401 | cs_entry(X, 3, 0, 6); 402 | cs_entry(X, 3, 1, 1); 403 | cs_entry(X, 4, 0, 4); 404 | cs_entry(X, 4, 1, 5); 405 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 406 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 407 | cs_spfree(X); 408 | 409 | fm_data data = read_svm_light_file("data/svm_light_dummy"); 410 | ffm_vector *y = data.y; 411 | 412 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y, 0)); 413 | g_assert_cmpfloat(266.0, ==, ffm_vector_get(y, 1)); 414 | g_assert_cmpfloat(29.0, ==, ffm_vector_get(y, 2)); 415 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y, 3)); 416 | g_assert_cmpfloat(848.0, ==, ffm_vector_get(y, 4)); 417 | 418 | int size = sizeof(double) * X_csc->nzmax; 419 | assert(!memcmp(X_csc->x, data.X->x, size)); 420 | } 421 | 422 | void test_read_svm_light_file_without_target(void) { 423 | int m = 5; 424 | int n = 2; 425 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create triplet identity matrix */ 426 | cs_entry(X, 0, 0, 6); 427 | cs_entry(X, 0, 1, 1); 428 | cs_entry(X, 1, 0, 2); 429 | cs_entry(X, 1, 1, 3); 430 | cs_entry(X, 2, 0, 3); 431 | //cs_entry(X, 3, 0, 6); 432 | //cs_entry(X, 3, 1, 1); 433 | cs_entry(X, 4, 0, 4); 434 | cs_entry(X, 4, 1, 5); 435 | // printf ("X:\n") ; cs_print (X, 0) ; /* print A */ 436 | cs *X_csc = cs_compress(X); /* A = compressed-column form of T */ 437 | cs_spfree(X); 438 | 439 | fm_data data = read_svm_light_file("data/svm_light_dummy_witout_target"); 440 | 441 | // check if dummy zero target exists 442 | for (int i = 0; i < data.y->size; i++) assert(data.y->data[i] == 0); 443 | 444 | int size = sizeof(double) * X_csc->nzmax; 445 | assert(!memcmp(X_csc->x, data.X->x, size)); 446 | } 447 | 448 | void test_ffm_vector_mean_square_error(void) { 449 | int size = 10; 450 | ffm_vector *a = ffm_vector_alloc(size); 451 | for (int i = 0; i < size; i++) a->data[i] = i;
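/* a worked sketch of the semantics (see the note on ffm_vector_mean_squared_error in ffm_utils.c below): for a = (0, 1, ..., 9) and b = a + 1 elementwise, every squared difference is 1, so the function would return sqrt(10 / 10) = 1; the identical vectors used here give 0 */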
452 | ffm_vector *b = ffm_vector_alloc(size); 453 | ffm_vector_memcpy(b, a); 454 | g_assert_cmpfloat(ffm_vector_mean_squared_error(a, b), ==, 0); 455 | ffm_vector_free_all(a, b); 456 | } 457 | 458 | void test_ffm_vector_functions(void) { 459 | int size = 5; 460 | ffm_vector *a = ffm_vector_alloc(size); 461 | ffm_vector *b = ffm_vector_alloc(size); 462 | 463 | ffm_vector_set_all(a, 1.0); 464 | ffm_vector_scale(a, 2.0); 465 | ffm_vector_memcpy(b, a); 466 | for (int i = 0; i < size; i++) g_assert_cmpfloat(a->data[i], ==, b->data[i]); 467 | 468 | ffm_vector_add(a, b); 469 | for (int i = 0; i < size; i++) 470 | g_assert_cmpfloat(a->data[i], ==, 2.0 * b->data[i]); 471 | ffm_vector_sub(a, b); 472 | for (int i = 0; i < size; i++) g_assert_cmpfloat(a->data[i], ==, b->data[i]); 473 | 474 | ffm_vector_set_all(b, 2.0); 475 | ffm_vector_mul(a, b); 476 | for (int i = 0; i < size; i++) 477 | g_assert_cmpfloat(a->data[i], ==, 2.0 * b->data[i]); 478 | ffm_vector_free_all(a, b); 479 | } 480 | 481 | void test_ffm_matrix_functions(void) { 482 | ffm_matrix *X = ffm_matrix_calloc(3, 4); 483 | for (int i = 0; i < X->size0 * X->size1; i++) X->data[i] = i; 484 | g_assert_cmpfloat(ffm_matrix_get(X, 0, 2), ==, 2); 485 | g_assert_cmpfloat(ffm_matrix_get(X, 2, 2), ==, 10); 486 | 487 | ffm_matrix *Y = ffm_matrix_calloc(3, 4); 488 | double count = 0; 489 | for (int i = 0; i < Y->size0; i++) 490 | for (int j = 0; j < Y->size1; j++) { 491 | ffm_matrix_set(Y, i, j, count); 492 | count++; 493 | } 494 | 495 | for (int i = 0; i < Y->size0; i++) 496 | for (int j = 0; j < Y->size1; j++) 497 | g_assert_cmpfloat(ffm_matrix_get(Y, i, j), ==, ffm_matrix_get(X, i, j)); 498 | 499 | g_assert_cmpfloat(*ffm_matrix_get_row_ptr(X, 0), ==, 0); 500 | g_assert_cmpfloat(*ffm_matrix_get_row_ptr(X, 1), ==, 4); 501 | g_assert_cmpfloat(*ffm_matrix_get_row_ptr(X, 2), ==, 8); 502 | 503 | ffm_matrix_set(X, 1, 1, 3.21); 504 | g_assert_cmpfloat(ffm_matrix_get(X, 1, 1), ==, 3.21); 505 | ffm_matrix_set(X, 2, 3, 43.21); 506 | g_assert_cmpfloat(ffm_matrix_get(X, 2, 3), ==, 43.21); 507 | } 508 | 509 | void test_ffm_blas(void) { 510 | int size = 6; 511 | ffm_vector *a = ffm_vector_calloc(size); 512 | ffm_vector *b = ffm_vector_calloc(size); 513 | 514 | ffm_vector_set_all(a, 1.5); 515 | ffm_vector_set_all(b, 2.0); 516 | 517 | g_assert_cmpfloat(ffm_blas_ddot(a, b), ==, 18); 518 | ffm_blas_daxpy(2.0, a, b); 519 | for (int i = 0; i < size; i++) g_assert_cmpfloat(b->data[i], ==, 5); 520 | 521 | ffm_vector_set_all(a, 1.0); 522 | g_assert_cmpfloat(ffm_blas_dnrm2(a), ==, sqrt(6.0)); 523 | } 524 | 525 | int main(int argc, char **argv) { 526 | g_test_init(&argc, &argv, NULL); 527 | g_test_add_func("/utils/ffm_vector/ mean", test_ffm_vector_mean); 528 | g_test_add_func("/utils/ffm_vector/ var", test_ffm_vector_variance); 529 | g_test_add_func("/utils/ffm/ normal cdf", test_ffm_normal_cdf); 530 | g_test_add_func("/utils/ffm/ normal pdf", test_ffm_normal_pdf); 531 | g_test_add_func("/utils/ffm_matrix/ operations", test_ffm_matrix_functions); 532 | g_test_add_func("/utils/ffm_vector/ blas", test_ffm_blas); 533 | g_test_add_func("/utils/ffm_vector/ sort", test_ffm_vector_sort); 534 | g_test_add_func("/utils/ffm_vector/ median", test_ffm_vector_median); 535 | g_test_add_func("/utils/ffm_vector/ make labels", 536 | test_ffm_vector_make_labels); 537 | g_test_add_func("/utils/ffm_vector/ get order", test_ffm_vector_get_order); 538 | g_test_add_func("/utils/ffm_vector/ rank comparison", 539 | test_ffm_vector_to_rank_comparision); 540 | g_test_add_func("/utils/ffm_vector/ 
operations", test_ffm_vector_functions); 541 | g_test_add_func("/utils/vector sum", test_ffm_vector_sum); 542 | g_test_add_func("/utils/ffm_vector/ mean_square_error", 543 | test_ffm_vector_mean_square_error); 544 | g_test_add_func("/utils/ffm_vector/ accuracy", test_ffm_vector_accuracy); 545 | g_test_add_func("/utils/ffm_vector/ average precision at cutoff", 546 | test_ffm_average_precision_at_cutoff); 547 | g_test_add_func("/utils/ffm_vector/ kendall tau", 548 | test_ffm_vector_kendall_tau); 549 | g_test_add_func("/utils/ffm_vector/ update mean", 550 | test_ffm_vector_update_mean); 551 | g_test_add_func("/utils/ffm_sigmoid ", test_ffm_sigmoid); 552 | g_test_add_func("/utils/cs daxpy", test_Cs_daxpy); 553 | g_test_add_func("/utils/cs gaxpy row", test_Cs_row_gaxpy); 554 | g_test_add_func("/utils/cs scal_apy", test_Cs_scal_apy); 555 | g_test_add_func("/utils/cs scal_a2py", test_Cs_scal_a2py); 556 | g_test_add_func("/utils/cs col_norm", test_Cs_col_norm); 557 | g_test_add_func("/utils/cs ddot", test_Cs_ddot); 558 | g_test_add_func("/utils/ffm_r2_score", test_ffm_r2_score); 559 | g_test_add_func("/utils/Cs_write", test_Cs_write); 560 | g_test_add_func("/utils/read ffm_matrix_from_file", 561 | test_read_ffm_matrix_from_file); 562 | g_test_add_func("/utils/read svm_light file", test_read_svm_light_file); 563 | g_test_add_func("/utils/read svm_light file w.o. target", 564 | test_read_svm_light_file_without_target); 565 | 566 | return g_test_run(); 567 | } 568 | -------------------------------------------------------------------------------- /src/ffm_utils.c: -------------------------------------------------------------------------------- 1 | // Author: Immanuel Bayer 2 | // License: BSD 3 clause 3 | 4 | #include "fast_fm.h" 5 | #include "kvec.h" 6 | #include 7 | 8 | // ########################### ffm scalar ################################### 9 | double ffm_sigmoid(double x) { 10 | if (fabs(x) > 36) return x > 0 ? 1 : 0; 11 | return 1.0 / (1.0 + exp(-x)); 12 | } 13 | 14 | double ffm_pow_2(double x) { return x * x; } 15 | 16 | double ffm_normal_pdf(double x) { return exp(-(x * x) / 2.0) / sqrt(M_PI * 2); } 17 | // source: Evaluating the Normal Distribution - Marsaglia 18 | double ffm_normal_cdf(double x) { 19 | if (x > 8) return 1; 20 | if (x < -8) return 0; 21 | long double s = x, t = 0, b = x, q = x * x, i = 1; 22 | while (s != t) s = (t = s) + (b *= q / (i += 2)); 23 | return .5 + s * exp(-.5 * q - .91893853320467274178L); 24 | } 25 | // ########################### ffm_vector ################################## 26 | 27 | // Algorithms for Computing the Sample Variance: Analysis and Recommendations 28 | // corrected two-pass algoritm (eq. 
1.7) 29 | double ffm_vector_variance(ffm_vector *x) { 30 | double mean = ffm_vector_mean(x); 31 | double var = 0; 32 | double correction = 0; 33 | for (int j = 0; j < x->size; j++) { 34 | correction += x->data[j] - mean; 35 | var += (x->data[j] - mean) * (x->data[j] - mean); 36 | } 37 | var = var * (1.0 / (double)x->size); 38 | var -= (1.0 / (double)x->size) * (correction * correction); 39 | return var; 40 | } 41 | 42 | // Algorithms for Computing the Sample Variance: Analysis and Recommendations 43 | // equation 1.3a 44 | double ffm_vector_mean(ffm_vector *x) { 45 | double mean = x->data[0]; 46 | for (int j = 1; j < x->size; j++) 47 | mean = mean + (1.0 / (j + 1)) * (x->data[j] - mean); 48 | return mean; 49 | } 50 | 51 | void ffm_vector_normal_cdf(ffm_vector *x) { 52 | for (int i = 0; i < x->size; i++) x->data[i] = ffm_normal_cdf(x->data[i]); 53 | } 54 | void ffm_vector_update_mean(ffm_vector *mean, int index, ffm_vector *x) { 55 | assert(mean->size == x->size && "vectors have different length"); 56 | if (index == 0) { 57 | ffm_vector_memcpy(mean, x); 58 | return; 59 | } 60 | double weight = 1.0 / (index + 1.0); 61 | int N = mean->size; 62 | for (int i = 0; i < N; i++) 63 | mean->data[i] = mean->data[i] + weight * (x->data[i] - mean->data[i]); 64 | } 65 | 66 | double ffm_vector_kendall_tau(ffm_vector *a, ffm_vector *b) { 67 | assert(a->size == b->size && "vectors have different length"); 68 | double N = b->size; 69 | double n_concordant = 0; 70 | double n_disconcordant = 0; 71 | for (int i = 0; i < N; i++) 72 | for (int j = i + 1; j < N; j++) { 73 | // concordant 74 | if (ffm_vector_get(a, i) > ffm_vector_get(a, j) && 75 | ffm_vector_get(b, i) > ffm_vector_get(b, j)) 76 | n_concordant++; 77 | if (ffm_vector_get(a, i) < ffm_vector_get(a, j) && 78 | ffm_vector_get(b, i) < ffm_vector_get(b, j)) 79 | n_concordant++; 80 | // discordant 81 | if (ffm_vector_get(a, i) > ffm_vector_get(a, j) && 82 | ffm_vector_get(b, i) < ffm_vector_get(b, j)) 83 | n_disconcordant++; 84 | if (ffm_vector_get(a, i) < ffm_vector_get(a, j) && 85 | ffm_vector_get(b, i) > ffm_vector_get(b, j)) 86 | n_disconcordant++; 87 | } 88 | return (n_concordant - n_disconcordant) / (.5 * N * (N - 1)); 89 | } 90 | 91 | ffm_vector *ffm_vector_get_order(ffm_vector *y) { 92 | int N = y->size; 93 | ffm_vector *indices = ffm_vector_calloc(N); 94 | ffm_vector *a = ffm_vector_calloc(N); 95 | ffm_vector_memcpy(a, y); 96 | for (int i = 0; i < N; i++) ffm_vector_set(indices, i, i); 97 | // use selection sort on tmp value array and indices array 98 | for (int i = 0; i < N; i++) { 99 | int min = i; 100 | for (int j = i + 1; j < N; j++) 101 | if (ffm_vector_get(a, j) < ffm_vector_get(a, min)) min = j; 102 | // exchange values 103 | double tmp_a = ffm_vector_get(a, i); 104 | ffm_vector_set(a, i, ffm_vector_get(a, min)); 105 | ffm_vector_set(a, min, tmp_a); 106 | // exchange indices array 107 | double tmp_int = ffm_vector_get(indices, i); 108 | ffm_vector_set(indices, i, ffm_vector_get(indices, min)); 109 | ffm_vector_set(indices, min, tmp_int); 110 | } 111 | ffm_vector_free(a); 112 | return indices; 113 | } 114 | 115 | ffm_matrix *ffm_vector_to_rank_comparision(ffm_vector *y) { 116 | int n_compares = 0; 117 | for (int i = 0; i < y->size; i++) 118 | for (int j = i + 1; j < y->size; j++) n_compares++; 119 | ffm_matrix *compars = ffm_matrix_calloc(n_compares, 2); 120 | int comp_row = 0; 121 | for (int i = 0; i < y->size; i++) 122 | for (int j = i + 1; j < y->size; j++) { 123 | if (ffm_vector_get(y, i) > ffm_vector_get(y, j)) { 124 | 
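/* the index of the higher-ranked (larger y) element goes into column 0, the lower-ranked one into column 1 */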
ffm_matrix_set(compars, comp_row, 0, i); 125 | ffm_matrix_set(compars, comp_row, 1, j); 126 | } else { 127 | ffm_matrix_set(compars, comp_row, 0, j); 128 | ffm_matrix_set(compars, comp_row, 1, i); 129 | } 130 | comp_row++; 131 | } 132 | return compars; 133 | } 134 | // if cutoff == -1, ignore the cutoff 135 | double ffm_average_precision_at_cutoff(ffm_vector *y_true, ffm_vector *y_pred, 136 | int cutoff) { 137 | double score = 0; 138 | double num_hits = 0.0; 139 | if (cutoff == -1) cutoff = y_true->size; 140 | for (int i = 0; i < y_pred->size; i++) { 141 | if (i >= cutoff) break; 142 | double p = ffm_vector_get(y_pred, i); 143 | bool in_true = ffm_vector_contains(y_true, p, -1); 144 | bool already_found = ffm_vector_contains(y_pred, p, i); 145 | if (in_true && !already_found) { 146 | num_hits += 1.0; 147 | score += num_hits / (i + 1.0); 148 | } 149 | } 150 | double dev = y_true->size < cutoff ? y_true->size : cutoff; 151 | return score / dev; 152 | } 153 | 154 | // if cutoff == -1, ignore the cutoff 155 | bool ffm_vector_contains(ffm_vector *y, double value, int cutoff) { 156 | int stop = y->size < cutoff ? y->size : cutoff; 157 | if (cutoff == -1) stop = y->size; 158 | for (int i = 0; i < stop; i++) 159 | if (ffm_vector_get(y, i) == value) return true; 160 | return false; 161 | } 162 | 163 | double ffm_vector_accuracy(ffm_vector *y_true, ffm_vector *y_pred) { 164 | assert(y_true->size == y_pred->size && "vectors have different length"); 165 | double acc = 0; 166 | for (int i = 0; i < y_true->size; i++) { 167 | if (ffm_vector_get(y_true, i) >= .0 && ffm_vector_get(y_pred, i) >= 0.5) 168 | acc++; 169 | else if (ffm_vector_get(y_true, i) < .0 && ffm_vector_get(y_pred, i) < 0.5) 170 | acc++; 171 | } 172 | if (acc == 0) return 0; 173 | return acc / (double)(y_true->size); 174 | } 175 | double ffm_vector_median(ffm_vector *y) { 176 | ffm_vector *cp = ffm_vector_alloc(y->size); 177 | ffm_vector_memcpy(cp, y); 178 | ffm_vector_sort(cp); 179 | double median = NAN; 180 | 181 | if (y->size % 2 == 0) 182 | median = (ffm_vector_get(cp, y->size / 2) + 183 | ffm_vector_get(cp, (y->size / 2) - 1)) / 184 | 2.0; 185 | else 186 | median = ffm_vector_get(cp, y->size / 2); 187 | ffm_vector_free(cp); 188 | return median; 189 | } 190 | void ffm_vector_make_labels(ffm_vector *y) { 191 | double median = ffm_vector_median(y); 192 | for (int i = 0; i < y->size; i++) 193 | if (ffm_vector_get(y, i) > median) 194 | ffm_vector_set(y, i, 1); 195 | else 196 | ffm_vector_set(y, i, -1); 197 | } 198 | int __cmpfunc_for_ffm_vector_sort(const void *a, const void *b) { 199 | double diff = *(const double *)a - *(const double *)b; return (diff > 0) - (diff < 0); // casting the difference to int would truncate fractional differences to 0 200 | } 201 | void ffm_vector_sort(ffm_vector *y) { 202 | qsort(y->data, y->size, sizeof(double), __cmpfunc_for_ffm_vector_sort); 203 | } 204 | 205 | double ffm_vector_mean_squared_error(ffm_vector *a, ffm_vector *b) { // note: returns sqrt(mean((a - b)^2)), i.e. the root of the mean squared error 206 | assert(a->size == b->size && "vectors have different length"); 207 | double sum = 0; 208 | for (int i = 0; i < a->size; i++) { 209 | double tmp = (a->data[i] - b->data[i]); 210 | sum += tmp * tmp; 211 | } 212 | if (sum != 0) return sqrt(sum / a->size); 213 | return 0; 214 | } 215 | int ffm_vector_free(ffm_vector *a) { 216 | if (a->owner) { 217 | free(a->data); 218 | free(a); 219 | return 0; 220 | } else 221 | return 1; 222 | } 223 | ffm_vector *ffm_vector_alloc(int size) { 224 | assert(size > 0 && "can't allocate vector with size <= 0"); 225 | struct ffm_vector *x = malloc(sizeof *x); 226 | double *ptr; 227 | ptr = malloc(size * sizeof(double)); 228 | x->data = ptr; 229 | x->owner = 1; 230 | x->size = size; 231
| return x; 232 | } 233 | 234 | ffm_vector *ffm_vector_calloc(int size) { 235 | struct ffm_vector *x = malloc(sizeof *x); 236 | double *ptr; 237 | ptr = calloc(size, sizeof(double)); 238 | x->data = ptr; 239 | x->owner = 1; 240 | x->size = size; 241 | return x; 242 | } 243 | // copy values from b to a 244 | int ffm_vector_memcpy(ffm_vector *a, ffm_vector const *b) { 245 | assert(a->size == b->size && "vectors have different length"); 246 | memcpy(a->data, b->data, a->size * sizeof(double)); 247 | return 1; 248 | } 249 | // a = a +b 250 | int ffm_vector_add(ffm_vector *a, ffm_vector const *b) { 251 | assert(a->size == b->size && "vectors have different length"); 252 | for (int i = 0; i < a->size; i++) a->data[i] = a->data[i] + b->data[i]; 253 | return 1; 254 | } 255 | // a = a - b 256 | int ffm_vector_sub(ffm_vector *a, ffm_vector const *b) { 257 | assert(a->size == b->size && "vectors have different length"); 258 | for (int i = 0; i < a->size; i++) a->data[i] = a->data[i] - b->data[i]; 259 | return 1; 260 | } 261 | // a = a * alpha 262 | int ffm_vector_scale(ffm_vector *a, double b) { 263 | for (int i = 0; i < a->size; i++) a->data[i] = a->data[i] * b; 264 | return 1; 265 | } 266 | // a = a * b 267 | int ffm_vector_mul(ffm_vector *a, ffm_vector const *b) { 268 | assert(a->size == b->size && "vectors have different length"); 269 | for (int i = 0; i < a->size; i++) a->data[i] = a->data[i] * b->data[i]; 270 | return 1; 271 | } 272 | // a = alpha 273 | void ffm_vector_set_all(ffm_vector *a, double b) { 274 | for (int i = 0; i < a->size; i++) a->data[i] = b; 275 | } 276 | double ffm_vector_sum(ffm_vector *a) { 277 | double tmp = 0; 278 | for (int i = 0; i < a->size; i++) tmp += a->data[i]; 279 | return tmp; 280 | } 281 | void ffm_vector_set(ffm_vector *a, int i, double alpha) { a->data[i] = alpha; } 282 | double ffm_vector_get(ffm_vector *a, int i) { return a->data[i]; } 283 | void ffm_vector_add_constant(ffm_vector *a, double alpha) { 284 | for (int i = 0; i < a->size; i++) a->data[i] = a->data[i] + alpha; 285 | } 286 | 287 | void ffm_vector_printf(ffm_vector *a) { 288 | for (int i = 0; i < a->size; i++) printf("%f, ", a->data[i]); 289 | printf("\n"); 290 | } 291 | 292 | // ########################### ffm_matrix ################################## 293 | void ffm_matrix_printf(ffm_matrix *X) { 294 | for (int i = 0; i < X->size0; i++) { 295 | for (int j = 0; j < X->size1; j++) 296 | printf("%f, ", X->data[i * X->size1 + j]); 297 | printf("\n"); 298 | } 299 | } 300 | int ffm_matrix_free(ffm_matrix *a) { 301 | if (a->owner) free(a->data); 302 | return 1; 303 | } 304 | ffm_matrix *ffm_matrix_alloc(int size0, int size1) { 305 | struct ffm_matrix *x = malloc(sizeof *x); 306 | double *ptr; 307 | ptr = malloc(size0 * size1 * sizeof(double)); 308 | x->data = ptr; 309 | x->owner = 1; 310 | x->size0 = size0; 311 | x->size1 = size1; 312 | return x; 313 | } 314 | 315 | ffm_matrix *ffm_matrix_calloc(int size0, int size1) { 316 | assert(size0 > 0 && "can't allocate matrix with size0 <= 0"); 317 | assert(size1 > 0 && "can't allocate matrix with size1 <= 0"); 318 | struct ffm_matrix *x = malloc(sizeof *x); 319 | double *ptr; 320 | ptr = calloc(size0 * size1, sizeof(double)); 321 | x->data = ptr; 322 | x->owner = 1; 323 | x->size0 = size0; 324 | x->size1 = size1; 325 | return x; 326 | } 327 | double *ffm_matrix_get_row_ptr(ffm_matrix *X, int i) { 328 | return X->data + i * X->size1; 329 | } 330 | void ffm_matrix_set(ffm_matrix *X, int i, int j, double a) { 331 | assert(i < X->size0 && "index out of range"); 332 | 
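/* row-major layout: entry (i, j) lives at data[i * size1 + j], as the write below shows */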
assert(j < X->size1 && "index out of range"); 333 | X->data[i * X->size1 + j] = a; 334 | } 335 | double ffm_matrix_get(ffm_matrix *X, int i, int j) { 336 | assert(i < X->size0 && "index out of range"); 337 | assert(j < X->size1 && "index out of range"); 338 | return X->data[i * X->size1 + j]; 339 | } 340 | // ########################### cblas helper ################################# 341 | double ffm_blas_ddot(ffm_vector *x, ffm_vector const *y) { 342 | assert(x->size == y->size && "vectors have different length"); 343 | return cblas_ddot(x->size, x->data, 1, y->data, 1); 344 | } 345 | void ffm_blas_daxpy(double alpha, ffm_vector *x, ffm_vector const *y) { 346 | assert(x->size == y->size && "vectors have different length"); 347 | return cblas_daxpy(x->size, alpha, x->data, 1, y->data, 1); 348 | } 349 | double ffm_blas_dnrm2(ffm_vector *x) { 350 | return cblas_dnrm2(x->size, x->data, 1); 351 | } 352 | 353 | ffm_matrix *ffm_matrix_from_file(char *path) { 354 | assert(access(path, F_OK) != -1 && "file doesn't exist"); 355 | FILE *fp = fopen(path, "r"); 356 | 357 | // get number of rows 358 | size_t len = 1; 359 | char *line = NULL; 360 | ssize_t read; 361 | unsigned long row_count = 0; 362 | while ((read = getline(&line, &len, fp)) != -1) row_count++; 363 | rewind(fp); 364 | 365 | // get number of columns 366 | unsigned long column_count = 1; 367 | char c = fgetc(fp); 368 | while (c != '\n') { 369 | if (c == ' ') column_count++; 370 | c = fgetc(fp); 371 | } 372 | rewind(fp); 373 | 374 | ffm_matrix *X = ffm_matrix_calloc(row_count, column_count); 375 | 376 | int current_row = 0; 377 | while ((read = getline(&line, &len, fp)) != -1) { 378 | char *end_str; 379 | char *token = strtok_r(line, " ", &end_str); 380 | int current_col = 0; 381 | // loop over features in current row 382 | while (token != NULL) { 383 | ffm_matrix_set(X, current_row, current_col, atof(token)); 384 | current_col++; 385 | token = strtok_r(NULL, " ", &end_str); 386 | } 387 | current_row++; 388 | } 389 | return X; 390 | } 391 | // ######## fm helper ############ 392 | 393 | fm_data read_svm_light_file(char *path) { 394 | assert(access(path, F_OK) != -1 && "file doesn't exist"); 395 | FILE *fp = fopen(path, "r"); 396 | 397 | cs *T = cs_spalloc(0, 0, 1, 1, 1); /* allocate result */ 398 | 399 | char *line = NULL; 400 | size_t len = 1; 401 | ssize_t read; 402 | int line_nr = 0; 403 | 404 | // check if file contains target 405 | bool hasTarget = true; 406 | char c = fgetc(fp); 407 | while (c != ' ') { 408 | if (c == ':') hasTarget = false; 409 | c = fgetc(fp); 410 | } 411 | rewind(fp); 412 | 413 | /* We create a new array to store double values. 414 | We don't want it zero-terminated or cleared to 0's. 
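The kvec macros grow the backing array by doubling its capacity, so each kv_push below is amortised O(1).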
*/ 415 | double target; 416 | double dummy_target = 0; 417 | 418 | kvec_t(double)array; 419 | kv_init(array); 420 | 421 | // read svm_light file line by line 422 | while ((read = getline(&line, &len, fp)) != -1) { 423 | char *end_str; 424 | char *token = strtok_r(line, " ", &end_str); 425 | //printf("linr nr: %i \n", line_nr); 426 | 427 | if (hasTarget) { 428 | target = atof(token); 429 | kv_push(double, array, target); // append 430 | } else { 431 | kv_push(double, array, dummy_target); // append 432 | } 433 | 434 | if (hasTarget) token = strtok_r(NULL, " ", &end_str); 435 | 436 | // loop over features in current row 437 | while (token != NULL) { 438 | char *end_token; 439 | char *token2 = strtok_r(token, ":", &end_token); 440 | double col_nr = atoi(token2); 441 | 442 | token2 = strtok_r(NULL, ":", &end_token); 443 | if (token2 != NULL) { 444 | double value = atof(token2); 445 | assert(cs_entry(T, (int)line_nr, (int)col_nr, value) && 446 | "cs_entry failed, out of memory?"); 447 | } 448 | token = strtok_r(NULL, " ", &end_str); 449 | } 450 | line_nr++; 451 | } 452 | 453 | ffm_vector *y = ffm_vector_alloc(line_nr); 454 | 455 | // copy from kvec to ffm_vector 456 | for (int k = 0; k < line_nr; k++) 457 | ffm_vector_set(y, k, kv_a(double, array, k)); 458 | 459 | kv_destroy(array); 460 | cs *X = cs_compress(T); 461 | cs_spfree(T); 462 | 463 | return (fm_data){.y = y, .X = X}; 464 | } 465 | 466 | int Cs_write(FILE *f, const cs *A) { 467 | if (!CS_TRIPLET(A)) return (0); /* check inputs */ 468 | 469 | int p, nz, *Ap, *Ai; 470 | double *Ax; 471 | Ap = A->p; 472 | Ai = A->i; 473 | Ax = A->x; 474 | nz = A->nz; 475 | 476 | for (p = 0; p < nz; p++) { 477 | fprintf(f, "%g %g %g\n", (double)(Ai[p]), (double)(Ap[p]), Ax ? Ax[p] : 1); 478 | } 479 | return (1); 480 | } 481 | 482 | ffm_coef *alloc_fm_coef(int n_features, int k, int ignore_w) { 483 | struct ffm_coef *coef = malloc(sizeof *coef); 484 | 485 | if (ignore_w) 486 | coef->w = NULL; 487 | else 488 | coef->w = ffm_vector_calloc(n_features); 489 | 490 | if (k > 0) { 491 | coef->V = ffm_matrix_calloc(k, n_features); 492 | coef->mu_V = ffm_vector_calloc(k); 493 | coef->lambda_V = ffm_vector_calloc(k); 494 | } else { 495 | coef->V = NULL; 496 | coef->mu_V = NULL; 497 | coef->lambda_V = NULL; 498 | } 499 | 500 | coef->alpha = 0; 501 | coef->mu_w = 0; 502 | coef->w_0 = 0; 503 | coef->lambda_w = 0; 504 | return coef; 505 | } 506 | 507 | void free_ffm_coef(ffm_coef *coef) { 508 | if (coef->w) { 509 | ffm_vector_free(coef->w); 510 | coef->w = NULL; 511 | } 512 | if (coef->mu_V) { 513 | ffm_vector_free(coef->mu_V); 514 | coef->mu_V = NULL; 515 | } 516 | if (coef->lambda_V) { 517 | ffm_vector_free(coef->lambda_V); 518 | coef->lambda_V = NULL; 519 | } 520 | if (!coef->V) return; 521 | ffm_matrix_free(coef->V); 522 | coef->V = NULL; 523 | } 524 | 525 | void init_ffm_coef(ffm_coef *coef, ffm_param param) { 526 | int k = coef->V ? 
coef->V->size0 : 0; 527 | 528 | ffm_rng *rng = ffm_rng_seed(param.rng_seed); 529 | 530 | coef->w_0 = 0; 531 | 532 | if (!param.ignore_w) { 533 | double sum = 0; 534 | for (int i = 0; i < coef->w->size; i++) { 535 | double tmp = ffm_rand_normal(rng, 0, param.init_sigma); 536 | ffm_vector_set(coef->w, i, tmp); 537 | sum += tmp; 538 | } 539 | coef->mu_w = sum / (coef->w->size); 540 | } 541 | // init V 542 | if (k > 0) { 543 | for (int i = 0; i < coef->V->size0; i++) { 544 | double sum = 0; 545 | for (int j = 0; j < coef->V->size1; j++) { 546 | double tmp = ffm_rand_normal(rng, 0, param.init_sigma); 547 | ffm_matrix_set(coef->V, i, j, tmp); 548 | sum += tmp; 549 | } 550 | ffm_vector_set(coef->mu_V, i, sum / coef->V->size1); 551 | } 552 | ffm_vector_set_all(coef->lambda_V, param.init_lambda_V); 553 | } 554 | coef->lambda_w = param.init_lambda_w; 555 | // use default hyperparameter settings if not set 556 | if (param.SOLVER == SOLVER_MCMC) { 557 | if (coef->lambda_w == 0) coef->lambda_w = 1; 558 | } 559 | 560 | ffm_rng_free(rng); 561 | } 562 | 563 | void free_fm_data(fm_data *data) { 564 | ffm_vector_free(data->y); 565 | cs_spfree(data->X); 566 | } 567 | 568 | double ffm_r2_score(ffm_vector *y_true, ffm_vector *y_pred) { 569 | double ss_tot = ffm_vector_variance(y_true); 570 | 571 | double n_samples = y_true->size; 572 | ss_tot *= n_samples; 573 | double ss_res = 0; 574 | for (int i = 0; i < y_pred->size; i++) 575 | ss_res += ffm_pow_2(ffm_vector_get(y_true, i) - ffm_vector_get(y_pred, i)); 576 | 577 | return 1.0 - (ss_res / ss_tot); 578 | } 579 | 580 | /* y = alpha*A[:,j]*x+y */ 581 | // A sparse, x, y dense 582 | // modification of cs_gaxpy 583 | int Cs_daxpy(const cs *A, int col_index, double alpha, const double *x, 584 | double *y) { 585 | int p, j, *Ap, *Ai; 586 | double *Ax; 587 | if (!CS_CSC(A) || !x || !y) return (0); /* check inputs */ 588 | Ap = A->p; 589 | Ai = A->i; 590 | Ax = A->x; 591 | j = col_index; 592 | // for (j = 0 ; j < n ; j++) 593 | //{ 594 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 595 | y[Ai[p]] += Ax[p] * x[Ai[p]] * alpha; 596 | } 597 | // } 598 | return (1); 599 | } 600 | 601 | // y = A*x+y 602 | // with A in RowMajor format. 
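// Worked example (a sketch with a hypothetical 2x2 matrix, not one of the test fixtures): storing A = [[1, 2], [0, 3]] row-major gives Ap = {0, 2, 3}, Ai = {0, 1, 1}, Ax = {1, 2, 3}, and Cs_row_gaxpy then accumulates one dot product per row, y[0] += 1*x[0] + 2*x[1] and y[1] += 3*x[1], whereas cs_gaxpy on the CSC form scatters column-wise; the tests obtain such a row-major matrix via cs_transpose(X_csc, 1).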
603 | int Cs_row_gaxpy(const cs *A, const double *x, double *y) { 604 | CS_INT p, j, n, *Ap, *Ai; 605 | CS_ENTRY *Ax; 606 | // if (!CS_CSC (A) || !x || !y) return (0) ; /* check inputs */ 607 | n = A->n; 608 | Ap = A->p; 609 | Ai = A->i; 610 | Ax = A->x; 611 | for (j = 0; j < n; j++) { 612 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 613 | y[j] += Ax[p] * x[Ai[p]]; 614 | } 615 | } 616 | return (1); 617 | } 618 | 619 | /* y = alpha*A[:,j]+y */ 620 | // A sparse, x, y dense 621 | // modification of cs_gaxpy 622 | int Cs_scal_apy(const cs *A, int col_index, double alpha, double *y) { 623 | int p, j, *Ap, *Ai; 624 | double *Ax; 625 | if (!CS_CSC(A) || !y) return (0); /* check inputs */ 626 | Ap = A->p; 627 | Ai = A->i; 628 | Ax = A->x; 629 | j = col_index; 630 | // for (j = 0 ; j < n ; j++) 631 | //{ 632 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 633 | y[Ai[p]] += Ax[p] * alpha; 634 | } 635 | // } 636 | return (1); 637 | } 638 | 639 | /* sum = <A[:,j], y> */ 640 | // A sparse, y dense 641 | // modification of cs_gaxpy 642 | double Cs_ddot(const cs *A, int col_index, double *y) { 643 | int p, j, *Ap, *Ai; 644 | double *Ax; 645 | if (!CS_CSC(A) || !y) return (0); /* check inputs */ 646 | Ap = A->p; 647 | Ai = A->i; 648 | Ax = A->x; 649 | j = col_index; 650 | double sum = 0; 651 | // for (j = 0 ; j < n ; j++) 652 | //{ 653 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 654 | sum += Ax[p] * y[Ai[p]]; 655 | } 656 | // } 657 | return sum; 658 | } 659 | 660 | /* y = alpha*A[:,j]^2+y */ 661 | // A sparse, x, y dense 662 | // modification of cs_gaxpy 663 | int Cs_scal_a2py(const cs *A, int col_index, double alpha, double *y) { 664 | int p, j, *Ap, *Ai; 665 | double *Ax; 666 | if (!CS_CSC(A) || !y) return (0); /* check inputs */ 667 | Ap = A->p; 668 | Ai = A->i; 669 | Ax = A->x; 670 | j = col_index; 671 | // for (j = 0 ; j < n ; j++) 672 | //{ 673 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 674 | y[Ai[p]] += Ax[p] * Ax[p] * alpha; 675 | } 676 | // } 677 | return (1); 678 | } 679 | 680 | /* y = X^2.sum(axis=0) */ 681 | // A sparse, x, y dense 682 | // modification of cs_gaxpy 683 | int Cs_col_norm(const cs *A, ffm_vector *y) { 684 | int p, n, j, *Ap; // *Ai ; 685 | double *Ax; 686 | if (!CS_CSC(A) || !y) return (0); /* check inputs */ 687 | Ap = A->p; /* Ai = A->i ;*/ 688 | n = A->n, Ax = A->x; 689 | for (j = 0; j < n; j++) { 690 | double norm = 0; 691 | for (p = Ap[j]; p < Ap[j + 1]; p++) { 692 | norm += Ax[p] * Ax[p]; 693 | } 694 | ffm_vector_set(y, j, norm); 695 | } 696 | return (1); 697 | } 698 | -------------------------------------------------------------------------------- /src/tests/test_ffm_sgd.c: -------------------------------------------------------------------------------- 1 | #include "fast_fm.h" 2 | #include <glib.h> 3 | #include "TestFixtures.h" 4 | 5 | void test_sgd_predict_sample(TestFixture_T *pFixtureInput, gconstpointer pg) { 6 | int sample_row = 1; 7 | 8 | double y_pred = 9 | ffm_predict_sample(pFixtureInput->coef, pFixtureInput->X_t, sample_row); 10 | // only first order == 24 11 | g_assert_cmpfloat(y_pred, ==, 672); 12 | } 13 | 14 | void test_first_order_sgd(TestFixture_T *pFix, gconstpointer pg) { 15 | // int k = pFix->coef->V->size0; 16 | int k = 0; 17 | int n_features = pFix->X->n; 18 | int n_iter = 50; 19 | double init_sigma = .1; 20 | double step_size = .002; 21 | 22 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 23 | 24 | ffm_vector *y_pred = ffm_vector_calloc(5); 25 | ffm_param param = {.n_iter = n_iter * 100, 26 | .init_sigma = init_sigma, 27 | .stepsize = step_size, 28 | .SOLVER = SOLVER_SGD, 29 | .TASK
= TASK_REGRESSION}; 30 | param.init_lambda_w = 0.5; 31 | ffm_fit_sgd(coef, pFix->X_t, pFix->y, ¶m); 32 | row_predict(coef, pFix->X_t, y_pred); 33 | 34 | g_assert_cmpfloat(ffm_r2_score(y_pred, pFix->y), >, .85); 35 | 36 | ffm_vector *y_pred_als = ffm_vector_calloc(5); 37 | ffm_coef *coef_als = alloc_fm_coef(n_features, k, false); 38 | ffm_param param_als = {.n_iter = 50, 39 | .init_sigma = 0.1, 40 | .SOLVER = SOLVER_ALS, 41 | .TASK = TASK_REGRESSION}; 42 | param_als.init_lambda_w = 3.5; 43 | sparse_fit(coef_als, pFix->X, pFix->X, pFix->y, y_pred_als, param_als); 44 | row_predict(coef_als, pFix->X_t, y_pred_als); 45 | 46 | // compare fit of als and sgd 47 | g_assert_cmpfloat(ffm_r2_score(y_pred, y_pred_als), >, .98); 48 | // compare coef of als and sgd 49 | g_assert_cmpfloat(ffm_r2_score(coef->w, coef_als->w), >, .98); 50 | 51 | ffm_vector_free_all(y_pred, y_pred_als); 52 | free_ffm_coef(coef); 53 | free_ffm_coef(coef_als); 54 | } 55 | 56 | void test_second_order_sgd(TestFixture_T *pFix, gconstpointer pg) { 57 | int n_features = pFix->X->n; 58 | int k = 3; 59 | int n_iter = 10; 60 | double init_sigma = .01; 61 | double step_size = .0002; 62 | 63 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 64 | 65 | ffm_vector *y_pred = ffm_vector_calloc(5); 66 | ffm_param param = {.n_iter = n_iter * 100, 67 | .init_sigma = init_sigma, 68 | .stepsize = step_size, 69 | .SOLVER = SOLVER_SGD, 70 | .TASK = TASK_REGRESSION}; 71 | param.init_lambda_w = 0.5; 72 | param.init_lambda_V = 50.5; 73 | ffm_fit_sgd(coef, pFix->X_t, pFix->y, ¶m); 74 | row_predict(coef, pFix->X_t, y_pred); 75 | 76 | g_assert_cmpfloat(ffm_r2_score(y_pred, pFix->y), >, .98); 77 | 78 | ffm_vector *y_pred_als = ffm_vector_calloc(5); 79 | ffm_coef *coef_als = alloc_fm_coef(n_features, k, false); 80 | 81 | ffm_param param_als = { 82 | .n_iter = 10, .init_sigma = 0.01, .SOLVER = SOLVER_ALS}; 83 | param_als.init_lambda_w = 3.5; 84 | param_als.init_lambda_V = 50.5; 85 | sparse_fit(coef_als, pFix->X, pFix->X, pFix->y, y_pred_als, param_als); 86 | sparse_predict(coef_als, pFix->X, y_pred_als); 87 | 88 | // compare fit of als and sgd 89 | g_assert_cmpfloat(ffm_r2_score(y_pred, y_pred_als), >, .98); 90 | 91 | ffm_vector_free_all(y_pred, y_pred_als); 92 | free_ffm_coef(coef); 93 | free_ffm_coef(coef_als); 94 | } 95 | 96 | void test_sgd_classification(TestFixture_T *pFix, gconstpointer pg) { 97 | int n_features = pFix->X->n; 98 | int k = 2; 99 | int n_iter = 10; 100 | double init_sigma = .01; 101 | double step_size = .0002; 102 | 103 | // map to classification problem 104 | ffm_vector_make_labels(pFix->y); 105 | 106 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 107 | 108 | ffm_vector *y_pred = ffm_vector_calloc(5); 109 | ffm_param param = {.n_iter = n_iter * 100, 110 | .init_sigma = init_sigma, 111 | .stepsize = step_size, 112 | .SOLVER = SOLVER_SGD, 113 | .TASK = TASK_CLASSIFICATION}; 114 | param.init_lambda_w = 0.5; 115 | param.init_lambda_V = 0.5; 116 | ffm_fit_sgd(coef, pFix->X_t, pFix->y, ¶m); 117 | row_predict(coef, pFix->X_t, y_pred); 118 | for (int i = 0; i < y_pred->size; i++) 119 | ffm_vector_set(y_pred, i, ffm_sigmoid(ffm_vector_get(y_pred, i))); 120 | 121 | g_assert_cmpfloat(ffm_vector_accuracy(pFix->y, y_pred), >=, .8); 122 | 123 | ffm_vector_free(y_pred); 124 | free_ffm_coef(coef); 125 | } 126 | 127 | void test_first_order_bpr(TestFixture_T *pFix, gconstpointer pg) { 128 | int n_features = pFix->X->n; 129 | int n_samples = pFix->X->m; 130 | int k = 0; 131 | int n_iter = 50; 132 | double init_sigma = .01; 133 | double 
step_size = .002; 134 | 135 | ffm_matrix *compares = ffm_vector_to_rank_comparision(pFix->y); 136 | ffm_vector *true_order = ffm_vector_get_order(pFix->y); 137 | 138 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 139 | for (int i = 0; i < 2; i++) coef->w->data[i] = 0.1; 140 | 141 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 142 | ffm_param param = {.n_iter = n_iter, 143 | .init_sigma = init_sigma, 144 | .stepsize = step_size, 145 | .SOLVER = SOLVER_SGD, 146 | .TASK = TASK_RANKING}; 147 | param.init_lambda_w = 0.0; 148 | ffm_fit_sgd_bpr(coef, pFix->X_t, compares, param); 149 | row_predict(coef, pFix->X_t, y_pred); 150 | ffm_vector *pred_order = ffm_vector_get_order(y_pred); 151 | double kendall_tau = ffm_vector_kendall_tau(true_order, pred_order); 152 | g_assert_cmpfloat(kendall_tau, ==, 1); 153 | 154 | ffm_vector_free_all(y_pred, true_order, pred_order); 155 | free_ffm_coef(coef); 156 | } 157 | 158 | void test_update_second_order_bpr(TestFixture_T *pFix, gconstpointer pg) { 159 | double cache_p = 1.1; 160 | double cache_n = 2.2; 161 | double y_err = -1; 162 | double step_size = 0.1; 163 | double lambda_V = 4; 164 | 165 | int sample_row_p = 1; 166 | int sample_row_n = 0; 167 | int V_col = 0; 168 | update_second_order_bpr(pFix->X_t, pFix->coef->V, cache_n, cache_p, y_err, 169 | step_size, lambda_V, sample_row_p, sample_row_n, 170 | V_col); 171 | 172 | // 1 - 0.1*(-1 * (4*1.1 - 4^2 - (1*2.2 - 1^2*1)) + 4 *1) = -0.68 173 | g_assert_cmpfloat(fabs(ffm_matrix_get(pFix->coef->V, 0, 0) - (-0.68)), <, 174 | 1e-10); 175 | 176 | //> 2 - 0.1*(-1 * (0*1.1 - 0^2*2 - (2*2.2 - 2^2*2)) + 4 *2) 177 | //[1] 1.56 178 | g_assert_cmpfloat(ffm_matrix_get(pFix->coef->V, 0, 1), ==, 1.56); 179 | } 180 | 181 | void test_second_order_bpr(TestFixture_T *pFix, gconstpointer pg) { 182 | int n_features = pFix->X->n; 183 | int n_samples = pFix->X->m; 184 | int k = 2; 185 | int n_iter = 200; 186 | double init_sigma = .01; 187 | double step_size = .02; 188 | 189 | ffm_matrix *compares = ffm_vector_to_rank_comparision(pFix->y); 190 | ffm_vector *true_order = ffm_vector_get_order(pFix->y); 191 | 192 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 193 | 194 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 195 | ffm_param param = {.n_iter = n_iter, 196 | .init_sigma = init_sigma, 197 | .stepsize = step_size, 198 | .SOLVER = SOLVER_SGD, 199 | .TASK = TASK_RANKING}; 200 | param.init_lambda_w = 0.5; 201 | param.init_lambda_V = 0.5; 202 | ffm_fit_sgd_bpr(coef, pFix->X_t, compares, param); 203 | 204 | sparse_predict(coef, pFix->X, y_pred); 205 | ffm_vector *pred_order = ffm_vector_get_order(y_pred); 206 | double kendall_tau = ffm_vector_kendall_tau(true_order, pred_order); 207 | g_assert_cmpfloat(kendall_tau, ==, 1); 208 | 209 | ffm_vector_free_all(y_pred, true_order, pred_order); 210 | free_ffm_coef(coef); 211 | } 212 | 213 | void test_sgd_generated_data(void) { 214 | int n_features = 10; 215 | int n_samples = 100; 216 | int k = 0; 217 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 218 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 219 | 220 | int n_iter = 40; 221 | double init_sigma = 0.1; 222 | double step_size = .05; 223 | 224 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 225 | 226 | ffm_param param = {.n_iter = n_iter * 100, 227 | .init_sigma = init_sigma, 228 | .stepsize = step_size, 229 | .SOLVER = SOLVER_SGD, 230 | .TASK = TASK_REGRESSION}; 231 | param.init_lambda_w = 0.05; 232 | ffm_fit_sgd(coef, data->X_t, data->y, ¶m); 233 | sparse_predict(coef, data->X, 
y_pred); 234 | 235 | g_assert_cmpfloat(ffm_r2_score(y_pred, data->y), >, 0.95); 236 | 237 | ffm_vector_free(y_pred); 238 | free_ffm_coef(coef); 239 | TestFixtureDestructor(data, NULL); 240 | } 241 | 242 | void test_sgd_classification_generated_data(void) { 243 | int n_features = 10; 244 | int n_samples = 100; 245 | int k = 2; 246 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 247 | ffm_vector_make_labels(data->y); 248 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 249 | 250 | int n_iter = 200; 251 | double init_sigma = 0.1; 252 | double step_size = .2; 253 | 254 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 255 | 256 | ffm_param param = {.n_iter = n_iter, 257 | .init_sigma = init_sigma, 258 | .stepsize = step_size, 259 | .SOLVER = SOLVER_SGD, 260 | .TASK = TASK_CLASSIFICATION}; 261 | param.init_lambda_w = 0.05; 262 | param.init_lambda_V = 0.05; 263 | 264 | ffm_fit_sgd(coef, data->X_t, data->y, &param); 265 | sparse_predict(coef, data->X, y_pred); 266 | for (int i = 0; i < y_pred->size; i++) 267 | ffm_vector_set(y_pred, i, ffm_sigmoid(ffm_vector_get(y_pred, i))); 268 | 269 | g_assert_cmpfloat(ffm_vector_accuracy(data->y, y_pred), >=, .81); 270 | 271 | ffm_vector_free(y_pred); 272 | free_ffm_coef(coef); 273 | TestFixtureDestructor(data, NULL); 274 | } 275 | 276 | void test_sgd_bpr_generated_data(void) { 277 | int n_features = 15; 278 | int n_samples = 10; 279 | int k = 4; 280 | TestFixture_T *data = makeTestFixture(1245, n_samples, n_features, k); 281 | ffm_matrix *compares = ffm_vector_to_rank_comparision(data->y); 282 | ffm_vector *true_order = ffm_vector_get_order(data->y); 283 | 284 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 285 | 286 | int n_iter = 100; 287 | double init_sigma = 0.1; 288 | double step_size = .1; 289 | 290 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 291 | 292 | ffm_param param = {.n_iter = n_iter, 293 | .init_sigma = init_sigma, 294 | .stepsize = step_size, 295 | .SOLVER = SOLVER_SGD, 296 | .TASK = TASK_RANKING}; 297 | param.init_lambda_w = 0.05; 298 | param.init_lambda_V = 0.05; 299 | ffm_fit_sgd_bpr(coef, data->X_t, compares, param); 300 | 301 | sparse_predict(coef, data->X, y_pred); 302 | ffm_vector *pred_order = ffm_vector_get_order(y_pred); 303 | 304 | double kendall_tau = ffm_vector_kendall_tau(true_order, pred_order); 305 | g_assert_cmpfloat(kendall_tau, >, .45); 306 | 307 | ffm_vector_free(y_pred); 308 | free_ffm_coef(coef); 309 | TestFixtureDestructor(data, NULL); 310 | } 311 | 312 | void test_extract_gradient() { 313 | int n_features = 3; 314 | int k = 2; 315 | double stepsize = .5; 316 | 317 | ffm_coef *coef_t0 = alloc_fm_coef(n_features, k, false); 318 | coef_t0->w_0 = 0.5; 319 | ffm_vector_set(coef_t0->w, 0, 1); 320 | ffm_vector_set(coef_t0->w, 1, 2); 321 | ffm_vector_set(coef_t0->w, 2, 3); 322 | ffm_matrix_set(coef_t0->V, 0, 0, 4); 323 | ffm_matrix_set(coef_t0->V, 1, 0, 5); 324 | ffm_matrix_set(coef_t0->V, 0, 1, 6); 325 | ffm_matrix_set(coef_t0->V, 1, 1, 7); 326 | ffm_matrix_set(coef_t0->V, 0, 2, 8); 327 | ffm_matrix_set(coef_t0->V, 1, 2, 9); 328 | 329 | ffm_coef *coef_t1 = alloc_fm_coef(n_features, k, false); 330 | 331 | ffm_coef *grad = extract_gradient(coef_t0, coef_t1, stepsize); 332 | 333 | g_assert_cmpfloat(coef_t0->w_0, ==, grad->w_0 * -stepsize); 334 | // check w grad 335 | for (int i = 0; i < n_features; i++) 336 | g_assert_cmpfloat(ffm_vector_get(coef_t0->w, i), ==, 337 | ffm_vector_get(grad->w, i) * stepsize); 338 | // check V grad 339 | for (int i = 0; i < k; i++) 340 | for (int j = 0; j < n_features; j++) 341 | g_assert_cmpfloat(ffm_matrix_get(coef_t0->V, i, j), ==, 342 | ffm_matrix_get(grad->V, i, j) * stepsize); 343 | 344 | free_ffm_coef(coef_t0); 345 | free_ffm_coef(coef_t1); 346 | free_ffm_coef(grad); 347 | } 348 | 349 | void test_l2_penalty() { 350 | int n_features = 2; 351 | int k = 1; 352 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 353 | ffm_vector_set(coef->w, 0, 1); 354 | ffm_vector_set(coef->w, 1, 2); 355 | ffm_matrix_set(coef->V, 0, 0, 3); 356 | ffm_matrix_set(coef->V, 0, 1, 4); 357 | 358 | coef->lambda_w = 0.5; 359 | double lambda_V_all = 0.5; 360 | ffm_vector_set_all(coef->lambda_V, lambda_V_all); 361 | 362 | double true_loss = coef->lambda_w * 5 + lambda_V_all * 25; 363 | double loss = l2_penalty(coef); 364 | g_assert_cmpfloat(true_loss, ==, loss); 365 | free_ffm_coef(coef); 366 | } 367 |
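/*
 * The three gradient-check tests that follow share one recipe: take a
 * single SGD step, recover the implied gradient with extract_gradient,
 * and compare it against the central finite difference
 *
 *   dl/dtheta ~= (l(theta + eps) - l(theta - eps)) / (2 * eps),
 *
 * whose truncation error is O(eps^2). Only the loss l changes between
 * the tests: squared error (regression), negative log-sigmoid
 * (classification) and the pairwise BPR loss (ranking); the l2 penalty
 * enters each loss as 0.5 * l2_penalty(coef).
 */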
368 | void test_gradient_check_reg(TestFixture_T *pFix, gconstpointer pg) { 369 | cs *X_crs = pFix->X_t; 370 | ffm_vector *y = pFix->y; 371 | int test_sample_row = 0; 372 | double y_true = ffm_vector_get(y, test_sample_row); 373 | int n_features = pFix->coef->w->size; 374 | 375 | double eps = 0.0001; 376 | 377 | ffm_param param = {.n_iter = 1, 378 | .stepsize = .001, 379 | .init_sigma = .1, 380 | .k = 2, 381 | .init_lambda_w = 0.5, 382 | .init_lambda_V = 1.5, 383 | .warm_start = 1, 384 | .SOLVER = SOLVER_SGD, 385 | .TASK = TASK_REGRESSION, 386 | .rng_seed = 44}; 387 | 388 | ffm_coef *coef_t0 = alloc_fm_coef(n_features, param.k, false); 389 | init_ffm_coef(coef_t0, param); 390 | 391 | ffm_coef *coef_t1 = alloc_fm_coef(n_features, param.k, false); 392 | init_ffm_coef(coef_t1, param); 393 | 394 | ffm_fit_sgd(coef_t1, X_crs, y, &param); 395 | ffm_coef *grad = extract_gradient(coef_t0, coef_t1, param.stepsize); 396 | 397 | // check w gradient updates 398 | for (int i = 0; i < n_features; i++) { 399 | // keep copy 400 | double tmp = ffm_vector_get(coef_t0->w, i); 401 | // x + eps 402 | ffm_vector_set(coef_t0->w, i, tmp + eps); 403 | double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 404 | double sq_loss = 0.5 * pow(y_true - y_pred, 2); 405 | double l_plus = sq_loss + 0.5 * l2_penalty(coef_t0); 406 | // x - eps 407 | ffm_vector_set(coef_t0->w, i, tmp - eps); 408 | y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 409 | sq_loss = 0.5 * pow(y_true - y_pred, 2); 410 | double l_minus = sq_loss + 0.5 * l2_penalty(coef_t0); 411 | // restore 412 | ffm_vector_set(coef_t0->w, i, tmp); 413 | double grad_i = (l_plus - l_minus) / (2 * eps); 414 | 415 | g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-10); 416 | } 417 | 418 | // check V gradient updates 419 | for (int f = 0; f < param.k; f++) 420 | for (int i = 0; i < n_features; i++) { 421 | // keep copy 422 | double tmp = ffm_matrix_get(coef_t0->V, f, i); 423 | // x + eps 424 | ffm_matrix_set(coef_t0->V, f, i, tmp + eps); 425 | double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 426 | double sq_loss = 0.5 * pow(y_true - y_pred, 2); 427 | double l_plus = sq_loss + 0.5 * l2_penalty(coef_t0); 428 | // x - eps 429 | ffm_matrix_set(coef_t0->V, f, i, tmp - eps); 430 | y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 431 | sq_loss = 0.5 * pow(y_true - y_pred, 2); 432 | double l_minus = sq_loss + 0.5 * l2_penalty(coef_t0); 433 | // restore 434 | ffm_matrix_set(coef_t0->V, f, i, tmp); 435 | double grad_i = (l_plus - l_minus) / (2 * eps); 436 | 437 | g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)), <, 1e-10); 438 | } 439 | 440 | free_ffm_coef(coef_t0);
441 | free_ffm_coef(coef_t1); 442 | free_ffm_coef(grad); 443 | } 444 | 445 | void test_gradient_check_class(TestFixture_T *pFix, gconstpointer pg) { 446 | cs *X_crs = pFix->X_t; 447 | ffm_vector *y = pFix->y; 448 | int test_sample_row = 0; 449 | double y_true = ffm_vector_get(y, test_sample_row); 450 | int n_features = pFix->coef->w->size; 451 | 452 | double eps = 0.0001; 453 | 454 | ffm_param param = {.n_iter = 1, 455 | .stepsize = .01, 456 | .init_sigma = .01, 457 | .k = 2, 458 | .init_lambda_w = 1.5, 459 | .init_lambda_V = 2.0, 460 | .warm_start = 1, 461 | .SOLVER = SOLVER_SGD, 462 | .TASK = TASK_CLASSIFICATION, 463 | .rng_seed = 44}; 464 | 465 | ffm_coef *coef_t0 = alloc_fm_coef(n_features, param.k, false); 466 | init_ffm_coef(coef_t0, param); 467 | 468 | ffm_coef *coef_t1 = alloc_fm_coef(n_features, param.k, false); 469 | init_ffm_coef(coef_t1, param); 470 | 471 | ffm_fit_sgd(coef_t1, X_crs, y, &param); 472 | ffm_coef *grad = extract_gradient(coef_t0, coef_t1, param.stepsize); 473 | 474 | // check w gradient updates 475 | for (int i = 0; i < n_features; i++) { 476 | // keep copy 477 | double tmp = ffm_vector_get(coef_t0->w, i); 478 | // x + eps 479 | ffm_vector_set(coef_t0->w, i, tmp + eps); 480 | double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 481 | double log_loss = -log(ffm_sigmoid(y_true * y_pred)); 482 | double l_plus = log_loss + 0.5 * l2_penalty(coef_t0); 483 | // x - eps 484 | ffm_vector_set(coef_t0->w, i, tmp - eps); 485 | y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 486 | log_loss = -log(ffm_sigmoid(y_true * y_pred)); 487 | double l_minus = log_loss + 0.5 * l2_penalty(coef_t0); 488 | // restore 489 | ffm_vector_set(coef_t0->w, i, tmp); 490 | // finite central differences 491 | double grad_i = (l_plus - l_minus) / (2 * eps); 492 | 493 | // g_assert_cmpfloat(grad_i, ==, ffm_vector_get(grad->w, i)); 494 | g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-9); 495 | } 496 | 497 | // check V gradient updates 498 | for (int f = 0; f < param.k; f++) 499 | for (int i = 0; i < n_features; i++) { 500 | // keep copy 501 | double tmp = ffm_matrix_get(coef_t0->V, f, i); 502 | // x + eps 503 | ffm_matrix_set(coef_t0->V, f, i, tmp + eps); 504 | double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 505 | double log_loss = -log(ffm_sigmoid(y_true * y_pred)); 506 | double l_plus = log_loss + 0.5 * l2_penalty(coef_t0); 507 | // x - eps 508 | ffm_matrix_set(coef_t0->V, f, i, tmp - eps); 509 | y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row); 510 | log_loss = -log(ffm_sigmoid(y_true * y_pred)); 511 | double l_minus = log_loss + 0.5 * l2_penalty(coef_t0); 512 | // restore 513 | ffm_matrix_set(coef_t0->V, f, i, tmp); 514 | // finite central differences 515 | double grad_i = (l_plus - l_minus) / (2 * eps); 516 | 517 | g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)), <, 1e-10); 518 | } 519 | 520 | free_ffm_coef(coef_t0); 521 | free_ffm_coef(coef_t1); 522 | free_ffm_coef(grad); 523 | } 524 | 525 | void test_gradient_check_bpr(TestFixture_T *pFix, gconstpointer pg) { 526 | cs *X_crs = pFix->X_t; 527 | ffm_matrix *pairs = ffm_matrix_calloc(1, 2); 528 | int pos_row = 0; 529 | ffm_matrix_set(pairs, 0, 0, pos_row); 530 | int neg_row = 1; 531 | ffm_matrix_set(pairs, 0, 1, neg_row); 532 | 533 | int n_features = pFix->coef->w->size; 534 | 535 | double eps = 0.0001; 536 | 537 | ffm_param param = {.n_iter = 1, 538 | .stepsize = .01, 539 | .init_sigma = .01, 540 | .k = 2, 541 | .init_lambda_w = 0.0, 542 | .init_lambda_V
= 0.0, 543 | .warm_start = 1, 544 | .SOLVER = SOLVER_SGD, 545 | .TASK = TASK_RANKING, 546 | .rng_seed = 44}; 547 | 548 | ffm_coef *coef_t0 = alloc_fm_coef(n_features, param.k, false); 549 | init_ffm_coef(coef_t0, param); 550 | 551 | ffm_coef *coef_t1 = alloc_fm_coef(n_features, param.k, false); 552 | init_ffm_coef(coef_t1, param); 553 | 554 | ffm_fit_sgd_bpr(coef_t1, X_crs, pairs, param); 555 | ffm_coef *grad = extract_gradient(coef_t0, coef_t1, param.stepsize); 556 | 557 | double y_pos, y_neg, bpr_loss, l_plus, l_minus, grad_i, tmp; 558 | // check w gradient updates 559 | for (int i = 0; i < n_features; i++) { 560 | // keep copy 561 | tmp = ffm_vector_get(coef_t0->w, i); 562 | // x + eps 563 | ffm_vector_set(coef_t0->w, i, tmp + eps); 564 | y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row); 565 | y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row); 566 | bpr_loss = -log(ffm_sigmoid(y_pos - y_neg)); 567 | l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0); 568 | // x - eps 569 | ffm_vector_set(coef_t0->w, i, tmp - eps); 570 | y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row); 571 | y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row); 572 | bpr_loss = -log(ffm_sigmoid(y_pos - y_neg)); 573 | l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0); 574 | // restore 575 | ffm_vector_set(coef_t0->w, i, tmp); 576 | // finite central differences 577 | grad_i = (l_plus - l_minus) / (2 * eps); 578 | 579 | // g_assert_cmpfloat(grad_i, ==, ffm_vector_get(grad->w, i)); 580 | g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-9); 581 | } 582 | 583 | // check V gradient updates 584 | for (int f = 0; f < param.k; f++) 585 | for (int i = 0; i < n_features; i++) { 586 | // keep copy 587 | tmp = ffm_matrix_get(coef_t0->V, f, i); 588 | // x + eps 589 | ffm_matrix_set(coef_t0->V, f, i, tmp + eps); 590 | y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row); 591 | y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row); 592 | bpr_loss = -log(ffm_sigmoid(y_pos - y_neg)); 593 | l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0); 594 | // x - eps 595 | ffm_matrix_set(coef_t0->V, f, i, tmp - eps); 596 | y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row); 597 | y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row); 598 | bpr_loss = -log(ffm_sigmoid(y_pos - y_neg)); 599 | l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0); 600 | // restore 601 | ffm_matrix_set(coef_t0->V, f, i, tmp); 602 | // finite central differences 603 | grad_i = (l_plus - l_minus) / (2 * eps); 604 | 605 | // g_assert_cmpfloat(grad_i, ==, ffm_matrix_get(grad->V, f, i)); 606 | g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)), <, 1e-10); 607 | } 608 | 609 | free_ffm_coef(coef_t0); 610 | free_ffm_coef(coef_t1); 611 | free_ffm_coef(grad); 612 | } 613 | 614 | int main(int argc, char **argv) { 615 | /* 616 | feenableexcept(FE_INVALID | 617 | FE_DIVBYZERO | 618 | FE_OVERFLOW | 619 | FE_UNDERFLOW); 620 | */ 621 | 622 | g_test_init(&argc, &argv, NULL); 623 | 624 | TestFixture_T Fixture; 625 | g_test_add("/sgd/util/predict sample", TestFixture_T, &Fixture, 626 | TestFixtureContructorWide, test_sgd_predict_sample, 627 | TestFixtureDestructor); 628 | g_test_add("/sgd/reg/first order", TestFixture_T, &Fixture, 629 | TestFixtureContructorLong, test_first_order_sgd, 630 | TestFixtureDestructor); 631 | g_test_add("/sgd/reg/second order", TestFixture_T, &Fixture, 632 | TestFixtureContructorLong, test_second_order_sgd, 633 | TestFixtureDestructor); 634 | g_test_add("/sgd/class/full", TestFixture_T, &Fixture, 635 | TestFixtureContructorLong, 
test_sgd_classification, 636 | TestFixtureDestructor); 637 | g_test_add("/sgd/bpr/update second order", TestFixture_T, &Fixture, 638 | TestFixtureContructorWide, test_update_second_order_bpr, 639 | TestFixtureDestructor); 640 | g_test_add("/sgd/bpr/first order", TestFixture_T, &Fixture, 641 | TestFixtureContructorLong, test_first_order_bpr, 642 | TestFixtureDestructor); 643 | g_test_add("/sgd/bpr/second order", TestFixture_T, &Fixture, 644 | TestFixtureContructorLong, test_second_order_bpr, 645 | TestFixtureDestructor); 646 | g_test_add_func("/sgd/class/generated data", 647 | test_sgd_classification_generated_data); 648 | g_test_add_func("/sgd/reg/generated data", test_sgd_generated_data); 649 | g_test_add_func("/sgd/bpr/generated data", test_sgd_bpr_generated_data); 650 | 651 | g_test_add_func("/sgd/util/extract_gradient", test_extract_gradient); 652 | g_test_add_func("/sgd/util/l2_penalty", test_l2_penalty); 653 | g_test_add("/sgd/reg/gradient check", TestFixture_T, &Fixture, 654 | TestFixtureContructorWide, test_gradient_check_reg, 655 | TestFixtureDestructor); 656 | g_test_add("/sgd/class/gradient check", TestFixture_T, &Fixture, 657 | TestFixtureContructorWide, test_gradient_check_class, 658 | TestFixtureDestructor); 659 | g_test_add("/sgd/bpr/gradient check", TestFixture_T, &Fixture, 660 | TestFixtureContructorWide, test_gradient_check_bpr, 661 | TestFixtureDestructor); 662 | return g_test_run(); 663 | } 664 | -------------------------------------------------------------------------------- /src/tests/test_ffm_als_mcmc.c: -------------------------------------------------------------------------------- 1 | #include "fast_fm.h" 2 | #include <glib.h> 3 | #include "TestFixtures.h" 4 | #include <gsl/gsl_cdf.h> 5 | #include <gsl/gsl_randist.h> 6 | 7 | void test_eval_second_order_term(TestFixture_T *pFix, gconstpointer pg) { 8 | ffm_vector *y_pred = ffm_vector_calloc(5); 9 | eval_second_order_term(pFix->coef->V, pFix->X_t, y_pred); 10 | 11 | g_assert_cmpfloat(240.0, ==, ffm_vector_get(y_pred, 0)); 12 | g_assert_cmpfloat(240.0, ==, ffm_vector_get(y_pred, 1)); 13 | g_assert_cmpfloat(.0, ==, ffm_vector_get(y_pred, 2)); 14 | g_assert_cmpfloat(240.0, ==, ffm_vector_get(y_pred, 3)); 15 | g_assert_cmpfloat(800.0, ==, ffm_vector_get(y_pred, 4)); 16 | 17 | ffm_vector_free(y_pred); 18 | } 19 | 20 | void test_update_second_order_error(TestFixture_T *pFix, gconstpointer pg) { 21 | ffm_vector *a_theta_v = ffm_vector_calloc(5); 22 | ffm_vector_set(a_theta_v, 2, 1); 23 | ffm_vector_set(a_theta_v, 3, 2); 24 | 25 | ffm_vector *error = ffm_vector_calloc(5); 26 | ffm_vector_set_all(error, 1.5); 27 | 28 | double delta = 0.5; 29 | int column = 1; 30 | update_second_order_error(column, pFix->X, a_theta_v, delta, error); 31 | 32 | g_assert_cmpfloat(1.5, ==, ffm_vector_get(error, 0)); 33 | g_assert_cmpfloat(1.5, ==, ffm_vector_get(error, 1)); 34 | g_assert_cmpfloat(1.5, ==, ffm_vector_get(error, 2)); 35 | g_assert_cmpfloat(2.5, ==, ffm_vector_get(error, 3)); 36 | g_assert_cmpfloat(1.5, ==, ffm_vector_get(error, 4)); 37 | 38 | ffm_vector_free_all(error, a_theta_v); 39 | } 40 | 41 | void test_sparse_update_v_ij(TestFixture_T *pFix, gconstpointer pg) { 42 | double old_v_lf = 0.5; 43 | double l2_reg = 2; 44 | int n_samples = pFix->X->m; 45 | 46 | ffm_vector *err = ffm_vector_calloc(n_samples); 47 | int i; 48 | int column_index = 0; 49 | for (i = 0; i < n_samples; i++) ffm_vector_set(err, i, i); 50 | ffm_vector_scale(err, 0.1); 51 | 52 | ffm_vector *cache = ffm_vector_calloc(n_samples); 53 | ffm_vector *a_theta = ffm_vector_calloc(n_samples); 54 |
ffm_vector_memcpy(cache, err); 55 | ffm_vector_scale(cache, 20); 56 | ffm_vector_scale(err, 57 | -1); // account for sign change in error due to refactoring 58 | // numerator 658.82 59 | // denominator 1286.2500 60 | // 0.51220602526724979 61 | double sum_denominator, sum_nominator; 62 | sum_nominator = sum_denominator = 0; 63 | sparse_v_lf_frac(&sum_denominator, &sum_nominator, pFix->X, column_index, err, 64 | cache, a_theta, old_v_lf); 65 | double new_v_lf = sum_nominator / (sum_denominator + l2_reg); 66 | 67 | g_assert_cmpfloat(0.51220602526724979, ==, new_v_lf); 68 | ffm_vector_free_all(err, cache, a_theta); 69 | } 70 | 71 | void test_sparse_predict(TestFixture_T *pFix, gconstpointer pg) { 72 | ffm_vector *y_pred = ffm_vector_calloc(5); 73 | 74 | sparse_predict(pFix->coef, pFix->X, y_pred); 75 | 76 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y_pred, 0)); 77 | g_assert_cmpfloat(266.0, ==, ffm_vector_get(y_pred, 1)); 78 | g_assert_cmpfloat(29.0, ==, ffm_vector_get(y_pred, 2)); 79 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y_pred, 3)); 80 | g_assert_cmpfloat(848.0, ==, ffm_vector_get(y_pred, 4)); 81 | 82 | ffm_vector_free(y_pred); 83 | } 84 | 85 | void test_row_predict(TestFixture_T *pFix, gconstpointer pg) { 86 | ffm_vector *y_pred = ffm_vector_calloc(5); 87 | 88 | row_predict(pFix->coef, pFix->X_t, y_pred); 89 | 90 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y_pred, 0)); 91 | g_assert_cmpfloat(266.0, ==, ffm_vector_get(y_pred, 1)); 92 | g_assert_cmpfloat(29.0, ==, ffm_vector_get(y_pred, 2)); 93 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y_pred, 3)); 94 | g_assert_cmpfloat(848.0, ==, ffm_vector_get(y_pred, 4)); 95 | 96 | ffm_vector_free(y_pred); 97 | } 98 | 99 | void test_col_predict(TestFixture_T *pFix, gconstpointer pg) { 100 | ffm_vector *y_pred = ffm_vector_calloc(5); 101 | 102 | col_predict(pFix->coef, pFix->X, y_pred); 103 | 104 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y_pred, 0)); 105 | g_assert_cmpfloat(266.0, ==, ffm_vector_get(y_pred, 1)); 106 | g_assert_cmpfloat(29.0, ==, ffm_vector_get(y_pred, 2)); 107 | g_assert_cmpfloat(298.0, ==, ffm_vector_get(y_pred, 3)); 108 | g_assert_cmpfloat(848.0, ==, ffm_vector_get(y_pred, 4)); 109 | 110 | ffm_vector_free(y_pred); 111 | } 112 | 113 | void test_sparse_als_zero_order_only(TestFixture_T *pFix, gconstpointer pg) { 114 | int n_features = pFix->X->n; 115 | int k = 0; 116 | ffm_param param = {.n_iter = 1, 117 | .warm_start = true, 118 | .ignore_w = true, 119 | .init_sigma = 0.1, 120 | .SOLVER = SOLVER_ALS, 121 | .TASK = TASK_REGRESSION}; 122 | 123 | ffm_coef *coef = alloc_fm_coef(n_features, k, true); 124 | param.init_lambda_w = 0; 125 | 126 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 127 | // g_assert_cmpfloat(4466.666666, ==, coef->w_0); 128 | g_assert_cmpfloat(fabs(4466.666666 - coef->w_0), <, 1e-6); 129 | 130 | free_ffm_coef(coef); 131 | } 132 | 133 | void test_sparse_als_first_order_only(TestFixture_T *pFix, gconstpointer pg) { 134 | int n_features = pFix->X->n; 135 | int k = 0; 136 | ffm_param param = {.n_iter = 1, 137 | .warm_start = true, 138 | .ignore_w_0 = true, 139 | .init_sigma = 0.1, 140 | .SOLVER = SOLVER_ALS, 141 | .TASK = TASK_REGRESSION}; 142 | 143 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 144 | coef->w_0 = 0; 145 | param.init_lambda_w = 0; 146 | 147 | ffm_vector_set(coef->w, 0, 10); 148 | ffm_vector_set(coef->w, 1, 20); 149 | 150 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 151 | // hand calculated results 1660.57142857 -11.87755102 152 |
g_assert_cmpfloat(fabs(1660.57142857 - ffm_vector_get(coef->w, 0)), <, 1e-8); 153 | g_assert_cmpfloat(fabs(-11.87755102 - ffm_vector_get(coef->w, 1)), <, 1e-8); 154 | 155 | free_ffm_coef(coef); 156 | } 157 | 158 | void test_sparse_als_second_order_only(TestFixture_T *pFix, gconstpointer pg) { 159 | int n_features = pFix->X->n; 160 | int k = 1; 161 | ffm_param param = {.n_iter = 1, 162 | .warm_start = true, 163 | .ignore_w_0 = true, 164 | .ignore_w = true, 165 | .init_sigma = 0.1, 166 | .SOLVER = SOLVER_ALS, 167 | .TASK = TASK_REGRESSION}; 168 | 169 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 170 | coef->w_0 = 0; 171 | 172 | param.init_lambda_w = 0; 173 | param.init_lambda_V = 0; 174 | 175 | ffm_matrix_set(coef->V, 0, 0, 300); 176 | ffm_matrix_set(coef->V, 0, 1, 400); 177 | 178 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 179 | // hand calculated results 0.79866412 400. 180 | g_assert_cmpfloat(fabs(0.79866412 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-8); 181 | g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-8); 182 | 183 | free_ffm_coef(coef); 184 | } 185 | 186 | void test_sparse_als_all_interactions(TestFixture_T *pFix, gconstpointer pg) { 187 | int n_features = pFix->X->n; 188 | int k = 1; 189 | ffm_param param = {.n_iter = 1, 190 | .warm_start = true, 191 | .ignore_w_0 = false, 192 | .ignore_w = false, 193 | .init_sigma = 0.1, 194 | .SOLVER = SOLVER_ALS, 195 | .TASK = TASK_REGRESSION}; 196 | 197 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 198 | coef->w_0 = 0; 199 | 200 | ffm_vector_set(coef->w, 0, 10); 201 | ffm_vector_set(coef->w, 1, 20); 202 | 203 | ffm_matrix_set(coef->V, 0, 0, 300); 204 | ffm_matrix_set(coef->V, 0, 1, 400); 205 | 206 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 207 | // hand calculated results checked with libfm 208 | g_assert_cmpfloat(fabs(-1755643.33333 - coef->w_0), <, 1e-5); 209 | g_assert_cmpfloat(fabs(-191459.71428571 - ffm_vector_get(coef->w, 0)), <, 210 | 1e-6); 211 | g_assert_cmpfloat(fabs(30791.91836735 - ffm_vector_get(coef->w, 1)), <, 1e-6); 212 | g_assert_cmpfloat(fabs(253.89744249 - ffm_matrix_get(coef->V, 0, 0)), <, 213 | 1e-6); 214 | g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-6); 215 | 216 | param.n_iter = 99; 217 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 218 | 219 | g_assert_cmpfloat(fabs(210911.940403 - coef->w_0), <, 1e-7); 220 | g_assert_cmpfloat(fabs(-322970.68313639 - ffm_vector_get(coef->w, 0)), <, 221 | 1e-6); 222 | g_assert_cmpfloat(fabs(51927.60978978 - ffm_vector_get(coef->w, 1)), <, 1e-6); 223 | g_assert_cmpfloat(fabs(94.76612018 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-6); 224 | g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-6); 225 | 226 | free_ffm_coef(coef); 227 | } 228 | 229 | void test_sparse_als_first_order_interactions(TestFixture_T *pFix, 230 | gconstpointer pg) { 231 | ffm_vector *y_pred = ffm_vector_calloc(5); 232 | 233 | int n_features = pFix->X->n; 234 | int k = 0; 235 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 236 | ffm_param param = {.n_iter = 500, 237 | .init_sigma = 0.1, 238 | .SOLVER = SOLVER_ALS, 239 | .TASK = TASK_REGRESSION}; 240 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 241 | sparse_predict(coef, pFix->X, y_pred); 242 | 243 | /* reference values from sklearn LinearRegression 244 | y_pred: [ 321.05084746 346.6779661 -40.15254237 321.05084746 245 | 790.37288136] 246 | coef: [ 69.6779661 152.16949153] 247 | mse: 3134.91525424 */ 248 | g_assert_cmpfloat(fabs(321.05084746 - 
ffm_vector_get(y_pred, 0)), <, 1e-6); 249 | g_assert_cmpfloat(fabs(346.6779661 - ffm_vector_get(y_pred, 1)), <, 1e-6); 250 | g_assert_cmpfloat(fabs(-40.15254237 - ffm_vector_get(y_pred, 2)), <, 1e-6); 251 | g_assert_cmpfloat(fabs(321.05084746 - ffm_vector_get(y_pred, 3)), <, 1e-6); 252 | g_assert_cmpfloat(fabs(790.37288136 - ffm_vector_get(y_pred, 4)), <, 1e-6); 253 | 254 | ffm_vector_free(y_pred); 255 | free_ffm_coef(coef); 256 | } 257 | 258 | void test_sparse_als_second_interactions(TestFixture_T *pFix, 259 | gconstpointer pg) { 260 | ffm_vector *y_pred = ffm_vector_calloc(5); 261 | 262 | int n_features = pFix->X->n; 263 | int k = 2; 264 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 265 | ffm_param param = {.n_iter = 1000, .init_sigma = 0.1, .SOLVER = SOLVER_ALS}; 266 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 267 | sparse_predict(coef, pFix->X, y_pred); 268 | 269 | /* reference values from sklearn LinearRegression 270 | y_pred: [ 298. 266. 29. 298. 848.] 271 | coef: [ 9. 2. 40.] 272 | mse: 4.53374139449e-27 */ 273 | g_assert_cmpfloat(fabs(298 - ffm_vector_get(y_pred, 0)), <, 1e-4); 274 | g_assert_cmpfloat(fabs(266 - ffm_vector_get(y_pred, 1)), <, 1e-4); 275 | g_assert_cmpfloat(fabs(29 - ffm_vector_get(y_pred, 2)), <, 1e-3); 276 | g_assert_cmpfloat(fabs(298 - ffm_vector_get(y_pred, 3)), <, 1e-4); 277 | g_assert_cmpfloat(fabs(848.0 - ffm_vector_get(y_pred, 4)), <, 1e-4); 278 | 279 | ffm_vector_free(y_pred); 280 | free_ffm_coef(coef); 281 | } 282 | 283 | void test_sparse_mcmc_second_interactions(TestFixture_T *pFix, 284 | gconstpointer pg) { 285 | int n_features = pFix->X->n; 286 | int n_samples = pFix->X->m; 287 | int k = 2; 288 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 289 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 290 | ffm_param param = {.n_iter = 100, 291 | .init_sigma = 0.1, 292 | .SOLVER = SOLVER_MCMC, 293 | .TASK = TASK_REGRESSION, 294 | .rng_seed = 1234}; 295 | sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_pred, param); 296 | 297 | g_assert_cmpfloat(ffm_r2_score(pFix->y, y_pred), >, .98); 298 | 299 | ffm_vector_free(y_pred); 300 | free_ffm_coef(coef); 301 | } 302 | 303 | void test_sparse_mcmc_second_interactions_classification(TestFixture_T *pFix, 304 | gconstpointer pg) { 305 | int n_features = pFix->X->n; 306 | int n_samples = pFix->X->m; 307 | int k = 2; 308 | ffm_vector_make_labels(pFix->y); 309 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 310 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 311 | ffm_param param = {.n_iter = 10, 312 | .init_sigma = 0.1, 313 | .SOLVER = SOLVER_MCMC, 314 | .TASK = TASK_CLASSIFICATION}; 315 | sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_pred, param); 316 | 317 | g_assert_cmpfloat(ffm_vector_accuracy(pFix->y, y_pred), >=, .98); 318 | 319 | ffm_vector_free(y_pred); 320 | free_ffm_coef(coef); 321 | } 322 | 323 | void test_train_test_of_different_size(TestFixture_T *pFix, gconstpointer pg) { 324 | int n_features = pFix->X->n; 325 | int k = 2; 326 | 327 | int n_samples_short = 3; 328 | int m = n_samples_short; 329 | int n = n_features; 330 | cs *X = cs_spalloc(m, n, m * n, 1, 1); /* allocate matrix in triplet form */ 331 | cs_entry(X, 0, 0, 6); 332 | cs_entry(X, 0, 1, 1); 333 | cs_entry(X, 1, 0, 2); 334 | cs_entry(X, 1, 1, 3); 335 | cs_entry(X, 2, 0, 3); 336 | cs *X_csc = cs_compress(X); /* X_csc = compressed-column form of X */ 337 | cs *X_t = cs_transpose(X_csc, 1); 338 | cs_spfree(X); 339 | 340 | ffm_vector *y = ffm_vector_calloc(n_samples_short); 341 | // y: first three entries of [ 298 266 29 298 848 ] 342 |
y->data[0] = 298; 343 | y->data[1] = 266; 344 | y->data[2] = 29; 345 | 346 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 347 | ffm_vector *y_pred = ffm_vector_calloc(n_samples_short); 348 | 349 | ffm_param param = {.n_iter = 20, .init_sigma = 0.01}; 350 | // test: train > test 351 | 352 | param.SOLVER = SOLVER_ALS; 353 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 354 | sparse_predict(coef, X_csc, y_pred); 355 | param.TASK = TASK_CLASSIFICATION; 356 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 357 | sparse_predict(coef, X_csc, y_pred); 358 | 359 | param.SOLVER = SOLVER_MCMC; 360 | param.TASK = TASK_CLASSIFICATION; 361 | sparse_fit(coef, pFix->X, X_csc, pFix->y, y_pred, param); 362 | param.TASK = TASK_REGRESSION; 363 | sparse_fit(coef, pFix->X, X_csc, pFix->y, y_pred, param); 364 | 365 | // test: train < test 366 | param.SOLVER = SOLVER_MCMC; 367 | param.TASK = TASK_CLASSIFICATION; 368 | sparse_fit(coef, X_csc, pFix->X, y_pred, pFix->y, param); 369 | param.TASK = TASK_REGRESSION; 370 | sparse_fit(coef, X_csc, pFix->X, y_pred, pFix->y, param); 371 | 372 | param.SOLVER = SOLVER_ALS; 373 | sparse_fit(coef, X_csc, NULL, y_pred, NULL, param); 374 | sparse_predict(coef, pFix->X, pFix->y); 375 | param.TASK = TASK_CLASSIFICATION; 376 | sparse_fit(coef, X_csc, NULL, y_pred, NULL, param); 377 | sparse_predict(coef, pFix->X, pFix->y); 378 | 379 | ffm_vector_free(y_pred); 380 | free_ffm_coef(coef); 381 | cs_spfree(X_t); 382 | cs_spfree(X_csc); 383 | } 384 | 385 | void test_sparse_als_generated_data(void) { 386 | int n_features = 10; 387 | int n_samples = 100; 388 | int k = 2; 389 | 390 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 391 | 392 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 393 | 394 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 395 | ffm_param param = {.n_iter = 50, .init_sigma = 0.01, .SOLVER = SOLVER_ALS}; 396 | param.init_lambda_w = 23.5; 397 | param.init_lambda_V = 23.5; 398 | sparse_fit(coef, data->X, NULL, data->y, NULL, param); 399 | sparse_predict(coef, data->X, y_pred); 400 | 401 | g_assert_cmpfloat(ffm_r2_score(data->y, y_pred), >, 0.85); 402 | 403 | ffm_vector_free(y_pred); 404 | free_ffm_coef(coef); 405 | TestFixtureDestructor(data, NULL); 406 | } 407 |
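/*
 * Calibration check for the Gibbs hyperparameter sampler: if
 * sample_hyper_parameter draws from the correct distribution, the
 * fraction of draws falling below a fixed reference draw is uniform on
 * [0, 1] across replications; mapping those fractions through the
 * inverse normal CDF and summing the squares should then look roughly
 * chi-square with n_replication degrees of freedom, which is what the
 * assertions below probe.
 */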
408 | void test_hyerparameter_sampling(void) { 409 | ffm_rng *rng = ffm_rng_seed(12345); 410 | 411 | int n_features = 20; 412 | int n_samples = 150; 413 | int k = 1; // don't just change k, the rank is hard coded in the test 414 | // (ffm_vector_get(coef->lambda_V, 0);) 415 | 416 | int n_replication = 40; 417 | int n_draws = 1000; 418 | ffm_vector *alpha_rep = ffm_vector_calloc(n_replication); 419 | ffm_vector *lambda_w_rep = ffm_vector_calloc(n_replication); 420 | ffm_vector *lambda_V_rep = ffm_vector_calloc(n_replication); 421 | ffm_vector *mu_w_rep = ffm_vector_calloc(n_replication); 422 | ffm_vector *mu_V_rep = ffm_vector_calloc(n_replication); 423 | ffm_vector *err = ffm_vector_alloc(n_samples); 424 | 425 | for (int j = 0; j < n_replication; j++) { 426 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 427 | ffm_coef *coef = data->coef; 428 | 429 | sparse_predict(coef, data->X, err); 430 | ffm_vector_scale(err, -1); 431 | ffm_vector_add(err, data->y); 432 | 433 | // make sure the distribution has converged before selecting 434 | // reference / init values 435 | for (int l = 0; l < 50; l++) sample_hyper_parameter(coef, err, rng); 436 | 437 | double alpha_init = coef->alpha; 438 | double lambda_w_init = coef->lambda_w; 439 | double lambda_V_init = ffm_vector_get(coef->lambda_V, 0); 440 | double mu_w_init = coef->mu_w; 441 | double mu_V_init = ffm_vector_get(coef->mu_V, 0); 442 | 443 | double alpha_count = 0; 444 | double lambda_w_count = 0, lambda_V_count = 0; 445 | double mu_w_count = 0, mu_V_count = 0; 446 | 447 | for (int l = 0; l < n_draws; l++) { 448 | sample_hyper_parameter(coef, err, rng); 449 | if (alpha_init > coef->alpha) alpha_count++; 450 | if (lambda_w_init > coef->lambda_w) lambda_w_count++; 451 | if (lambda_V_init > ffm_vector_get(coef->lambda_V, 0)) lambda_V_count++; 452 | if (mu_w_init > coef->mu_w) mu_w_count++; 453 | if (mu_V_init > ffm_vector_get(coef->mu_V, 0)) mu_V_count++; 454 | } 455 | ffm_vector_set(alpha_rep, j, alpha_count / (n_draws + 1)); 456 | ffm_vector_set(lambda_w_rep, j, lambda_w_count / (n_draws + 1)); 457 | ffm_vector_set(lambda_V_rep, j, lambda_V_count / (n_draws + 1)); 458 | ffm_vector_set(mu_w_rep, j, mu_w_count / (n_draws + 1)); 459 | ffm_vector_set(mu_V_rep, j, mu_V_count / (n_draws + 1)); 460 | 461 | TestFixtureDestructor(data, NULL); 462 | } 463 | double chi_alpha = 0; 464 | for (int i = 0; i < n_replication; i++) 465 | chi_alpha += 466 | ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(alpha_rep, i))); 467 | g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_alpha, n_replication), <, .05); 468 | 469 | double chi_lambda_w = 0; 470 | for (int i = 0; i < n_replication; i++) 471 | chi_lambda_w += 472 | ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(lambda_w_rep, i))); 473 | g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_lambda_w, n_replication), <, .05); 474 | 475 | double chi_lambda_V = 0; 476 | for (int i = 0; i < n_replication; i++) 477 | chi_lambda_V += 478 | ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(lambda_V_rep, i))); 479 | g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_lambda_V, n_replication), <, .05); 480 | 481 | double chi_mu_w = 0; 482 | for (int i = 0; i < n_replication; i++) 483 | chi_mu_w += ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(mu_w_rep, i))); 484 | g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_mu_w, n_replication), <, .05); 485 | 486 | double chi_mu_V = 0; 487 | for (int i = 0; i < n_replication; i++) 488 | chi_mu_V += ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(mu_V_rep, i))); 489 | g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_mu_V, n_replication), <, .05); 490 | 491 | ffm_vector_free_all(alpha_rep, lambda_w_rep, lambda_V_rep, mu_w_rep, mu_V_rep, 492 | err); 493 | ffm_rng_free(rng); 494 | } 495 | 496 | void test_sparse_map_gibbs_first_order_interactions(void) { 497 | int n_features = 10; 498 | int n_samples = 100; 499 | int k = 0; 500 | 501 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 502 | 503 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 504 | 505 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 506 | ffm_param param = {.n_iter = 200, .init_sigma = 0.1, .SOLVER = SOLVER_MCMC}; 507 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param); 508 | 509 | g_assert_cmpfloat(ffm_r2_score(data->y, y_pred), >, .99); 510 | ffm_vector_free(y_pred); 511 | free_ffm_coef(coef); 512 | TestFixtureDestructor(data, NULL); 513 | } 514 | 515 | void test_train_test_data(void) { 516 | // test that training and test data are properly handled 517 | // no check of prediction quality 518 | int n_features = 10; 519 | int n_samples_train = 100; 520 | int n_samples_test = 30; 521 | int k = 3; 522 | 523 | TestFixture_T *data_train = 524 | makeTestFixture(124, n_samples_train, n_features, k); 525 | TestFixture_T *data_test = 526 | makeTestFixture(124,
n_samples_test, n_features, k); 527 | 528 | ffm_vector *y_pred = ffm_vector_calloc(n_samples_test); 529 | // gibbs 530 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 531 | ffm_param param = {.n_iter = 200, .init_sigma = 0.1, .SOLVER = SOLVER_MCMC}; 532 | sparse_fit(coef, data_train->X, data_test->X, data_train->y, y_pred, param); 533 | free_ffm_coef(coef); 534 | // als 535 | coef = alloc_fm_coef(n_features, k, false); 536 | ffm_param param_als = { 537 | .n_iter = 200, .init_sigma = 0.1, .SOLVER = SOLVER_ALS}; 538 | sparse_fit(coef, data_train->X, data_test->X, data_train->y, y_pred, 539 | param_als); 540 | sparse_predict(coef, data_test->X, y_pred); 541 | 542 | free_ffm_coef(coef); 543 | TestFixtureDestructor(data_train, NULL); 544 | TestFixtureDestructor(data_test, NULL); 545 | } 546 | 547 | void test_sparse_map_gibbs_second_interactions(void) { 548 | int n_features = 10; 549 | int n_samples = 1000; 550 | int k = 2; 551 | double init_sigma = 0.1; 552 | 553 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 554 | 555 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 556 | 557 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 558 | ffm_param param = { 559 | .n_iter = 5, .init_sigma = init_sigma, .SOLVER = SOLVER_MCMC}; 560 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param); 561 | double score_5_samples = ffm_r2_score(data->y, y_pred); 562 | 563 | free_ffm_coef(coef); 564 | ffm_vector_set_all(y_pred, 0); 565 | coef = alloc_fm_coef(n_features, 0, false); 566 | ffm_param param_50 = { 567 | .n_iter = 50, .init_sigma = init_sigma, .SOLVER = SOLVER_MCMC}; 568 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param_50); 569 | double score_50_samples_first_order = ffm_r2_score(data->y, y_pred); 570 | 571 | free_ffm_coef(coef); 572 | ffm_vector_set_all(y_pred, 0); 573 | coef = alloc_fm_coef(n_features, k + 5, false); 574 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param_50); 575 | double score_50_samples = ffm_r2_score(data->y, y_pred); 576 | 577 | g_assert_cmpfloat(score_50_samples, >, score_50_samples_first_order); 578 | g_assert_cmpfloat(score_50_samples, >, score_5_samples); 579 | g_assert_cmpfloat(score_50_samples, >, .72); 580 | 581 | ffm_vector_free(y_pred); 582 | free_ffm_coef(coef); 583 | TestFixtureDestructor(data, NULL); 584 | } 585 | 586 | void test_sparse_als_classification(void) { 587 | int n_features = 10; 588 | int n_samples = 100; 589 | int k = 2; 590 | double init_sigma = 0.01; 591 | 592 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 593 | // map to classification problem 594 | ffm_vector_make_labels(data->y); 595 | 596 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 597 | 598 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 599 | ffm_param param = {.n_iter = 50, 600 | .init_sigma = init_sigma, 601 | .SOLVER = SOLVER_ALS, 602 | .TASK = TASK_CLASSIFICATION}; 603 | param.init_lambda_w = 5.5; 604 | param.init_lambda_V = 5.5; 605 | sparse_fit(coef, data->X, NULL, data->y, NULL, param); 606 | sparse_predict(coef, data->X, y_pred); 607 | ffm_vector_normal_cdf(y_pred); 608 | 609 | g_assert_cmpfloat(ffm_vector_accuracy(data->y, y_pred), >=, .8); 610 | 611 | ffm_vector_free(y_pred); 612 | free_ffm_coef(coef); 613 | TestFixtureDestructor(data, NULL); 614 | } 615 | 616 | void test_sparse_als_classification_path(void) { 617 | int n_features = 10; 618 | int n_samples = 200; 619 | int k = 4; 620 | double init_sigma = 0.1; 621 | 622 | TestFixture_T *data = makeTestFixture(124, n_samples,
n_features, k); 623 | // map to classification problem 624 | ffm_vector_make_labels(data->y); 625 | 626 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 627 | 628 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 629 | ffm_param param = {.n_iter = 0, 630 | .init_sigma = init_sigma, 631 | .SOLVER = SOLVER_ALS, 632 | .TASK = TASK_CLASSIFICATION}; 633 | param.init_lambda_w = 5.5; 634 | param.init_lambda_V = 5.5; 635 | 636 | double acc = 0; 637 | // objective does not decline strictly monotonically because of the latent 638 | // target, but should still decrease on average (at least until convergence) 639 | for (int i = 1; i < 9; i = i * 2) { 640 | param.n_iter = i; 641 | sparse_fit(coef, data->X, NULL, data->y, NULL, param); 642 | sparse_predict(coef, data->X, y_pred); 643 | ffm_vector_normal_cdf(y_pred); 644 | double tmp_acc = ffm_vector_accuracy(data->y, y_pred); 645 | // training error should (almost) always decrease 646 | // printf("iter %d, last acc %f\n", i, acc); 647 | g_assert_cmpfloat(tmp_acc, >=, acc); 648 | acc = tmp_acc; 649 | } 650 | 651 | ffm_vector_free(y_pred); 652 | free_ffm_coef(coef); 653 | TestFixtureDestructor(data, NULL); 654 | } 655 | 656 | void test_sparse_mcmc_classification(void) { 657 | int n_features = 10; 658 | int n_samples = 100; 659 | int k = 2; 660 | double init_sigma = 0.1; 661 | 662 | TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k); 663 | // map to classification problem 664 | ffm_vector_make_labels(data->y); 665 | 666 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 667 | 668 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 669 | ffm_param param = {.n_iter = 50, 670 | .init_sigma = init_sigma, 671 | .SOLVER = SOLVER_MCMC, 672 | .TASK = TASK_CLASSIFICATION}; 673 | param.init_lambda_w = 5.5; 674 | param.init_lambda_V = 5.5; 675 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param); 676 | sparse_predict(coef, data->X, y_pred); 677 | 678 | g_assert_cmpfloat(ffm_vector_accuracy(data->y, y_pred), >=, .84); 679 | 680 | ffm_vector_free(y_pred); 681 | free_ffm_coef(coef); 682 | TestFixtureDestructor(data, NULL); 683 | } 684 | void test_numerical_stability(void) { 685 | int n_features = 10; 686 | int n_samples = 10000; 687 | int k = 2; 688 | 689 | TestFixture_T *data = makeTestFixture(15, n_samples, n_features, k); 690 | 691 | ffm_vector *y_pred = ffm_vector_calloc(n_samples); 692 | 693 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 694 | ffm_param param = {.n_iter = 7, .init_sigma = 0.01, .SOLVER = SOLVER_ALS}; 695 | param.init_lambda_w = 400; 696 | param.init_lambda_V = 400; 697 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param); 698 | sparse_predict(coef, data->X, y_pred); 699 | double score_als = ffm_r2_score(data->y, y_pred); 700 | g_assert_cmpfloat(score_als, >, .98); 701 | 702 | free_ffm_coef(coef); 703 | ffm_vector_set_all(y_pred, 0); 704 | 705 | coef = alloc_fm_coef(n_features, k, false); 706 | ffm_param param_mcmc = { 707 | .n_iter = 50, .init_sigma = 0.01, .SOLVER = SOLVER_MCMC}; 708 | sparse_fit(coef, data->X, data->X, data->y, y_pred, param_mcmc); 709 | double score_gibbs = ffm_r2_score(data->y, y_pred); 710 | g_assert_cmpfloat(score_gibbs, >, .99); 711 | 712 | ffm_vector_free(y_pred); 713 | free_ffm_coef(coef); 714 | TestFixtureDestructor(data, NULL); 715 | } 716 | 717 | void test_map_update_target(void) { 718 | double pred[] = {0.5, 0.2, -0.2, -0.5, -0.1, 0.8}; 719 | double true_[] = {1, 1, -1, -1, 1, -1}; 720 | double z[] = {0.509160434, 0.6750731798, -0.6750731798, 721 | -0.509160434, 0.8626174715, -1.3674022692}; 722 | ffm_vector y_pred = {.data = pred, .size = 6}; 723 | ffm_vector y_true = {.data = true_, .size = 6}; 724 | ffm_vector *z_target = ffm_vector_alloc(6); 725 | map_update_target(&y_pred, z_target, &y_true); 726 | for (int i = 0; i < 6; i++) 727 | g_assert_cmpfloat(fabs(z_target->data[i] - z[i]), <=, 1e-9); 728 | ffm_vector_free(z_target); 729 | } 730 |
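/*
 * Warm-start contract exercised by the two tests below: the ALS solver
 * is deterministic, so 5 iterations plus 5 warm-started iterations must
 * reproduce a single 10-iteration run (MSE between the two predictions
 * near zero); the MCMC solver is only required to land closer to the
 * 100-iteration run than to the 150-iteration one.
 */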
731 | void test_als_warm_start(TestFixture_T *pFix, gconstpointer pg) { 732 | int n_features = pFix->X->n; 733 | int n_samples = pFix->X->m; 734 | int k = 4; 735 | 736 | ffm_vector *y_10_iter = ffm_vector_calloc(n_samples); 737 | ffm_vector *y_15_iter = ffm_vector_calloc(n_samples); 738 | ffm_vector *y_5_plus_5_iter = ffm_vector_calloc(n_samples); 739 | 740 | ffm_param param = {.warm_start = false, 741 | .init_sigma = 0.1, 742 | .SOLVER = SOLVER_ALS, 743 | .TASK = TASK_REGRESSION, 744 | .rng_seed = 123}; 745 | 746 | param.n_iter = 10; 747 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 748 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 749 | sparse_predict(coef, pFix->X, y_10_iter); 750 | 751 | param.n_iter = 15; 752 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 753 | sparse_predict(coef, pFix->X, y_15_iter); 754 | 755 | param.n_iter = 5; 756 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 757 | param.warm_start = true; 758 | sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param); 759 | sparse_predict(coef, pFix->X, y_5_plus_5_iter); 760 | 761 | // check that the results are equal 762 | double mse = ffm_vector_mean_squared_error(y_10_iter, y_5_plus_5_iter); 763 | double mse_diff = ffm_vector_mean_squared_error(y_15_iter, y_5_plus_5_iter); 764 | 765 | g_assert_cmpfloat(mse, <=, 1e-8); 766 | g_assert_cmpfloat(mse, <, mse_diff); 767 | 768 | free_ffm_coef(coef); 769 | ffm_vector_free_all(y_10_iter, y_15_iter, y_5_plus_5_iter); 770 | } 771 | 772 | void test_mcmc_warm_start(TestFixture_T *pFix, gconstpointer pg) { 773 | int n_features = pFix->X->n; 774 | int n_samples = pFix->X->m; 775 | int k = 4; 776 | 777 | ffm_vector *y_10_iter = ffm_vector_calloc(n_samples); 778 | ffm_vector *y_15_iter = ffm_vector_calloc(n_samples); 779 | ffm_vector *y_5_plus_5_iter = ffm_vector_calloc(n_samples); 780 | 781 | ffm_param param = {.warm_start = false, 782 | .init_sigma = 0.1, 783 | .SOLVER = SOLVER_MCMC, 784 | .TASK = TASK_REGRESSION, 785 | .rng_seed = 125}; 786 | 787 | param.n_iter = 100; 788 | // printf("n_iter %d\n", param.n_iter); 789 | ffm_coef *coef = alloc_fm_coef(n_features, k, false); 790 | sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_10_iter, param); 791 | 792 | param.n_iter = 150; 793 | sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_15_iter, param); 794 | 795 | param.n_iter = 50; 796 | sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_5_plus_5_iter, param); 797 | param.warm_start = true; 798 | param.iter_count = param.n_iter; 799 | param.n_iter += 50; // add more iterations 800 | sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_5_plus_5_iter, param); 801 | 802 | // check that the 50+50 warm-started run is closer to the 100-iteration run 803 | // double mse10 = ffm_vector_mean_squared_error(pFix->y, y_5_plus_5_iter); 804 | double mse10_55 = ffm_vector_mean_squared_error(y_10_iter, y_5_plus_5_iter); 805 | double mse15_55 = ffm_vector_mean_squared_error(y_15_iter, y_5_plus_5_iter); 806 | 807 | g_assert_cmpfloat(mse10_55, <, mse15_55); 808 | 809 | free_ffm_coef(coef); 810 | ffm_vector_free_all(y_10_iter, y_15_iter, y_5_plus_5_iter); 811 | } 812 | 813 | int main(int argc, char **argv) { 814 | /* 815 | feenableexcept(FE_INVALID | 816 | FE_DIVBYZERO | 817
| FE_OVERFLOW | 818 | FE_UNDERFLOW); 819 | */ 820 | 821 | g_test_init(&argc, &argv, NULL); 822 | 823 | TestFixture_T Fixture; 824 | 825 | g_test_add("/als/update second-order error", TestFixture_T, &Fixture, 826 | TestFixtureContructorLong, test_update_second_order_error, 827 | TestFixtureDestructor); 828 | 829 | g_test_add("/als/eval second-order term", TestFixture_T, &Fixture, 830 | TestFixtureContructorLong, test_eval_second_order_term, 831 | TestFixtureDestructor); 832 | 833 | g_test_add("/als/update v_ij", TestFixture_T, &Fixture, 834 | TestFixtureContructorLong, test_sparse_update_v_ij, 835 | TestFixtureDestructor); 836 | 837 | g_test_add("/general/predict", TestFixture_T, &Fixture, 838 | TestFixtureContructorLong, test_sparse_predict, 839 | TestFixtureDestructor); 840 | 841 | g_test_add("/general/row_predict", TestFixture_T, &Fixture, 842 | TestFixtureContructorLong, test_row_predict, 843 | TestFixtureDestructor); 844 | 845 | g_test_add("/general/col_predict", TestFixture_T, &Fixture, 846 | TestFixtureContructorLong, test_col_predict, 847 | TestFixtureDestructor); 848 | 849 | g_test_add("/als/zero order only", TestFixture_T, &Fixture, 850 | TestFixtureContructorSimple, test_sparse_als_zero_order_only, 851 | TestFixtureDestructor); 852 | 853 | g_test_add("/als/first order only", TestFixture_T, &Fixture, 854 | TestFixtureContructorSimple, test_sparse_als_first_order_only, 855 | TestFixtureDestructor); 856 | 857 | g_test_add("/als/second order only", TestFixture_T, &Fixture, 858 | TestFixtureContructorSimple, test_sparse_als_second_order_only, 859 | TestFixtureDestructor); 860 | 861 | g_test_add("/als/all interactions", TestFixture_T, &Fixture, 862 | TestFixtureContructorSimple, test_sparse_als_all_interactions, 863 | TestFixtureDestructor); 864 | 865 | g_test_add("/als/first order", TestFixture_T, &Fixture, 866 | TestFixtureContructorLong, 867 | test_sparse_als_first_order_interactions, TestFixtureDestructor); 868 | 869 | g_test_add("/als/second order", TestFixture_T, &Fixture, 870 | TestFixtureContructorLong, test_sparse_als_second_interactions, 871 | TestFixtureDestructor); 872 | 873 | g_test_add("/mcmc/second order", TestFixture_T, &Fixture, 874 | TestFixtureContructorLong, test_sparse_mcmc_second_interactions, 875 | TestFixtureDestructor); 876 | 877 | g_test_add("/mcmc/second order classification", TestFixture_T, &Fixture, 878 | TestFixtureContructorLong, 879 | test_sparse_mcmc_second_interactions_classification, 880 | TestFixtureDestructor); 881 | 882 | g_test_add("/general/train test different size", TestFixture_T, &Fixture, 883 | TestFixtureContructorLong, test_train_test_of_different_size, 884 | TestFixtureDestructor); 885 | 886 | g_test_add_func("/als/generated data", test_sparse_als_generated_data); 887 | 888 | g_test_add_func("/mcmc/MAP gibbs first order", 889 | test_sparse_map_gibbs_first_order_interactions); 890 | 891 | g_test_add_func("/mcmc/hyperparameter sampling", test_hyerparameter_sampling); 892 | 893 | g_test_add_func("/mcmc/MAP gibbs second order", 894 | test_sparse_map_gibbs_second_interactions); 895 | 896 | g_test_add_func("/general/numerical stability", test_numerical_stability); 897 | 898 | g_test_add_func("/mcmc/map update target", test_map_update_target); 899 | 900 | g_test_add_func("/als/classification", test_sparse_als_classification); 901 | g_test_add_func("/als/classification path", 902 | test_sparse_als_classification_path); 903 | g_test_add_func("/mcmc/classification", test_sparse_mcmc_classification); 904 | 905 | g_test_add("/als/warm_start", 
TestFixture_T, &Fixture, 906 | TestFixtureContructorSimple, test_als_warm_start, 907 | TestFixtureDestructor); 908 | 909 | g_test_add("/mcmc/warm_start", TestFixture_T, &Fixture, 910 | TestFixtureContructorSimple, test_mcmc_warm_start, 911 | TestFixtureDestructor); 912 | 913 | return g_test_run(); 914 | } 915 | --------------------------------------------------------------------------------
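The finite-difference gradient checks in src/tests/test_ffm_sgd.c condense to a short recipe: perturb one parameter by +/- eps, re-evaluate the penalized loss, and compare (l_plus - l_minus) / (2 * eps) against the analytic gradient. The sketch below replays that recipe on a one-parameter toy problem; toy_loss and toy_grad are hypothetical stand-ins (not part of fastFM), and only the C standard library is assumed.

#include <assert.h>
#include <math.h>
#include <stdio.h>

/* Toy objective with the same shape as the tests above:
 * l(w) = 0.5 * (y - x * w)^2 + 0.5 * lambda * w^2 */
static double toy_loss(double w, double x, double y, double lambda) {
  double err = y - x * w;
  return 0.5 * err * err + 0.5 * lambda * w * w;
}

/* Analytic gradient: dl/dw = -x * (y - x * w) + lambda * w */
static double toy_grad(double w, double x, double y, double lambda) {
  return -x * (y - x * w) + lambda * w;
}

int main(void) {
  double w = 0.3, x = 2.0, y = 1.5, lambda = 0.5;
  double eps = 1e-4;
  /* central finite difference, truncation error O(eps^2) */
  double l_plus = toy_loss(w + eps, x, y, lambda);
  double l_minus = toy_loss(w - eps, x, y, lambda);
  double grad_fd = (l_plus - l_minus) / (2 * eps);
  double grad_an = toy_grad(w, x, y, lambda);
  printf("finite difference %.12f vs analytic %.12f\n", grad_fd, grad_an);
  assert(fabs(grad_fd - grad_an) < 1e-8);
  return 0;
}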