├── .github
│   └── workflows
│       └── unit_tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── benchmarks
│   └── ml100k
│       ├── README.md
│       ├── convert.nim
│       ├── convex_factorization_machine.nim
│       ├── factorization_machine.nim
│       ├── factorization_machine_adagrad.nim
│       ├── factorization_machine_adagrad_multi.nim
│       ├── factorization_machine_sgd.nim
│       ├── factorization_machine_sgd_multi.nim
│       ├── factorization_machine_sgd_stream.nim
│       ├── factorization_machine_sgd_stream_multi.nim
│       ├── factorization_machine_stream.nim
│       ├── ffm_adagrad.nim
│       ├── ffm_sgd.nim
│       ├── higher_order_factorization_machine.nim
│       ├── linear_model.nim
│       ├── make_ml100k_dataset.nim
│       ├── make_ml100k_dataset_field.nim
│       ├── matrix_factorization.nim
│       ├── sparse_fm.nim
│       ├── sparse_fm_bcd.nim
│       ├── sparse_fm_fista.nim
│       ├── sparse_fm_katyusha.nim
│       ├── sparse_fm_l1.nim
│       ├── sparse_fm_mbpsgd.nim
│       ├── sparse_fm_nmapgd.nim
│       ├── sparse_fm_pgd.nim
│       ├── sparse_fm_psgd.nim
│       ├── sparse_fm_squaredl12.nim
│       ├── user_item_bias.nim
│       └── utils.nim
├── nimfm.nimble
├── src
│   ├── nimfm.nim
│   ├── nimfm
│   │   ├── dataset.nim
│   │   ├── extmath.nim
│   │   ├── kernels.nim
│   │   ├── loss.nim
│   │   ├── metrics.nim
│   │   ├── model.nim
│   │   ├── model
│   │   │   ├── convex_factorization_machine.nim
│   │   │   ├── factorization_machine.nim
│   │   │   ├── field_aware_factorization_machine.nim
│   │   │   ├── fm_base.nim
│   │   │   ├── models.nim
│   │   │   └── params.nim
│   │   ├── modules.nim
│   │   ├── optimizer.nim
│   │   ├── optimizer
│   │   │   ├── adagrad.nim
│   │   │   ├── adagrad_ffm.nim
│   │   │   ├── adagrad_ffm_multi.nim
│   │   │   ├── adagrad_multi.nim
│   │   │   ├── cd.nim
│   │   │   ├── fista.nim
│   │   │   ├── fit_linear.nim
│   │   │   ├── greedy_cd.nim
│   │   │   ├── hazan.nim
│   │   │   ├── katyusha.nim
│   │   │   ├── minibatch_psgd.nim
│   │   │   ├── nmapgd.nim
│   │   │   ├── optimizer_base.nim
│   │   │   ├── optimizers.nim
│   │   │   ├── pbcd.nim
│   │   │   ├── pcd.nim
│   │   │   ├── pgd.nim
│   │   │   ├── psgd.nim
│   │   │   ├── sgd.nim
│   │   │   ├── sgd_ffm.nim
│   │   │   ├── sgd_ffm_multi.nim
│   │   │   ├── sgd_multi.nim
│   │   │   └── utils.nim
│   │   ├── regularizer.nim
│   │   ├── regularizer
│   │   │   ├── l1.nim
│   │   │   ├── l21.nim
│   │   │   ├── omegacs.nim
│   │   │   ├── omegati.nim
│   │   │   ├── regularizers.nim
│   │   │   ├── squaredl12.nim
│   │   │   ├── squaredl21.nim
│   │   │   └── utils.nim
│   │   ├── tensor.nim
│   │   ├── tensor
│   │   │   ├── sparse.nim
│   │   │   ├── sparse_stream.nim
│   │   │   └── tensor.nim
│   │   └── utils.nim
│   ├── nimfm_cfm.nim
│   └── nimfm_sparsefm.nim
└── tests
    ├── comb.nim
    ├── config.nims
    ├── kernels_slow.nim
    ├── model
    │   ├── cfm_slow.nim
    │   ├── ffm_slow.nim
    │   └── fm_slow.nim
    ├── optimizer
    │   ├── adagrad_ffm_slow.nim
    │   ├── adagrad_slow.nim
    │   ├── cd_slow.nim
    │   ├── fit_linear_slow.nim
    │   ├── greedy_cd_slow.nim
    │   ├── hazan_slow.nim
    │   ├── pbcd_slow.nim
    │   ├── pcd_slow.nim
    │   ├── psgd_slow.nim
    │   ├── sgd_ffm_slow.nim
    │   └── sgd_slow.nim
    ├── regularizer
    │   ├── l1_slow.nim
    │   ├── l21_slow.nim
    │   ├── omegacs_slow.nim
    │   ├── omegati_slow.nim
    │   ├── regularizers.nim
    │   ├── squaredl12_slow.nim
    │   ├── squaredl21_slow.nim
    │   └── utils.nim
    ├── test_adagrad.nim
    ├── test_adagrad_ffm.nim
    ├── test_cd.nim
    ├── test_dataset.nim
    ├── test_greedy_cd.nim
    ├── test_hazan.nim
    ├── test_kernels.nim
    ├── test_label_encoder.nim
    ├── test_metrics.nim
    ├── test_pbcd_l1.nim
    ├── test_pbcd_l21.nim
    ├── test_pbcd_omegacs.nim
    ├── test_pbcd_squaredl21.nim
    ├── test_pcd_l1.nim
    ├── test_pcd_squaredl12.nim
    ├── test_pcd_ti.nim
    ├── test_psgd_l1.nim
    ├── test_psgd_l21.nim
    ├── test_psgd_squaredl12.nim
    ├── test_psgd_squaredl21.nim
    ├── test_sgd.nim
    ├── test_sgd_ffm.nim
    ├── test_squaredl12.nim
    └── utils.nim

/.github/workflows/unit_tests.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | on: [push]
 3 | jobs:
 4 |   build:
 5 |     runs-on: ubuntu-latest
 6 |     steps:
 7 |     - uses: actions/checkout@v2
 8 | 
 9 |     - name: Install lapack
10 |       run: |
11 |         sudo apt update
12 |         sudo apt install gfortran
13 |         sudo apt install libblas-dev liblapack-dev libatlas-base-dev
14 | 
15 |     - name: Cache choosenim
16 |       id: cache-choosenim
17 |       uses: actions/cache@v2
18 |       with:
19 |         path: ~/.choosenim
20 |         key: ${{ runner.os }}-choosenim-1.0.6
21 | 
22 |     - name: Cache nimble
23 |       id: cache-nimble
24 |       uses: actions/cache@v2
25 |       with:
26 |         path: ~/.nimble
27 |         key: ${{ runner.os }}-nimble-1.0.6
28 | 
29 |     - name: Install Nim
30 |       if: steps.cache-choosenim.outputs.cache-hit != 'true' || steps.cache-nimble.outputs.cache-hit != 'true'
31 |       run: |
32 |         export CHOOSENIM_CHOOSE_VERSION="1.0.6"
33 |         curl https://nim-lang.org/choosenim/init.sh -sSf > init.sh
34 |         sh init.sh -y
35 | 
36 |     - name: Install project
37 |       run: |
38 |         export PATH=$HOME/.nimble/bin:$PATH
39 |         nimble install -y
40 |         echo $HOME/.nimble/bin >> $GITHUB_PATH
41 | 
42 |     - name: Unit testing
43 |       run: |
44 |         nimble test
45 |     - name: Build binaries
46 |       run: |
47 |         nimble make
48 | 
49 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | nimcache/
2 | nimblecache/
3 | htmldocs/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Kyohei Atarashi
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/benchmarks/ml100k/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmarks on MovieLens 100K dataset
 2 | [MovieLens 100K](https://grouplens.org/datasets/movielens/100k/) is a dataset
 3 | for the movie recommendation task. It has 943 users, 1,682 items,
 4 | and 100,000 ratings. It also provides additional side information that
 5 | factorization machines can leverage.
 6 | 
 7 | We provide code for some baseline methods [1], factorization machines [2],
 8 | and higher-order factorization machines [3] on the MovieLens 100K dataset.
 9 | The baseline methods implemented in this benchmark are expressed as
10 | factorization machines by changing the input features and
11 | hyperparameter settings (see [2] and the sketch below).
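To make that correspondence concrete, here is a minimal sketch (not one of the
benchmark files) of recovering matrix factorization from the generic FM
trainer by restricting the input to the user/item one-hot features. The
`fitLinear`/`fitIntercept` keyword arguments are an assumption here, mirroring
the `newConvexFactorizationMachine` constructor in `src/nimfm/model`:

    import nimfm/dataset, nimfm/model
    import nimfm/optimizer/cd

    var X: CSCDataset
    var y: seq[float64]
    # 943 users + 1,682 items = 2,625 one-hot columns.
    loadSVMLightFile("dataset/ml-100k_user_item_train.svm", X, y,
                     nFeatures=2625)
    # With only the two one-hot blocks as input, the pairwise term of a
    # second-order FM reduces to <p_u, q_i>, i.e. matrix factorization;
    # fitLinear/fitIntercept add the user/item/overall bias baseline.
    var fm = newFactorizationMachine(task=regression, fitLinear=true,
                                     fitIntercept=true)
    var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10)
    optim.fit(X, y, fm)
    echo("Train RMSE: ", fm.score(X, y))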
12 | 
13 | In addition, this benchmark provides factorization machines with sparse regularization [4,5,6,7].
14 | 
15 | ## Dependencies
16 | - [zip library in Nim](https://github.com/nim-lang/zip)
17 | 
18 | ## Usage
19 | 1. Compile and run `make_ml100k_dataset.nim` with the `-d:ssl` option:
20 | 
21 | 
22 |        nim c --run --d:ssl make_ml100k_dataset.nim
23 | 
24 | Then, `ml-100k.zip` will be downloaded and uncompressed, and the following
25 | files will be created in the `dataset` directory:
26 | - `ml-100k_user_item_all.svm`
27 | - `ml-100k_user_item_train.svm`
28 | - `ml-100k_user_item_test.svm`
29 | - `ml-100k_user_item_feature_all.svm`
30 | - `ml-100k_user_item_feature_train.svm`
31 | - `ml-100k_user_item_feature_test.svm`
32 | 
33 | The first three files are svmlight-format dataset files for
34 | `matrix_factorization.nim` and `user_item_bias.nim`.
35 | `matrix_factorization.nim` provides matrix factorization (MF) (a.k.a. latent
36 | factor (feature) model) methods [1]. `user_item_bias.nim` provides linear
37 | regression with user-id and item-id as input. It predicts the rating as
38 | overall_bias + user_bias + item_bias.
39 | 
40 | If you want to run the field-aware factorization machine examples (`ffm_sgd.nim` and `ffm_adagrad.nim`), please compile and run `make_ml100k_dataset_field.nim` first.
41 | 
42 | 2. Compile the other nim files with `-d:release` and `-d:danger`,
43 | and run them. For example,
44 | 
45 |        nim c --run --d:release --d:danger matrix_factorization.nim
46 | 
47 | `factorization_machine.nim`,
48 | `factorization_machine_sgd.nim`, `factorization_machine_adagrad.nim`, `higher_order_factorization_machine.nim`,
49 | and `linear_model.nim` use not only user-id and item-id but also
50 | - age, occupation, sex, and zipcode of the user (dimension: 49),
51 | - released year and genre of the item (dimension: 29).
52 | 
53 | For more details about the feature encoding, please see [3].
54 | 
55 | `factorization_machine_stream.nim` and `factorization_machine_sgd_stream.nim` are examples using our binary data format.
56 | Before running them, you must run `convert.nim`.
57 | It outputs binary versions of `ml-100k_user_item_feature_train.svm` and `ml-100k_user_item_feature_test.svm`.
58 | 
59 | The `sparse*` files provide factorization machines with sparse regularization [4,5,6,7] (see the sketch after the references).
60 | 
61 | When compiling `factorization_machine_sgd_multi`, `factorization_machine_sgd_stream_multi`, `factorization_machine_adagrad_multi`, `ffm_sgd`, or `ffm_adagrad`, use the `--threads:on` flag: they use multiple threads.
62 | 
63 | ## References
64 | 1. Y. Koren. Factorization meets the neighborhood: a multifaceted collaborative filtering model. In KDD, pp. 426--434, 2008.
65 | 
66 | 2. S. Rendle. Factorization machines. In ICDM, pp. 995--1000, 2010.
67 | 
68 | 3. M. Blondel, A. Fujino, N. Ueda, M. Ishihata. Higher-order factorization machines. In NeurIPS, pp. 3351--3359, 2016.
69 | 
70 | 4. Z. Pan, E. Chen, Q. Liu, T. Xu, H. Ma, and H. Lin. Sparse factorization machines for click-through rate prediction. In ICDM, pp. 400--409, 2016.
71 | 
72 | 5. J. Xu, K. Lin, P. N. Tan, and J. Zhou. Synergies that matter: Efficient interaction selection via sparse factorization machine. In SDM, pp. 1008--0116, 2016.
73 | 
74 | 6. H. Zhao, Q. Yao, J. Li, Y. Song, and D. L. Lee. Meta-graph based recommendation fusion over heterogeneous information networks. In KDD, pp. 635--644, 2017.
75 | 
76 | 7. K. Atarashi, S. Oyama, and M. Kurihara. Factorization machines with regularization for sparse feature interactions. preprint.
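As a concrete entry point to the `sparse*` examples, the core pattern looks
roughly as follows. This is a sketch, not one of the benchmark files: the
`nimfm/loss` and `nimfm/regularizer` import paths and the hyperparameter
values are assumptions based on the source tree and on the defaults in
`src/nimfm/optimizer/fista.nim`; see `sparse_fm_fista.nim` for the real thing.

    import nimfm/dataset, nimfm/model, nimfm/loss, nimfm/regularizer
    import nimfm/optimizer/fista

    var X: CSRDataset
    var y: seq[float64]
    loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", X, y,
                     nFeatures=2703)
    var sfm = newFactorizationMachine(task=regression)
    # gamma controls the strength of the sparsity-inducing penalty;
    # SquaredL12 is the sparse regularizer studied in [7].
    var optim = newFISTA(maxIter=100, gamma=1e-4, loss=newSquared(),
                         reg=newSquaredL12())
    optim.fit(X, y, sfm)
    echo("Train RMSE: ", sfm.score(X, y))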
77 | 
--------------------------------------------------------------------------------
/benchmarks/ml100k/convert.nim:
--------------------------------------------------------------------------------
 1 | import nimfm
 2 | 
 3 | 
 4 | when isMainModule:
 5 |   convertSVMLightFile("dataset/ml-100k_user_item_feature_train.svm",
 6 |                       "dataset/ml-100k_user_item_feature_train_samples",
 7 |                       "dataset/ml-100k_train_labels")
 8 |   convertSVMLightFile("dataset/ml-100k_user_item_feature_test.svm",
 9 |                       "dataset/ml-100k_user_item_feature_test_samples",
10 |                       "dataset/ml-100k_test_labels")
11 |   transposeFile("dataset/ml-100k_user_item_feature_train_samples",
12 |                 "dataset/ml-100k_user_item_feature_train_samples_csc",
13 |                 cachesize=5)
14 |   transposeFile("dataset/ml-100k_user_item_feature_train_samples_csc",
15 |                 "dataset/ml-100k_user_item_feature_train_samples_csc_transpose",
16 |                 cachesize=5)
--------------------------------------------------------------------------------
/benchmarks/ml100k/convex_factorization_machine.nim:
--------------------------------------------------------------------------------
 1 | import nimfm/dataset, nimfm/model
 2 | import nimfm/optimizer/hazan, nimfm/optimizer/greedy_cd
 3 | 
 4 | 
 5 | when isMainModule:
 6 |   var XTr, XTe: CSCDataset
 7 |   var yTr, yTe: seq[float64]
 8 |   loadSVMLightFile("dataset/ml-100k_user_item_train.svm",
 9 |                    XTr, yTr, nFeatures=2625)
10 |   loadSVMLightFile("dataset/ml-100k_user_item_test.svm",
11 |                    XTe, yTe, nFeatures=2625)
12 | 
13 |   var cfm = newConvexFactorizationMachine(
14 |     task=regression, maxComponents=50, ignoreDiag=true, fitLinear=true,
15 |     fitIntercept=true, warmStart=false)
16 | 
17 |   var optimGCD = newGreedyCD(
18 |     maxIter=30, maxIterInner=10, maxIterPower=100,
19 |     beta=1e-4, alpha0=1e-8, alpha=1e-8,
20 |     refitFully=false, nRefitting=10, verbose=1)
21 |   echo("Training CFM by GreedyCD.")
22 |   optimGCD.fit(XTr, yTr, cfm)
23 |   echo("Train RMSE: ", cfm.score(XTr, yTr))
24 |   echo("Test RMSE: ", cfm.score(XTe, yTe))
25 |   echo()
26 | 
27 |   var optimHazan = newHazan(
28 |     maxIter=100, maxIterPower=100, optimal=true, verbose=1,
29 |     eta=600, nTol=100)
30 |   echo("Training CFM by Hazan's Algorithm.")
31 |   optimHazan.fit(XTr, yTr, cfm)
32 |   echo("Train RMSE: ", cfm.score(XTr, yTr))
33 |   echo("Test RMSE: ", cfm.score(XTe, yTe))
--------------------------------------------------------------------------------
/benchmarks/ml100k/factorization_machine.nim:
--------------------------------------------------------------------------------
 1 | import nimfm/dataset, nimfm/model
 2 | import nimfm/optimizer/cd
 3 | 
 4 | 
 5 | when isMainModule:
 6 |   var XTr, XTe: CSCDataset
 7 |   var yTr, yTe: seq[float64]
 8 |   loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm",
 9 |                    XTr, yTr, nFeatures=2703)
10 |   loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm",
11 |                    XTe, yTe, nFeatures=2703)
12 | 
13 |   var fm = newFactorizationMachine(task=regression)
14 |   var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10)
15 |   optim.fit(XTr, yTr, fm)
16 | 
17 |   echo("Train RMSE: ", fm.score(XTr, yTr))
18 |   echo("Test RMSE: ", fm.score(XTe, yTe))
--------------------------------------------------------------------------------
/benchmarks/ml100k/factorization_machine_adagrad.nim:
--------------------------------------------------------------------------------
 1 | import nimfm/dataset, nimfm/model
 2 | import nimfm/optimizer/adagrad
 3 | 
 4 | 
 5 | when isMainModule:
 6 |   var XTr, XTe: CSRDataset  # Use CSRDataset
 7 |   var yTr, yTe: seq[float64]
 8 | 
loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression) 14 | var optim = newAdaGraD(eta0=0.1, maxIter=100, beta=1e-3, 15 | alpha0=1e-10, alpha=1e-10) 16 | optim.fit(Xtr, yTr, fm) 17 | 18 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_adagrad_multi.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model, nimfm/loss 2 | import nimfm/optimizer/adagrad_multi 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRDataset # Use CSRDataset 7 | var yTr, yTe: seq[float64] 8 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression) 14 | var optim = newAdaGrad(eta0=0.1, maxIter=100, beta=1e-3, 15 | alpha0=1e-10, alpha=1e-10) 16 | optim.fit(XTr, yTr, fm, maxThreads=4) 17 | 18 | echo("Train RMSE: ", fm.score(XTr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRDataset # Use CSRDataset for SGD solver 7 | var yTr, yTe: seq[float64] 8 | let scheduling: SchedulingKind = constant 9 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 10 | XTr, yTr, nFeatures=2703) 11 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 12 | XTe, yTe, nFeatures=2703) 13 | 14 | var fm = newFactorizationMachine(task=regression) 15 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 16 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=false) 17 | optim.fit(Xtr, yTr, fm) 18 | 19 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 20 | echo("Test RMSE: ", fm.score(Xte, yTe)) 21 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd_multi.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd_multi 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRDataset # Use CSRDataset for SGD solver 7 | var yTr, yTe: seq[float64] 8 | let scheduling: SchedulingKind = optimal 9 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 10 | XTr, yTr, nFeatures=2703) 11 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 12 | XTe, yTe, nFeatures=2703) 13 | var fm = newFactorizationMachine(task=regression) 14 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, tol = -10, 15 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=true) 16 | optim.fit(XTr, yTr, fm, maxThreads=4) 17 | 18 | echo("Train RMSE: ", fm.score(XTr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd_stream.nim: 
-------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd 3 | 4 | 5 | when isMainModule: 6 | var XTr = newStreamCSRDataset("dataset/ml-100k_user_item_feature_train_samples", cacheSize=5) 7 | var XTe = newStreamCSRDataset("dataset/ml-100k_user_item_feature_test_samples") 8 | var yTr = loadStreamLabel("dataset/ml-100k_train_labels") 9 | var yTe = loadStreamLabel("dataset/ml-100k_test_labels") 10 | var fm = newFactorizationMachine(task=regression) 11 | let scheduling: SchedulingKind = constant 12 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 13 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=false) 14 | 15 | optim.fit(Xtr, yTr, fm) 16 | 17 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 18 | echo("Test RMSE: ", fm.score(Xte, yTe)) 19 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd_stream_multi.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd_multi 3 | 4 | 5 | when isMainModule: 6 | var XTr = newStreamCSRDataset("dataset/ml-100k_user_item_feature_train_samples", 7 | cacheSize=5) 8 | var XTe = newStreamCSRDataset("dataset/ml-100k_user_item_feature_test_samples") 9 | var yTr = loadStreamLabel("dataset/ml-100k_train_labels") 10 | var yTe = loadStreamLabel("dataset/ml-100k_test_labels") 11 | var fm = newFactorizationMachine(task=regression) 12 | let scheduling: SchedulingKind = optimal 13 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 14 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=false) 15 | 16 | optim.fit(XTr, yTr, fm, maxThreads=4) 17 | 18 | echo("Train RMSE: ", fm.score(XTr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_stream.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/cd 3 | 4 | 5 | when isMainModule: 6 | var XTr = newStreamCSCDataset("dataset/ml-100k_user_item_feature_train_samples_csc", 7 | cacheSize=5) 8 | var XTe = newStreamCSRDataset("dataset/ml-100k_user_item_feature_test_samples") 9 | var yTr = loadStreamLabel("dataset/ml-100k_train_labels") 10 | var yTe = loadStreamLabel("dataset/ml-100k_test_labels") 11 | var fm = newFactorizationMachine(task=regression) 12 | var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10) 13 | optim.fit(Xtr, yTr, fm) 14 | 15 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 16 | echo("Test RMSE: ", fm.score(Xte, yTe)) -------------------------------------------------------------------------------- /benchmarks/ml100k/ffm_adagrad.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model, nimfm/loss 2 | # import nimfm/optimizer/adagrad_ffm # single-threading fit 3 | import nimfm/optimizer/adagrad_ffm_multi 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRFieldDataset # Use CSRFieldDataset for AdaGrad solver 7 | var yTr, yTe: seq[float64] 8 | loadFFMFile("dataset/ml-100k_user_item_feature_train.ffm", 9 | XTr, yTr, nFeatures=2703, nFields=8) 10 | loadFFMFile("dataset/ml-100k_user_item_feature_test.ffm", 11 | XTe, yTe, nFeatures=2703, nFields=8) 12 | 13 | var ffm = newFieldAwareFactorizationMachine(task=regression) 14 | var optim = 
newAdaGrad(eta0=1.0, maxIter=100, beta=1e-3, alpha0=1e-10, 15 | alpha=1e-10) 16 | #optim.fit(XTr, yTr, ffm) # single-threading fit 17 | optim.fit(XTr, yTr, ffm, maxThreads=4) 18 | 19 | echo("Train RMSE: ", ffm.score(XTr, yTr)) 20 | echo("Test RMSE: ", ffm.score(Xte, yTe)) 21 | -------------------------------------------------------------------------------- /benchmarks/ml100k/ffm_sgd.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | #import nimfm/optimizer/sgd_ffm # single-threading fit 3 | import nimfm/optimizer/sgd_ffm_multi 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRFieldDataset # Use CSRFieldDataset for SGD solver 7 | var yTr, yTe: seq[float64] 8 | let scheduling: SchedulingKind = optimal 9 | loadFFMFile("dataset/ml-100k_user_item_feature_train.ffm", 10 | XTr, yTr, nFeatures=2703, nFields=8) 11 | loadFFMFile("dataset/ml-100k_user_item_feature_test.ffm", 12 | XTe, yTe, nFeatures=2703, nFields=8) 13 | 14 | var ffm = newFieldAwareFactorizationMachine(task=regression) 15 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 16 | beta=1e-3, alpha0=1e-10, alpha=1e-10) 17 | # optim.fit(XTr, yTr, ffm) # single-threading fit 18 | optim.fit(XTr, yTr, ffm, maxThreads=4) 19 | echo("Train RMSE: ", ffm.score(XTr, yTr)) 20 | echo("Test RMSE: ", ffm.score(Xte, yTe)) 21 | -------------------------------------------------------------------------------- /benchmarks/ml100k/higher_order_factorization_machine.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/cd 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSCDataset 7 | var yTr, yTe: seq[float64] 8 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression, degree=3) 14 | var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10) 15 | optim.fit(Xtr, yTr, fm) 16 | 17 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 18 | echo("Test RMSE: ", fm.score(Xte, yTe)) 19 | -------------------------------------------------------------------------------- /benchmarks/ml100k/linear_model.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/cd 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSCDataset 7 | var yTr, yTe: seq[float64] 8 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression, degree=1) 14 | var optim = newCD(maxIter=1000) 15 | optim.fit(Xtr, yTr, fm) 16 | 17 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 18 | echo("Test RMSE: ", fm.score(Xte, yTe)) -------------------------------------------------------------------------------- /benchmarks/ml100k/make_ml100k_dataset.nim: -------------------------------------------------------------------------------- 1 | import httpclient, os, streams, tables, strutils, sequtils, parseutils 2 | import zip/zipfiles, random 3 | import nimfm/dataset, nimfm/utils 4 | 5 | const nUsers = 943 6 | const nItems = 1682 7 | const nRatings = 100_000 8 | const fileurl = "http://files.grouplens.org/datasets/movielens/ml-100k.zip" 9 | const nGenres = 19 10 | 
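# round(x) below bins an integer to the nearest multiple of 10, rounding ties
# up: round(23) == 20, round(25) == 30. Presumably this discretizes the
# user-age feature (ML-100K's only continuous user attribute) for the
# one-hot encoding.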
11 | 12 | proc round(x: int): int = 13 | result = ((x+5) div 10)*10 14 | 15 | 16 | proc createUserItemDataset(indices: openarray[int]) = 17 | var X: CSRDataset 18 | var y: seq[float64] 19 | loadUserItemRatingFile("ml-100k/u.data", X, y) 20 | echo(" Number of samples : ", X.nSamples) 21 | echo(" Number of features: ", X.nFeatures) 22 | let nTrain = int(8*X.nSamples/10) 23 | var 24 | XTr, XTe: CSRDataset 25 | yTr, yTe: seq[float64] 26 | 27 | (XTr, yTr) = shuffle(X, y, indices[0..= 1.0.6", "cligen >= 0.9.43", "nimlapack >= 0.2.0" 11 | 12 | # Compile and create binary in ./bin for end users 13 | task make, "builds nimfm": 14 | exec "mkdir -p bin" 15 | exec "nim c -o:bin/nimfm -d:release -d:danger --threads:on ./src/nimfm.nim" 16 | exec "nim c -o:bin/nimfm_cfm -d:release -d:danger --threads:on ./src/nimfm_cfm.nim" 17 | exec "nim c -o:bin/nimfm_sparsefm -d:release -d:danger --threads:on ./src/nimfm_sparsefm.nim" 18 | 19 | -------------------------------------------------------------------------------- /src/nimfm/extmath.nim: -------------------------------------------------------------------------------- 1 | import strformat, math 2 | import tensor/tensor, tensor/sparse, tensor/sparse_stream, dataset 3 | 4 | type 5 | RowData = RowDataset|RowMatrix|StreamRowMatrix 6 | 7 | ColData = ColDataset|ColMatrix|StreamColMatrix 8 | 9 | 10 | proc matmul*[T: RowData](D: Matrix, S: T, R: var Matrix) = 11 | let 12 | n1 = D.shape[1] 13 | n2 = S.shape[0] 14 | if n1 != n2: 15 | let msg = fmt"D.shape[1] {n1} != shape[0] {n2}." 16 | raise newException(ValueError, msg) 17 | 18 | R[0..^1, 0..^1] = 0.0 19 | for m in 0.. 0: result = -2*y*z 39 | else: result = 0.0 40 | 41 | 42 | proc ddloss*(self: SquaredHinge, y, p: float64): float64 = 43 | let z = 1-p*y 44 | if z > 0: result = 2.0 45 | else: result = 0.0 46 | 47 | 48 | proc mu*(self: SquaredHinge): float64 = 2.0 49 | 50 | 51 | proc newLogistic*(): Logistic = new(Logistic) 52 | 53 | 54 | proc loss*(self: Logistic, y, p: float64): float64 = 55 | let z = p * y 56 | if z > 0: 57 | result = ln(1+exp(-z)) 58 | else: 59 | result = ln(exp(z)+1) - z 60 | 61 | 62 | proc dloss*(self: Logistic, y, p: float64): float64 = 63 | let z = p * y 64 | if z > 0: 65 | result = -y * exp(-z) / (1+exp(-z)) 66 | else: 67 | result = -y / (exp(z)+1) 68 | 69 | 70 | proc ddloss*(self: Logistic, y, p: float64): float64 = 71 | let z = p*y 72 | if z > 0: 73 | result = exp(-z) / ((1+exp(-z))^2) 74 | else: 75 | result = exp(z) / ((1+exp(z))^2) 76 | 77 | 78 | proc mu*(self: Logistic): float64 = 0.25 79 | 80 | 81 | proc newHuber*(threshold=1.0): Huber = Huber(threshold: threshold) 82 | 83 | 84 | proc loss*(self: Huber, y, p: float64): float64 = 85 | let z = abs(y - p) 86 | if z < self.threshold: result = 0.5 * z^2 87 | else: result = self.threshold * (z - 0.5*self.threshold) 88 | 89 | 90 | proc dloss*(self: Huber, y, p: float64): float64 = 91 | let z = abs(y-p) 92 | if z < self.threshold: result = y - p 93 | else: result = self.threshold 94 | 95 | 96 | proc ddloss*(self: Huber, y, p: float64): float64 = 97 | let z = abs(y-p) 98 | if z < self.threshold: result = 1.0 99 | else: result = 0.0 100 | 101 | 102 | proc mu*(self: Huber): float64 = 1.0 103 | -------------------------------------------------------------------------------- /src/nimfm/metrics.nim: -------------------------------------------------------------------------------- 1 | import math, sequtils, algorithm, sugar 2 | import utils 3 | 4 | 5 | proc rmse*(yTrue, yScore: seq[float64]): float64 = 6 | ## Returns root mean squared error. 
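  ## Concretely, rmse(yTrue, yScore) = sqrt((1/n) * sum_i (yScore_i - yTrue_i)^2)
  ## with n = len(yTrue); lower is better.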
 7 |   if len(yTrue) != len(yScore):
 8 |     let msg = "len(yScore)=" & $len(yScore) & ", but len(yTrue)=" & $len(yTrue)
 9 |     raise newException(ValueError, msg)
10 |   result = 0.0
11 |   for (val1, val2) in zip(yScore, yTrue):
12 |     result += pow(val1-val2, 2)
13 |   result = sqrt(result / float(len(yTrue)))
14 | 
15 | 
16 | proc r2*(yTrue, yScore: seq[float64]): float64 =
17 |   ## Returns r2 score (the coefficient of determination).
18 |   if len(yTrue) != len(yScore):
19 |     let msg = "len(yScore)=" & $len(yScore) & ", but len(yTrue)=" & $len(yTrue)
20 |     raise newException(ValueError, msg)
21 | 
22 |   let nSamples = yTrue.len
23 |   var res = 0.0
24 |   for (target, score) in zip(yTrue, yScore):
25 |     res += (target-score)^2
26 | 
27 |   if res == 0:
28 |     result = 1.0
29 |   else:
30 |     let mean = sum(yTrue) / float(nSamples)
31 |     let tot = sum(yTrue.map(x=>(x-mean)^2))
32 |     if tot != 0.0:
33 |       result = 1.0 - res / tot
34 |     else:
35 |       echo("All instances have the same target value.")
36 |       result = 0.0
37 | 
38 | 
39 | proc accuracy*(yTrue, yPred: seq[int]): float64 =
40 |   ## Returns accuracy.
41 |   if len(yPred) != len(yTrue):
42 |     let msg = "len(yPred)=" & $len(yPred) & ", but len(yTrue)=" & $len(yTrue)
43 |     raise newException(ValueError, msg)
44 |   result = 0.0
45 |   for (val1, val2) in zip(yPred, yTrue):
46 |     result += float(val1 == val2)
47 |   result /= float(len(yPred))
48 | 
49 | 
50 | proc precisionRecallFscore*(yTrue, yPred: seq[int], pos=1):
51 |     tuple[prec, recall, fscore: float64] =
52 |   ## Returns precision, recall, and F1-score for "binary classification".
53 |   var
54 |     tp, fp, tn, fn: float64
55 |   if len(yPred) != len(yTrue):
56 |     let msg = "len(yPred)=" & $len(yPred) & ", but len(yTrue)=" & $len(yTrue)
57 |     raise newException(ValueError, msg)
58 | 
59 |   let nUnique = len(deduplicate(yTrue))
60 |   if nUnique > 2:
61 |     echo("yTrue has " & $nUnique & " unique values. " &
62 |          "All values that are not " & $pos & " are regarded as negative.")
63 |   for (target, pred) in zip(yTrue, yPred):
64 |     if target == pos:
65 |       if pred == pos: tp += 1.0
66 |       else: fn += 1.0
67 |     else:
68 |       if pred == pos: fp += 1.0
69 |       else: tn += 1.0
70 |   let prec = if (tp+fp) != 0: tp / (tp+fp) else: 0.0
71 |   let recall = if (tp+fn) != 0: tp / (tp+fn) else: 0.0
72 |   let fscore = if (prec+recall) != 0: 2*prec*recall/(prec+recall) else: 0.0
73 |   result = (prec, recall, fscore)
74 | 
75 | 
76 | proc rocauc*(yTrue: seq[int], yScore: seq[float64], pos:int = 1): float64 =
77 |   ## Returns the area under the receiver operating characteristic curve
78 |   ## for "binary classification".
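  ## The scores are swept in decreasing order as a moving threshold; the area
  ## under the resulting ROC staircase is accumulated by the trapezoidal rule,
  ## with tied scores resolved only when the score changes (so ties contribute
  ## one shared trapezoid). The sum is finally normalized by np*nn, the number
  ## of positive-negative pairs, giving the probability that a random positive
  ## instance is ranked above a random negative one.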
79 |   let indicesSorted = argsort(yScore, SortOrder.Descending)
80 |   result = 0.0
81 |   if len(yTrue) != len(yScore):
82 |     let msg = "len(yScore)=" & $len(yScore) & ", but len(yTrue)=" & $len(yTrue)
83 |     raise newException(ValueError, msg)
84 |   var
85 |     fp, tp, fpPrev, tpPrev: int
86 |     scorePrev: float64 = NegInf
87 |     np, nn: int
88 |   for i in indicesSorted:
89 |     if yScore[i] != scorePrev:
90 |       result += float((fp - fpPrev) * (tp + tpPrev)) / 2.0
91 |       scorePrev = yScore[i]
92 |       fpPrev = fp
93 |       tpPrev = tp
94 | 
95 |     if yTrue[i] == pos:
96 |       np += 1
97 |       tp += 1
98 |     else:
99 |       nn += 1
100 |       fp += 1
101 | 
102 |   result += float((fp - fpPrev) * (tp + tpPrev)) / 2.0
103 |   result /= float(nn*np)
104 | 
--------------------------------------------------------------------------------
/src/nimfm/model.nim:
--------------------------------------------------------------------------------
1 | import ./model/models
2 | export models
--------------------------------------------------------------------------------
/src/nimfm/model/convex_factorization_machine.nim:
--------------------------------------------------------------------------------
 1 | import ../tensor/tensor, ../kernels, fm_base
 2 | import strutils, parseutils, sequtils, algorithm, typetraits
 3 | 
 4 | 
 5 | type
 6 |   ConvexFactorizationMachineObj* = object
 7 |     task*: TaskKind  ## regression or classification.
 8 |     degree*: int  ## Degree of the polynomial; always 2.
 9 |     maxComponents*: int  ## Maximum number of basis vectors.
10 |     fitIntercept*: bool  ## Whether to fit intercept (a.k.a. bias) term.
11 |     fitLinear*: bool  ## Whether to fit linear term.
12 |     ignoreDiag*: bool  ## Whether to ignore the diagonal (FM) or not (PN).
13 |     warmStart*: bool  ## Whether to do warm-start fitting.
14 |     isInitialized*: bool
15 |     P*: Matrix  ## Weights for the polynomial.
16 |                 ## shape (nComponents, nFeatures)
17 |     lams*: Vector  ## Weight for vectors in basis.
18 |                    ## shape: (nComponents)
19 |     w*: Vector  ## Weights for linear term, shape: (nFeatures)
20 |     intercept*: float64  ## Intercept term.
21 | 
22 |   ConvexFactorizationMachine* = ref ConvexFactorizationMachineObj
23 | 
24 | 
25 | proc newConvexFactorizationMachine*(
26 |   task: TaskKind, maxComponents = 30, fitIntercept = true, fitLinear = true,
27 |   ignoreDiag=true, warmStart = false): ConvexFactorizationMachine =
28 |   ## Create a new ConvexFactorizationMachine.
29 |   ## task: classification or regression.
30 |   ## maxComponents: Maximum number of basis vectors.
31 |   ## fitIntercept: Whether to fit intercept (a.k.a. bias) term or not.
32 |   ## fitLinear: Whether to fit linear term or not.
33 |   ## warmStart: Whether to do warm-start fitting or not.
34 |   new(result)
35 |   result.task = task
36 |   result.degree = 2
37 |   if maxComponents < 1:
38 |     raise newException(ValueError, "maxComponents < 1.")
39 |   result.maxComponents = maxComponents
40 |   result.fitIntercept = fitIntercept
41 |   result.fitLinear = fitLinear
42 |   result.ignoreDiag = ignoreDiag
43 |   result.warmStart = warmStart
44 |   result.degree = 2
45 |   result.isInitialized = false
46 |   result.lams = zeros([0])
47 | 
48 | 
49 | proc init*[Dataset](self: ConvexFactorizationMachine, X: Dataset,
50 |                     force=false) =
51 |   ## Initializes the factorization machine.
52 |   ## self is not re-initialized when force=false, warmStart=true,
53 |   ## and self is already initialized.
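  ## For example, init(cfm, X, force=true) always re-initializes cfm,
  ## discarding any previously learned lams, P, w, and intercept.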
54 | if force or not (self.warmStart and self.isInitialized): 55 | let nFeatures: int = X.nFeatures 56 | self.w = zeros([nFeatures]) 57 | self.P = zeros([0, nFeatures]) 58 | self.lams = zeros([0]) 59 | self.intercept = 0.0 60 | self.isInitialized = true 61 | 62 | 63 | proc decisionFunction*[Dataset](self: ConvexFactorizationMachine, 64 | X: Dataset): seq[float64] = 65 | ## Returns the model outputs as seq[float64]. 66 | self.checkInitialized() 67 | let nSamples: int = X.nSamples 68 | let nFeatures = X.nFeatures 69 | var A = zeros([nSamples, 3]) 70 | result = newSeqWith(nSamples, 0.0) 71 | 72 | linear(X, self.w, result) 73 | for i in 0..sgn(x)) 21 | 22 | 23 | proc predictProba*[Dataset, FM](self: FM, X: Dataset): seq[float64] = 24 | ## Returns probabilities that each instance belongs to positive class. 25 | ## It shoud be used only when task=classification. 26 | result = self.decisionFunction(X).map(expit) 27 | 28 | 29 | proc checkTarget*[FM](self: FM, y: seq[SomeNumber]): seq[float64] = 30 | ## Transforms targets vector to float for regression or 31 | ## to sign for classification. 32 | case self.task 33 | of classification: 34 | result = y.map(x => float(sgn(x))) 35 | of regression: 36 | result = y.map(x => float(x)) 37 | 38 | 39 | proc score*[FM, Dataset](self: FM, X: Dataset, y: seq[float64]): float64 = 40 | ## Returns the score between the model outputs and true targets. 41 | ## Computes root mean squared error when task=regression (lower is better). 42 | ## Computes accuracy when task=classification (higher is better). 43 | let yPred = self.decisionFunction(X) 44 | case self.task 45 | of regression: 46 | result = rmse(y, yPred) 47 | of classification: 48 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) -------------------------------------------------------------------------------- /src/nimfm/model/models.nim: -------------------------------------------------------------------------------- 1 | import 2 | factorization_machine, convex_factorization_machine, fm_base, 3 | field_aware_factorization_machine 4 | export 5 | factorization_machine, convex_factorization_machine, fm_base, 6 | field_aware_factorization_machine 7 | -------------------------------------------------------------------------------- /src/nimfm/model/params.nim: -------------------------------------------------------------------------------- 1 | import ../tensor/tensor 2 | 3 | 4 | type 5 | Params* = ref object 6 | P*: Tensor 7 | w*: Vector 8 | intercept*: float64 9 | fitLinear*: bool 10 | fitIntercept*: bool 11 | 12 | 13 | proc newParams*(shape_P: array[3, int], len_w: int, 14 | fitLinear, fitIntercept: bool): Params = 15 | new(result) 16 | result.fitLinear = fitLinear 17 | result.fitIntercept = fitIntercept 18 | result.P = zeros(shape_P) 19 | result.w = zeros([len_w]) 20 | result.intercept = 0.0 21 | 22 | 23 | proc newParams*(P: var Tensor, w: var Vector, intercept: float64, 24 | fitLinear, fitIntercept: bool): Params = 25 | new(result) 26 | result.fitLinear = fitLinear 27 | result.fitIntercept = fitIntercept 28 | result.P = P 29 | result.w = w 30 | result.intercept =intercept 31 | 32 | 33 | proc add*(self: Params, grad: Params, eta_intercept, eta_w, eta_P: float64) = 34 | if self.P.shape != grad.P.shape: 35 | raise newException(ValueError, "self.P.shape != grad.P.shape.") 36 | for i in 0..void = nil) = 14 | ## Fits the factorization machine on X and y by stochastic gradient descent. 
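  ## AdaGrad here is SGD with per-coordinate adaptive step sizes: a running
  ## sum of squared gradients is kept for every parameter (see updateG/update
  ## in adagrad.nim) and each step is scaled roughly as eta0/sqrt(that sum),
  ## so frequently-updated coordinates take smaller and smaller steps.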
15 | ffm.init(X) 16 | 17 | let y = ffm.checkTarget(y) 18 | let 19 | nSamples = X.nSamples 20 | fitLinear = ffm.fitLinear 21 | fitIntercept = ffm.fitIntercept 22 | var 23 | indices = toSeq(0.. 0 and self.it mod self.nCalls == 0: 45 | if not callback.isNil: 46 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) 47 | callback(self, ffm) 48 | inc(self.it) 49 | 50 | # one epoch done 51 | runningLoss /= float(nSamples) 52 | if not callback.isNil: 53 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) 54 | callback(self, ffm) 55 | 56 | let isContinue = stoppingCriterion( 57 | ffm.P, ffm.w, ffm.intercept, self.alpha0, self.alpha, self.beta, 58 | runningLoss, viol, self.tol, self.verbose, epoch, self.maxIter, 59 | isConverged) 60 | if not isContinue: break 61 | 62 | if not isConverged and self.verbose > 0: 63 | echo("Objective did not converge. Increase maxIter.") 64 | 65 | # finalize 66 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) -------------------------------------------------------------------------------- /src/nimfm/optimizer/adagrad_ffm_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../loss 2 | import ../model/field_aware_factorization_machine, ../model/params 3 | from ../model/fm_base import checkTarget 4 | from sgd import stoppingCriterion 5 | from adagrad import AdaGrad, newAdaGrad, init, finalize, updateG, update 6 | export adagrad.AdaGrad, adagrad.newAdaGrad 7 | from sgd_ffm import predictWithGrad 8 | from sgd_multi import nThreads 9 | import sequtils, math, random, sugar, threadpool 10 | 11 | 12 | var 13 | dA {.threadvar.}: Tensor # zeros(P.shape) 14 | 15 | 16 | proc epochSub[L](self: ptr AdaGrad[L], X: ptr RowDataset, P: ptr Tensor, 17 | w: ptr Vector, intercept: ptr float64, y: ptr Vector, 18 | nAugments: int, fitLinear, fitIntercept: bool, 19 | indices: ptr seq[int], s, t: int): (float64, float64) = 20 | if dA.isNil or dA.shape != P[].shape: 21 | dA = zeros(P[].shape) 22 | 23 | for ii in s..void = nil) = 39 | ## Fits the factorization machine on X and y by stochastic gradient descent. 40 | ffm.init(X) 41 | 42 | let y = ffm.checkTarget(y) 43 | let 44 | nSamples = X.nSamples 45 | fitLinear = ffm.fitLinear 46 | fitIntercept = ffm.fitIntercept 47 | nThreads = nThreads(maxThreads) 48 | var 49 | indices = toSeq(0.. 0: 69 | X.readCache(nSamples-nRest) 70 | borders[0] = nSamples - nRest 71 | for th in 0.. 0: 101 | echo("Objective did not converge. 
Increase maxIter.") 102 | 103 | # finalize 104 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) -------------------------------------------------------------------------------- /src/nimfm/optimizer/adagrad_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../model/factorization_machine 2 | import ../model/params, ../loss 3 | from ../model/fm_base import checkTarget 4 | from sgd import predictWithGrad, stoppingCriterion, transpose 5 | from sgd_multi import nThreads 6 | from adagrad import AdaGrad, newAdaGrad, updateG, update, finalize, init 7 | export adagrad.AdaGrad, adagrad.newAdaGrad 8 | import sequtils, math, random, sugar, threadpool 9 | 10 | var 11 | A {.threadvar.}: Matrix # zeros([nComponents, degree+1]) 12 | dA {.threadvar.}: Tensor # zeros(P.shape) 13 | 14 | 15 | proc epochSub[L](self: ptr AdaGrad[L], X: ptr RowDataset, P: ptr Tensor, 16 | w: ptr Vector, intercept: ptr float64, 17 | y: ptr Vector, nComponents, degree, nAugments: int, 18 | fitLinear, fitIntercept: bool, 19 | indices: ptr seq[int], s, t: int): (float64, float64) = 20 | if A.isNil or A.shape != [nComponents, degree+1]: 21 | A = zeros([nComponents, degree+1]) 22 | if dA.isNil or dA.shape != P[].shape: 23 | dA = zeros(P[].shape) 24 | 25 | for ii in s..void = nil) = 42 | ## Fits the factorization machine on X and y by stochastic gradient descent. 43 | fm.init(X) 44 | 45 | let y = fm.checkTarget(y) 46 | let 47 | nSamples = X.nSamples 48 | nComponents = fm.P.shape[1] 49 | nOrders = fm.P.shape[0] 50 | degree = fm.degree 51 | nAugments = fm.nAugments 52 | fitLinear = fm.fitLinear 53 | fitIntercept = fm.fitIntercept 54 | nThreads = nThreads(maxThreads) 55 | var 56 | indices = toSeq(0.. 0: 79 | X.readCache(nSamples-nRest) 80 | borders[0] = nSamples - nRest 81 | for th in 0.. 0: 111 | echo("Objective did not converge. Increase maxIter.") 112 | 113 | # finalize 114 | finalize(self, P, fm.w, fm.intercept, fitLinear, fitIntercept) 115 | transpose(fm.P, P) -------------------------------------------------------------------------------- /src/nimfm/optimizer/fista.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../loss 2 | import ../model/factorization_machine, ../model/fm_base, ../model/params 3 | import optimizer_base, utils 4 | from ../regularizer/regularizers import newSquaredL12 5 | from pgd import predictAll, predictAllWithGrad, linesearch, finalize 6 | import math, sugar 7 | 8 | 9 | type 10 | FISTA*[L, R] = ref object of BaseCSROptimizer 11 | gamma*: float64 12 | loss*: L 13 | reg*: R 14 | rho: float64 15 | sigma: float64 16 | maxSearch: int 17 | t: float64 18 | 19 | 20 | proc newFISTA*[L, R](maxIter=100, alpha0=1e-6, alpha=1e-3, beta=1e-4, 21 | gamma=1e-4, loss: L=newSquared(), reg: R=newSquaredL12(), 22 | rho=0.5, sigma=1.0, maxSearch = -1, verbose = 1, 23 | tol = 1e-6): FISTA[L, R] = 24 | ## Creates new FISTA. 25 | ## maxIter: Maximum number of iteration. At each iteration, 26 | ## all parameters are updated once by using all samples. 27 | ## alpha0: Regularization-strength for intercept. 28 | ## alpha: Regularization-strength for linear term. 29 | ## beta: Regularization-strength for higher-order weights. 30 | ## gamma: Sparsity-inducing-regularization-strength for higher-order weights. 31 | ## loss: Loss function. It must have mu: float64 field and 32 | ## loss/dloss proc: (float64, float64)->float64. 
33 | ## reg: Sparsity-inducing regularization. 34 | ## rho: Paraneter for line search. (0, 1) 35 | ## sigma: Parameter for line search. (0, 1] 36 | ## maxSearch: Maximum number of iterations in line search. If <= 0, 37 | ## line search runs until the stopping condition is satisfied. 38 | ## verbose: Whether to print information on optimization processes. 39 | ## tol: Tolerance hyperparameter for stopping criterion. 40 | result = FISTA[L, R]( 41 | maxIter: maxIter, alpha0: alpha0, alpha: alpha, beta: beta, gamma: gamma, 42 | loss: loss, reg: reg, rho: rho, sigma: sigma, maxSearch: maxSearch, 43 | tol: tol, verbose: verbose, t: 0) 44 | 45 | 46 | proc extrapolate*(z_params, params, old_params: Params, coef: float64) = 47 | z_params <- params 48 | z_params.add(params, coef) 49 | z_params.add(old_params, -coef) 50 | 51 | 52 | proc fit*[L, R](self: FISTA[L, R], X: RowDataset, y: seq[float64], 53 | sfm: FactorizationMachine, 54 | callback: (FISTA[L, R], FactorizationMachine)->void = nil) = 55 | ## Fits the sparse factorization machine on X and y by accelerated pgd. 56 | sfm.init(X) 57 | let y = sfm.checkTarget(y) 58 | let 59 | nSamples = X.nSamples 60 | nFeatures = X.nFeatures 61 | nComponents = sfm.P.shape[1] 62 | nOrders = sfm.P.shape[0] 63 | degree = sfm.degree 64 | fitLinear = sfm.fitLinear 65 | fitIntercept = sfm.fitIntercept 66 | nAugments = sfm.nAugments 67 | var 68 | yPred: Vector = zeros([nSamples]) 69 | dL: Vector = zeros([nSamples]) 70 | P: Tensor = zeros([nOrders, sfm.P.shape[2], nComponents]) 71 | params: Params 72 | old_params = newParams(P.shape, sfm.w.len, fitLinear, fitIntercept) 73 | z_params = newParams(P.shape, sfm.w.len, fitLinear, fitIntercept) 74 | grads = newParams(P.shape, sfm.w.len, fitLinear, fitIntercept) 75 | A: Matrix = zeros([nComponents, degree+1]) 76 | dA: Tensor = zeros(P.shape) 77 | isConverged = false 78 | 79 | # copy for fast training 80 | for order in 0.. 0: # echo header 94 | echoHeader(self.maxIter, viol=true) 95 | 96 | # perform optimization 97 | var lossVal = Inf 98 | var regVal = Inf 99 | for it in 0.. 0 or self.verbose > 0: 127 | viol = computeViol(params, old_params) 128 | 129 | if self.verbose > 0: 130 | echoInfo(it+1, self.maxIter, viol, lossVal, regVal) 131 | 132 | if viol < self.tol: 133 | if self.verbose > 0: echo("Converged at epoch ", it+1, ".") 134 | isConverged = true 135 | break 136 | 137 | if not isConverged and self.verbose > 0: 138 | echo("Objective did not converge. Increase maxIter.") 139 | 140 | # finalize 141 | finalize(sfm, params) -------------------------------------------------------------------------------- /src/nimfm/optimizer/fit_linear.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../loss 2 | import math 3 | 4 | 5 | proc fitLinearCD*[L](w: var Vector, X: ColDataset, y: seq[float64], 6 | yPred: var Vector, colNormSq: Vector, 7 | alpha: float64, loss: L): float64 = 8 | result = 0.0 9 | let nFeatures = X.nFeatures 10 | var 11 | update = 0.0 12 | invStepSize = 0.0 13 | 14 | for j in 0..void = nil) = 52 | ## Fits the factorization machine on X and y by stochastic gradient descent. 53 | ffm.init(X) 54 | 55 | let y = ffm.checkTarget(y) 56 | let 57 | nSamples = X.nSamples 58 | fitLinear = ffm.fitLinear 59 | fitIntercept = ffm.fitIntercept 60 | var 61 | scaling_w = 1.0 62 | scaling_P = 1.0 63 | scalings_w = ones([len(ffm.w)]) 64 | scalings_P = ones([ffm.P.shape[1]]) 65 | indices = toSeq(0.. 
0 and self.it mod self.nCalls == 0: 83 | if not callback.isNil: 84 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, 85 | fitLinear) 86 | callback(self, ffm) 87 | inc(self.it) 88 | 89 | # one epoch done 90 | runningLoss /= float(nSamples) 91 | if not callback.isNil and self.nCalls <= 0: 92 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, 93 | fitLinear) 94 | callback(self, ffm) 95 | let isContinue = stoppingCriterion( 96 | ffm.P, ffm.w, ffm.intercept, self.alpha0, self.alpha, self.beta, 97 | runningLoss, viol, self.tol, self.verbose, epoch, self.maxIter, 98 | isConverged) 99 | if not isContinue: break 100 | 101 | if not isConverged and self.verbose > 0: 102 | echo("Objective did not converge. Increase maxIter.") 103 | 104 | # finalize 105 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, 106 | fitLinear) -------------------------------------------------------------------------------- /src/nimfm/optimizer/sgd_ffm_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor 2 | import ../model/field_aware_factorization_machine 3 | from ../model/fm_base import checkTarget 4 | import sequtils, math, random, sugar, threadpool 5 | from sgd import 6 | SGD, newSGD, finalize, stoppingCriterion, init, SchedulingKind 7 | export sgd.SGD, sgd.newSGD, sgd.SchedulingKind 8 | from sgd_multi import nThreads 9 | from sgd_ffm import step 10 | 11 | var 12 | dA {.threadvar.}: Tensor # zeros(P.shape) 13 | 14 | 15 | proc epochSub[L](self: ptr SGD[L], X: ptr RowDataset, P: ptr Tensor, 16 | w: ptr Vector, intercept: ptr float64, 17 | y: ptr Vector, scaling_P, scaling_w: ptr float64, 18 | scalings_P, scalings_w: ptr Vector, 19 | nAugments: int, fitLinear, fitIntercept: bool, 20 | indices: ptr seq[int], s, t: int): (float64, float64) = 21 | if dA.isNil or dA.shape != P[].shape: 22 | dA = zeros(P[].shape) 23 | for ii in s..void = nil) = 34 | ## Fits the factorization machine on X and y by stochastic gradient descent. 35 | ffm.init(X) 36 | 37 | let y = ffm.checkTarget(y) 38 | let 39 | nSamples = X.nSamples 40 | fitLinear = ffm.fitLinear 41 | fitIntercept = ffm.fitIntercept 42 | nThreads = nThreads(maxThreads) 43 | var 44 | scaling_w = 1.0 45 | scaling_P = 1.0 46 | scalings_w = ones([len(ffm.w)]) 47 | scalings_P = ones([ffm.P.shape[1]]) 48 | indices = toSeq(0.. 0: 67 | X.readCache(nSamples-nRest) 68 | borders[0] = nSamples - nRest 69 | for th in 0.. 0: 101 | echo("Objective did not converge. 
Increase maxIter.") 102 | # finalize 103 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, fitLinear) -------------------------------------------------------------------------------- /src/nimfm/optimizer/sgd_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../model/factorization_machine 2 | from ../model/fm_base import checkTarget 3 | import sequtils, math, random, sugar, threadpool, cpuinfo 4 | from sgd import 5 | SGD, init, stoppingCriterion, transpose, finalize, step, SchedulingKind 6 | export sgd.SGD, sgd.newSGD, sgd.SchedulingKind 7 | 8 | var 9 | A {.threadvar.}: Matrix # zeros([nComponents, degree+1]) 10 | dA {.threadvar.}: Tensor # zeros(P.shape) 11 | 12 | 13 | proc nThreads*(maxThreads: int):int = 14 | if maxThreads < 0: 15 | result = countProcessors() * 2 16 | else: 17 | result = maxThreads 18 | result = min(result, MaxThreadPoolSize) 19 | 20 | 21 | proc epochSub[L](self: ptr SGD[L], X: ptr RowDataset, P: ptr Tensor, 22 | w: ptr Vector, intercept: ptr float64, 23 | y: ptr Vector, scaling_P, scaling_w: ptr float64, 24 | scalings_P, scalings_w: ptr Vector, 25 | nComponents, degree, nAugments: int, 26 | fitLinear, fitIntercept: bool, 27 | indices: ptr seq[int], s, t: int): (float64, float64) = 28 | if A.isNil or A.shape != [nComponents, degree+1]: 29 | A = zeros([nComponents, degree+1]) 30 | if dA.isNil or dA.shape != P[].shape: 31 | dA = zeros(P[].shape) 32 | for ii in s..void = nil) = 43 | ## Fits the factorization machine on X and y by stochastic gradient descent. 44 | fm.init(X) 45 | 46 | let y = fm.checkTarget(y) 47 | let 48 | nSamples = X.nSamples 49 | nComponents = fm.P.shape[1] 50 | nOrders = fm.P.shape[0] 51 | degree = fm.degree 52 | nAugments = fm.nAugments 53 | fitLinear = fm.fitLinear 54 | fitIntercept = fm.fitIntercept 55 | nThreads = nThreads(maxThreads) 56 | var 57 | scaling_w = 1.0 58 | scaling_P = 1.0 59 | scalings_w = ones([len(fm.w)]) 60 | scalings_P = ones([fm.P.shape[2]]) 61 | indices = toSeq(0.. 0: 84 | X.readCache(nSamples-nRest) 85 | borders[0] = nSamples - nRest 86 | for th in 0.. 0: 117 | echo("Objective did not converge. 
Increase maxIter.") 118 | # finalize 119 | finalize(P, fm.w, scaling_P, scaling_w, scalings_P, scalings_w, fitLinear) 120 | transpose(fm.P, P) -------------------------------------------------------------------------------- /src/nimfm/optimizer/utils.nim: -------------------------------------------------------------------------------- 1 | import ../tensor/tensor, ../model/params, ../loss 2 | import math, strformat, strutils 3 | 4 | 5 | proc computeViol*(P, old_P: Tensor, w, old_w: Vector, 6 | intercept, intercept_old: float64, 7 | fitLinear, fitIntercept: bool): float64 {.inline.} = 8 | result = 0.0 9 | for order in 0..= 0: 47 | stdout.write(fmt" {viol:<10.4e}") 48 | if loss >= 0: 49 | stdout.write(fmt" {loss:<10.4e}") 50 | if regul >= 0: 51 | stdout.write(fmt" {regul:<10.4e}") 52 | stdout.write("\n") 53 | stdout.flushFile() 54 | 55 | 56 | proc regularization*[T](P: T, w: Vector, intercept: float64, 57 | alpha0, alpha, beta: float64): float64 = 58 | result = 0.5 * alpha0 * intercept^2 + 0.5 * alpha * norm(w, 2)^2 59 | result += 0.5 * beta * norm(P, 2)^2 60 | 61 | 62 | proc regularization*(params: Params, alpha0, alpha, beta: float64): float64 = 63 | result = regularization(params.P, params.w, params.intercept, 64 | alpha0, alpha, beta) 65 | 66 | 67 | proc objective*[L, T](y: seq[float64], yPred: Vector, P: T, w: Vector, 68 | intercept: float64, alpha0, alpha, beta: float64, 69 | loss: L): (float64, float64) = 70 | result[0] = 0.0 71 | let nSamples = len(y) 72 | for i in 0.. lam: pj *= (1.0 - lam / norm) 28 | else: 29 | pj[0.. ^1] = 0.0 30 | 31 | 32 | # for psgd/pgd/minibatch-psgd 33 | proc prox*(self: L21, P: var Matrix, gamma: float64, degree: int) {.inline.} = 34 | for j in 0.. lam*self.dcache[degree]: 83 | pj *= 1.0 - lam*self.dcache[degree] / norm 84 | else: 85 | pj[0..^1] = 0.0 86 | -------------------------------------------------------------------------------- /src/nimfm/regularizer/omegati.nim: -------------------------------------------------------------------------------- 1 | import ../tensor/tensor, utils, math 2 | 3 | 4 | type 5 | OmegaTI* = ref object 6 | dcache: Vector 7 | cache: Vector 8 | absp: Vector 9 | p: Vector 10 | value*: float64 11 | 12 | 13 | proc newOmegaTI*(): OmegaTI = 14 | new(result) 15 | 16 | 17 | proc eval*(self: OmegaTI, P: Matrix, degree: int): float64 = 18 | let nFeatures = P.shape[0] 19 | let nComponents = P.shape[1] 20 | var cache = zeros([degree+1, nComponents]) 21 | cache[0, 0..^1] = 1.0 22 | for j in 0.. lamScaled: 41 | pj *= 1.0 - lamScaled / norm 42 | else: 43 | pj[.. ^1] = 0.0 44 | 45 | 46 | # for pgd/psgd/minibatch-psgd 47 | # P.shape: [nFeatures, nComponents] 48 | proc prox*(self: SquaredL21, P: var Matrix, 49 | lam: float64, degree: int) {.inline.} = 50 | if not self.transpose: 51 | norm(P, self.norms, 2, 1) 52 | for i in 0..= pivot: 25 | cumsum_cache += v[j] 26 | candidates[nG] = j 27 | nG += 1 28 | else: 29 | candidates[n+nL] = j 30 | nL += 1 31 | # discard greaters from candidates 32 | if ((cumsum + cumsumCache) - float(rho+nG)*pivot) < z: 33 | nCandidates = nL 34 | offset = n 35 | cumsum += cumsum_cache + pivot 36 | candidates[nG] = pivot_idx 37 | nG += 1 38 | rho += nG 39 | else: # discard lessers from candidates 40 | nCandidates = nG 41 | offset = 0 42 | 43 | theta = (cumsum - z) / float(rho) 44 | for i in 0.. 
a[result]: result = i 31 | 32 | 33 | proc expit*(x: float64): float64 = exp(min(0.0, x)) / (1.0 + exp(-abs(x))) 34 | 35 | 36 | proc expit*(a: openarray[float64]): seq[float64] = a.map(expit) 37 | 38 | 39 | proc newLabelEncoder*[T](): LabelEncoder[T] = 40 | result = new(LabelEncoder[T]) 41 | result.table = newTable[T, int]() 42 | result.invTable = newTable[int, T]() 43 | result.classes = newSeq[T]() 44 | 45 | 46 | proc fit*[T](le: LabelEncoder[T], y: openArray[T]) = 47 | # initialization 48 | var nClasses: int = 0 49 | clear(le.table) 50 | clear(le.invTable) 51 | le.classes.setLen(0) 52 | 53 | for val in y: 54 | if not le.table.hasKey(val): 55 | le.table[val] = nClasses 56 | le.classes.add(val) 57 | inc(nClasses) 58 | le.classes.sort() 59 | # sorted transformation 60 | for i, val in le.classes: 61 | le.table[val] = i 62 | le.invTable[i] = val 63 | 64 | 65 | proc transform*[T](le: LabelEncoder[T], y: openArray[T], yEnc: var seq[int]) = 66 | yEnc = newSeq[int](len(y)) 67 | for i, val in y: 68 | if not le.table.haskey(val): 69 | raise newException(KeyError, "Key " & $val & " is unknown.") 70 | yEnc[i] = le.table[val] 71 | 72 | 73 | proc transformed*[T](le: LabelEncoder[T], y: openArray[T]): seq[int] = 74 | transform(le, y, result) 75 | 76 | 77 | proc inverseTransform*[T](le: LabelEncoder[T], y: openArray[int], 78 | yEnc: var seq[T]) = 79 | yEnc.setLen(len(y)) 80 | for i, val in y: 81 | if not le.invTable.haskey(val): 82 | raise newException(KeyError, "Label " & $val & " is unknown.") 83 | yEnc[i] = le.invTable[val] 84 | 85 | 86 | proc inverseTransformed*[T](le: LabelEncoder[T], y: openArray[int]): seq[T] = 87 | inverseTransform(le, y, result) 88 | -------------------------------------------------------------------------------- /tests/comb.nim: -------------------------------------------------------------------------------- 1 | proc comb*(n, m: int, k=0): seq[seq[int]] = 2 | result = @[] 3 | if m == 1: 4 | for i in k..sgn(x)) 70 | 71 | 72 | proc checkTarget*(self: CFMSlow, y: seq[SomeNumber]): seq[float64] = 73 | case self.task 74 | of classification: 75 | result = y.map(x => float(sgn(x))) 76 | of regression: 77 | result = y.map(x => float(x)) 78 | 79 | 80 | proc score*(self: CFMSlow, X: Matrix, y: seq[float64]): float64 = 81 | let yPred = self.decisionFunction(X) 82 | case self.task 83 | of regression: 84 | result = rmse(y, yPred) 85 | of classification: 86 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) -------------------------------------------------------------------------------- /tests/model/ffm_slow.nim: -------------------------------------------------------------------------------- 1 | import nimfm/tensor/tensor, nimfm/metrics 2 | import nimfm/model/fm_base 3 | from nimfm/model/field_aware_factorization_machine import 4 | FieldAwareFactorizationMachineObj 5 | import sugar, random, sequtils, math 6 | 7 | 8 | type 9 | FFMSlow* = ref FieldAwareFactorizationMachineObj 10 | 11 | NotFittedError = object of Exception 12 | 13 | 14 | proc checkInitialized*(self: FFMSlow) = 15 | if not self.isInitialized: 16 | raise newException(NotFittedError, "Factorization machines is not fitted.") 17 | 18 | 19 | proc nAugments*(self: FFMSlow): int = 0 20 | 21 | 22 | proc newFFMSlow*(task: TaskKind, n_components = 10, fitIntercept = true, 23 | fitLinear = true, warmStart = false, randomState = 1, 24 | scale = 0.01): FFMSlow = 25 | new(result) 26 | result.task = task 27 | if n_components < 1: 28 | raise newException(ValueError, "nComponents < 1.") 29 | result.n_components = n_components 30 | 
result.fitIntercept = fitIntercept 31 | result.fitLinear = fitLinear 32 | result.warmStart = warmStart 33 | result.randomState = randomState 34 | result.scale = scale 35 | result.isInitialized = false 36 | 37 | 38 | proc decisionFunction*(self: FFMSlow, X: Matrix, fields: seq[int], 39 | i: int): float64 = 40 | let nFeatures = X.shape[1] 41 | let nFields = max(fields) + 1 42 | let nAugments = self.nAugments 43 | 44 | result = self.intercept 45 | for j in 0..sgn(x)) 73 | 74 | 75 | proc init*(self: FFMSlow, X: Matrix, fields: seq[int]) = 76 | if not (self.warmStart and self.isInitialized): 77 | let nFeatures: int = X.shape[1] 78 | randomize(self.randomState) 79 | let nFields = max(fields) + 1 80 | let nAugments = self.nAugments 81 | self.w = zeros([nFeatures]) 82 | if nAugments > 0: 83 | self.P = randomNormal([nFields + 1, nFeatures+nAugments, self.nComponents], 84 | scale = self.scale) 85 | else: 86 | self.P = randomNormal([nFields, nFeatures+nAugments, self.nComponents], 87 | scale = self.scale) 88 | 89 | self.intercept = 0.0 90 | self.isInitialized = true 91 | 92 | 93 | proc checkTarget*(self: FFMSlow, y: seq[SomeNumber]): seq[float64] = 94 | case self.task 95 | of classification: 96 | result = y.map(x => float(sgn(x))) 97 | of regression: 98 | result = y.map(x => float(x)) 99 | 100 | 101 | proc score*(self: FFMSlow, X: Matrix, fields: seq[int], y: seq[float64]): float64 = 102 | let yPred = self.decisionFunction(X, fields) 103 | case self.task 104 | of regression: 105 | result = rmse(y, yPred) 106 | of classification: 107 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) 108 | 109 | 110 | proc computeGrad*(self: FFMSlow, X: Matrix, fields: seq[int], i: int, 111 | dL: float64, grad: var Tensor) = 112 | let 113 | nFeatures = X.shape[1] 114 | nAugments = self.nAugments 115 | nComponents = self.nComponents 116 | nFields = max(fields)+1 117 | 118 | for j1 in 0..<(nFeatures+nAugments): 119 | let f1 = if j1 < nFeatures: fields[j1] else: nFields 120 | let val1 = if j1 < nFeatures: X[i, j1] else: 1.0 121 | for j2 in (j1+1)..<(nFeatures+nAugments): 122 | let f2 = if j2 < nFeatures: fields[j2] else: nFields 123 | let val2 = if j2 < nFeatures: X[i, j2] else: 1.0 124 | let interaction = val1 * val2 125 | for s in 0..sgn(x)) 78 | 79 | 80 | proc init*(self: FMSlow, X: Matrix) = 81 | if not (self.warmStart and self.isInitialized): 82 | let nFeatures: int = X.shape[1] 83 | randomize(self.randomState) 84 | 85 | self.w = zeros([nFeatures]) 86 | let nOrders = self.nOrders 87 | let nAugments = self.nAugments 88 | self.P = randomNormal([nOrders, self.nComponents, nFeatures+nAugments], 89 | scale = self.scale) 90 | self.intercept = 0.0 91 | self.isInitialized = true 92 | 93 | 94 | proc checkTarget*(self: FMSlow, y: seq[SomeNumber]): seq[float64] = 95 | case self.task 96 | of classification: 97 | result = y.map(x => float(sgn(x))) 98 | of regression: 99 | result = y.map(x => float(x)) 100 | 101 | 102 | proc score*(self: FMSlow, X: Matrix, y: seq[float64]): float64 = 103 | let yPred = self.decisionFunction(X) 104 | case self.task 105 | of regression: 106 | result = rmse(y, yPred) 107 | of classification: 108 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) 109 | 110 | 111 | proc computeGrad*(self: FMSlow, X: Matrix, i: int, dL: float64, 112 | grad: var Tensor) = 113 | let 114 | nFeatures = X.shape[1] 115 | nComponents = self.P.shape[1] 116 | nAugments = self.nAugments 117 | 118 | for order in 0..= cfm.maxComponents: 109 | break 110 | 111 | predict(yPredQuad, yPredLinear, K, X, cfm.P, cfm.lams, 
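# ----------------------------------------------------------------------
# The pairwise term decisionFunction accumulates above, restated for one
# dense row: feature j1 meets feature j2 through the embedding j1 keeps
# for j2's field and vice versa. ffmPairwise and its seq-of-seqs layout
# are illustrative stand-ins for nimfm's Tensor-based code.
proc ffmPairwise(P: seq[seq[seq[float64]]],  # [field][feature][component]
                 x: seq[float64], fields: seq[int]): float64 =
  for j1 in 0..<x.len:
    for j2 in (j1 + 1)..<x.len:
      let (f1, f2) = (fields[j1], fields[j2])
      var dot = 0.0
      for s in 0..<P[f2][j1].len:
        dot += P[f2][j1][s] * P[f1][j2][s]
      result += dot * x[j1] * x[j2]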
cfm.w, 112 | cfm.intercept, ignoreDiag) 113 | residual = y - yPredQuad - yPredLinear 114 | 115 | # fit P 116 | var gradJ = matmul(X.T*residual, X) 117 | if ignoreDiag: 118 | for i in 0..= cfm.maxComponents: # replace old p 127 | s = argmin(cfm.lams) # replace old p whose lambda is minimum 128 | cfm.P[s] = p 129 | 130 | for i in 0.. self.eta: 154 | cfm.lams *= self.eta / sum(cfm.lams) 155 | 156 | # fit w 157 | predict(yPredQuad, yPredLinear, K, X, cfm.P, cfm.lams, cfm.w, 158 | cfm.intercept, ignoreDiag) 159 | residual = y - yPredQuad 160 | yPredLinear <- 0.0 161 | if fitLinear: 162 | vmmul(residual, Z, resZ) 163 | let maggrad = norm(resZ, 1) 164 | let tolCG = 1e-5 * maggrad 165 | w *= colNormSq # since we use left-right preconditioning 166 | cg(ZTZ, resZ, w, maxIter=1000, init=false, tol=tolCG, 167 | preconditioner=Preconditioner) 168 | cfm.w = w[0..= self.maxSearch: break 54 | # update! 55 | alpha *= self.rho 56 | for s in 0.. lam: 35 | shrink = 1.0 - lam / norm 36 | for s in 0.. strength: 43 | shrink = 1.0 - strength / norm 44 | for s in 0..abs(x)), order=Descending) 13 | var S = 2.0 * lam * cumsummed(absp) 14 | for i in 0.. 2: 62 | raise newException(ValueError, "degree > 2") 63 | if self.transpose: 64 | raise newException(ValueError, "transpose=true is not supported in PBCD.") 65 | else: 66 | proxSquaredL12Slow(P[j], lam, degree) 67 | 68 | 69 | # for gd/sgd 70 | # P.shape: [nComponents, nFeatures] 71 | proc prox*(self: SquaredL12Slow, P: var Matrix, 72 | lam: float64, degree: int) {.inline.} = 73 | if degree > 2: 74 | raise newException(ValueError, "degree > 2") 75 | 76 | if self.transpose: 77 | for s in 0.. lamScaled: 41 | shrink = 1.0 - lamScaled / norms[j] 42 | for s in 0..= pivot: 25 | cumsum_cache += v[j] 26 | candidates[nG] = j 27 | nG += 1 28 | else: 29 | candidates[n+nL] = j 30 | nL += 1 31 | # discard greaters from candidates 32 | if ((cumsum + cumsumCache) - float(rho+nG)*pivot) < z: 33 | nCandidates = nL 34 | offset = n 35 | cumsum += cumsum_cache + pivot 36 | candidates[nG] = pivot_idx 37 | nG += 1 38 | rho += nG 39 | else: # discard lessers from candidates 40 | nCandidates = nG 41 | offset = 0 42 | 43 | theta = (cumsum - z) / float(rho) 44 | for i in 0..toFloat(x))) == 1 25 | check rocauc(yTrue, zeros) == 0.5 26 | check rocauc(yTrue, ones) == 0.5 27 | check rocauc(yTrue, inverse.map(x=>toFloat(x))) == 0 28 | check rocauc(yTrue, inverse01.map(x=>toFloat(x))) == 0 29 | 30 | check rocauc(yTrue01, yScore1) == 0.75 31 | check rocauc(yTrue01, yScore2) == 0.5 32 | check rocauc(yTrue01, yTrue.map(x=>toFloat(x))) == 1 33 | check rocauc(yTrue01, zeros) == 0.5 34 | check rocauc(yTrue01, ones) == 0.5 35 | check rocauc(yTrue01, inverse.map(x=>toFloat(x))) == 0 36 | check rocauc(yTrue01, inverse01.map(x=>toFloat(x))) == 0 37 | 38 | 39 | test "Test accuracy": 40 | check accuracy(yTrue, yScore1.map(x=>sgn(x-0.5))) == 0.75 41 | check accuracy(yTrue, yScore2.map(x=>sgn(x))) == 0.5 42 | check accuracy(yTrue, yTrue) == 1.0 43 | check accuracy(yTrue, zeros.map(x=>toInt(x))) == 0 44 | check accuracy(yTrue, ones.map(x=>toInt(x))) == 0.5 45 | 46 | check accuracy(yTrue01, yScore1.map(x=>int((sgn(x-0.5)+1)/2))) == 0.75 47 | check accuracy(yTrue01, yScore2.map(x=>int((sgn(x)+1)/2))) == 0.5 48 | check accuracy(yTrue01, yTrue01) == 1 49 | check accuracy(yTrue, zeros.map(x=>toInt(x))) == 0 50 | check accuracy(yTrue, ones.map(x=>toInt(x))) == 0.5 51 | 52 | 53 | test "Test precision, recall, f-score": 54 | var actual: (float, float, float) 55 | actual = precisionRecallFscore(yTrue, 
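# ----------------------------------------------------------------------
# The per-row subproblem behind proxSquaredL12Slow above is
#   argmin_x 0.5*||x - p||^2 + lam*(sum_j |x_j|)^2,
# solved by soft-thresholding with the data-dependent threshold
#   theta = 2*lam*S_k / (1 + 2*lam*k),
# where S_k sums the k largest |p_j| that stay nonzero; the
# sort-and-cumsum loop searches for that k. A hedged standalone sketch
# (the closed form is reconstructed here, since the slow version's loop
# body is truncated above):
import algorithm, math

proc proxSquaredL1(p: var seq[float64], lam: float64) =
  var absp = p
  for v in absp.mitems: v = abs(v)
  absp.sort(Descending)
  var s = 0.0
  var theta = 0.0
  for k in 1..absp.len:
    s += absp[k-1]
    let cand = 2.0 * lam * s / (1.0 + 2.0 * lam * float(k))
    if absp[k-1] > cand: theta = cand
    else: break
  for v in p.mitems:
    v = float(sgn(v)) * max(abs(v) - theta, 0.0)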
yScore1.map(x=>sgn(x-0.5))) 56 | check actual == (prec1, recall1, fscore1) 57 | actual = precisionRecallFscore(yTrue, yScore2.map(x=>sgn(x))) 58 | check actual == (prec2, recall2, fscore2) 59 | actual = precisionRecallFscore(yTrue, yTrue) 60 | check actual == (1.0, 1.0, 1.0) 61 | actual = precisionRecallFscore(yTrue, zeros.map(x=>toInt(x))) 62 | check actual == (0.0, 0.0, 0.0) 63 | actual = precisionRecallFscore(yTrue, ones.map(x=>toInt(x))) 64 | check actual == (0.5, 1.0, 1.0/1.5) 65 | actual = precisionRecallFscore(yTrue01, yScore1.map(x=>sgn(x-0.5))) 66 | check actual == (prec1, recall1, fscore1) 67 | actual = precisionRecallFscore(yTrue01, yScore2.map(x=>sgn(x))) 68 | check actual == (prec2, recall2, fscore2) 69 | check precisionRecallFscore(yTrue01, yTrue) == (1.0, 1.0, 1.0) 70 | actual = precisionRecallFscore(yTrue01, zeros.map(x=>toInt(x))) 71 | check actual == (0.0, 0.0, 0.0) 72 | actual = precisionRecallFscore(yTrue01, ones.map(x=>toInt(x))) 73 | check actual == (0.5, 1.0, 1.0/1.5) 74 | actual = precisionRecallFscore( 75 | zeros.map(x=>toInt(x)), zeros.map(x=>toInt(x))) 76 | check actual == (0.0, 0.0, 0.0) 77 | actual = precisionRecallFscore( 78 | ones.map(x=>toInt(x)), ones.map(x=>toInt(x))) 79 | check actual == (1.0, 1.0, 1.0) 80 | actual = precisionRecallFscore( 81 | zeros.map(x=>toInt(x)), zeros.map(x=>toInt(x)), 82 | pos=0) 83 | check actual == (1.0, 1.0, 1.0) 84 | actual = precisionRecallFscore( 85 | ones.map(x=>toInt(x)), ones.map(x=>toInt(x)), 86 | pos=0) 87 | check actual == (0.0, 0.0, 0.0) 88 | 89 | test "Test RMSE": 90 | check rmse(yScore1, yScore2) == 0.5984354601792912 91 | check rmse(yScore2, yScore1) == 0.5984354601792912 92 | check rmse(yScore1, yScore1) == 0.0 93 | check rmse(yScore2, yScore2) == 0.0 94 | check rmse(zeros, ones) == 1.0 95 | check rmse(ones, zeros) == 1.0 96 | check rmse(ones, ones) == 0.0 97 | check rmse(zeros, zeros) == 0.0 98 | 99 | 100 | test "Test r2 score": 101 | check r2(yScore1, yScore2) == -4.687344913151364 102 | check r2(yScore2, yScore1) == -0.9163879598662203 103 | check r2(yScore1, yScore1) == 1.0 104 | check r2(yScore2, yScore2) == 1.0 105 | check r2(zeros, yScore1) == 0.0 106 | check r2(zeros, yScore2) == 0.0 107 | check r2(zeros, zeros) == 1.0 108 | check r2(ones, yScore1) == 0.0 109 | check r2(ones, yScore2) == 0.0 110 | check r2(ones, ones) == 1.0 -------------------------------------------------------------------------------- /tests/test_sgd_ffm.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import utils, optimizer/sgd_ffm_slow 3 | import nimfm/loss, nimfm/dataset, nimfm/tensor/tensor 4 | import nimfm/model/field_aware_factorization_machine, nimfm/model/fm_base 5 | import nimfm/optimizer/sgd_ffm 6 | import model/ffm_slow 7 | 8 | 9 | suite "Test stochastic gradient descent for FFM": 10 | let 11 | n = 80 12 | d = 20 13 | nFields = 5 14 | nComponents = 4 15 | 16 | test "Test fitLinear": 17 | for fitIntercept in [true, false]: 18 | var 19 | X: CSRFieldDataset 20 | y: seq[float64] 21 | fieldDict: seq[int] 22 | createFFMDataset(X, y, fieldDIct, n, d, nFields, nComponents, 42, 23 | false, fitIntercept) 24 | # fit fast version 25 | var ffm = newFieldAwareFactorizationMachine( 26 | task = regression, nComponents = nComponents, 27 | fitLinear = false, 28 | fitIntercept = fitIntercept, randomState = 1) 29 | var sgd = newSGD(maxIter = 10, verbose = 0, tol = 0) 30 | sgd.fit(X, y, ffm) 31 | for j in 0..= normStrong 
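# ----------------------------------------------------------------------
# Reference definitions assumed by the metric checks above (nimfm's real
# implementations live in src/nimfm/metrics.nim, which also handles the
# zero-variance target edge cases these plain forms do not):
#   rmse = sqrt(mean((y - yPred)^2))
#   r2   = 1 - sum((y - yPred)^2) / sum((y - mean(y))^2)
import math

proc rmseRef(yTrue, yPred: seq[float64]): float64 =
  var se = 0.0
  for i in 0..<yTrue.len:
    se += (yTrue[i] - yPred[i])^2
  result = sqrt(se / float(yTrue.len))

proc r2Ref(yTrue, yPred: seq[float64]): float64 =
  let mean = sum(yTrue) / float(yTrue.len)
  var ssRes = 0.0
  var ssTot = 0.0
  for i in 0..<yTrue.len:
    ssRes += (yTrue[i] - yPred[i])^2
    ssTot += (yTrue[i] - mean)^2
  result = 1.0 - ssRes / ssTot

# e.g. rmseRef(zeros, ones) == 1.0, matching `check rmse(zeros, ones) == 1.0`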
--------------------------------------------------------------------------------
/tests/test_squaredl12.nim:
--------------------------------------------------------------------------------
 1 | import unittest, sequtils
 2 | import nimfm/regularizer/regularizers
 3 | import regularizer/squaredl12_slow, random
 4 | 
 5 | 
 6 | suite "Test proximal operator of SquaredL12":
 7 |   let
 8 |     d = 100
 9 | 
10 |   test "Test proximal operation":
11 |     var
12 |       reg = newSquaredL12()
13 | 
14 |     reg.initSGD(nFeatures = d, nComponents=4, degree=2)
15 |     var q = newSeqWith(d, 0.0)
16 |     for i in 0..<1000:
17 |       for j in 0..
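# ----------------------------------------------------------------------
# The test above follows the repo-wide pattern: run the optimized prox
# and a naive reference on the same random inputs and check agreement.
# A self-contained illustration of that pattern with plain L1
# soft-thresholding (both procs here are illustrative, not nimfm API):
import unittest, random, math

proc softThr(v: seq[float64], t: float64): seq[float64] =
  result = newSeq[float64](v.len)
  for i in 0..<v.len:
    result[i] = float(sgn(v[i])) * max(abs(v[i]) - t, 0.0)

proc softThrSlow(v: seq[float64], t: float64): seq[float64] =
  # case-by-case solution of argmin_x 0.5*(x - v_i)^2 + t*|x|
  result = newSeq[float64](v.len)
  for i in 0..<v.len:
    if v[i] > t: result[i] = v[i] - t
    elif v[i] < -t: result[i] = v[i] + t
    else: result[i] = 0.0

suite "fast vs slow prox (illustrative)":
  test "agree on random inputs":
    randomize(1)
    for trial in 0..<100:
      var v = newSeq[float64](20)
      for x in v.mitems: x = rand(2.0) - 1.0
      let a = softThr(v, 0.3)
      let b = softThrSlow(v, 0.3)
      for i in 0..<v.len:
        check abs(a[i] - b[i]) < 1e-12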