├── .github
│   └── workflows
│       └── unit_tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── benchmarks
│   └── ml100k
│       ├── README.md
│       ├── convert.nim
│       ├── convex_factorization_machine.nim
│       ├── factorization_machine.nim
│       ├── factorization_machine_adagrad.nim
│       ├── factorization_machine_adagrad_multi.nim
│       ├── factorization_machine_sgd.nim
│       ├── factorization_machine_sgd_multi.nim
│       ├── factorization_machine_sgd_stream.nim
│       ├── factorization_machine_sgd_stream_multi.nim
│       ├── factorization_machine_stream.nim
│       ├── ffm_adagrad.nim
│       ├── ffm_sgd.nim
│       ├── higher_order_factorization_machine.nim
│       ├── linear_model.nim
│       ├── make_ml100k_dataset.nim
│       ├── make_ml100k_dataset_field.nim
│       ├── matrix_factorization.nim
│       ├── sparse_fm.nim
│       ├── sparse_fm_bcd.nim
│       ├── sparse_fm_fista.nim
│       ├── sparse_fm_katyusha.nim
│       ├── sparse_fm_l1.nim
│       ├── sparse_fm_mbpsgd.nim
│       ├── sparse_fm_nmapgd.nim
│       ├── sparse_fm_pgd.nim
│       ├── sparse_fm_psgd.nim
│       ├── sparse_fm_squaredl12.nim
│       ├── user_item_bias.nim
│       └── utils.nim
├── nimfm.nimble
├── src
│   ├── nimfm.nim
│   ├── nimfm
│   │   ├── dataset.nim
│   │   ├── extmath.nim
│   │   ├── kernels.nim
│   │   ├── loss.nim
│   │   ├── metrics.nim
│   │   ├── model.nim
│   │   ├── model
│   │   │   ├── convex_factorization_machine.nim
│   │   │   ├── factorization_machine.nim
│   │   │   ├── field_aware_factorization_machine.nim
│   │   │   ├── fm_base.nim
│   │   │   ├── models.nim
│   │   │   └── params.nim
│   │   ├── modules.nim
│   │   ├── optimizer.nim
│   │   ├── optimizer
│   │   │   ├── adagrad.nim
│   │   │   ├── adagrad_ffm.nim
│   │   │   ├── adagrad_ffm_multi.nim
│   │   │   ├── adagrad_multi.nim
│   │   │   ├── cd.nim
│   │   │   ├── fista.nim
│   │   │   ├── fit_linear.nim
│   │   │   ├── greedy_cd.nim
│   │   │   ├── hazan.nim
│   │   │   ├── katyusha.nim
│   │   │   ├── minibatch_psgd.nim
│   │   │   ├── nmapgd.nim
│   │   │   ├── optimizer_base.nim
│   │   │   ├── optimizers.nim
│   │   │   ├── pbcd.nim
│   │   │   ├── pcd.nim
│   │   │   ├── pgd.nim
│   │   │   ├── psgd.nim
│   │   │   ├── sgd.nim
│   │   │   ├── sgd_ffm.nim
│   │   │   ├── sgd_ffm_multi.nim
│   │   │   ├── sgd_multi.nim
│   │   │   └── utils.nim
│   │   ├── regularizer.nim
│   │   ├── regularizer
│   │   │   ├── l1.nim
│   │   │   ├── l21.nim
│   │   │   ├── omegacs.nim
│   │   │   ├── omegati.nim
│   │   │   ├── regularizers.nim
│   │   │   ├── squaredl12.nim
│   │   │   ├── squaredl21.nim
│   │   │   └── utils.nim
│   │   ├── tensor.nim
│   │   ├── tensor
│   │   │   ├── sparse.nim
│   │   │   ├── sparse_stream.nim
│   │   │   └── tensor.nim
│   │   └── utils.nim
│   ├── nimfm_cfm.nim
│   └── nimfm_sparsefm.nim
└── tests
    ├── comb.nim
    ├── config.nims
    ├── kernels_slow.nim
    ├── model
    │   ├── cfm_slow.nim
    │   ├── ffm_slow.nim
    │   └── fm_slow.nim
    ├── optimizer
    │   ├── adagrad_ffm_slow.nim
    │   ├── adagrad_slow.nim
    │   ├── cd_slow.nim
    │   ├── fit_linear_slow.nim
    │   ├── greedy_cd_slow.nim
    │   ├── hazan_slow.nim
    │   ├── pbcd_slow.nim
    │   ├── pcd_slow.nim
    │   ├── psgd_slow.nim
    │   ├── sgd_ffm_slow.nim
    │   └── sgd_slow.nim
    ├── regularizer
    │   ├── l1_slow.nim
    │   ├── l21_slow.nim
    │   ├── omegacs_slow.nim
    │   ├── omegati_slow.nim
    │   ├── regularizers.nim
    │   ├── squaredl12_slow.nim
    │   ├── squaredl21_slow.nim
    │   └── utils.nim
    ├── test_adagrad.nim
    ├── test_adagrad_ffm.nim
    ├── test_cd.nim
    ├── test_dataset.nim
    ├── test_greedy_cd.nim
    ├── test_hazan.nim
    ├── test_kernels.nim
    ├── test_label_encoder.nim
    ├── test_metrics.nim
    ├── test_pbcd_l1.nim
    ├── test_pbcd_l21.nim
    ├── test_pbcd_omegacs.nim
    ├── test_pbcd_squaredl21.nim
    ├── test_pcd_l1.nim
    ├── test_pcd_squaredl12.nim
    ├── test_pcd_ti.nim
    ├── test_psgd_l1.nim
    ├── test_psgd_l21.nim
    ├── test_psgd_squaredl12.nim
    ├── test_psgd_squaredl21.nim
    ├── test_sgd.nim
    ├── test_sgd_ffm.nim
    ├── test_squaredl12.nim
    └── utils.nim

/.github/workflows/unit_tests.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | on: [push]
 3 | jobs:
 4 |   build:
 5 |     runs-on: ubuntu-latest
 6 |     steps:
 7 |     - uses: actions/checkout@v2
 8 | 
 9 |     - name: Install lapack
10 |       run: |
11 |         sudo apt update
12 |         sudo apt install gfortran
13 |         sudo apt install libblas-dev liblapack-dev libatlas-base-dev
14 | 
15 |     - name: Cache choosenim
16 |       id: cache-choosenim
17 |       uses: actions/cache@v2
18 |       with:
19 |         path: ~/.choosenim
20 |         key: ${{ runner.os }}-choosenim-1.0.6
21 | 
22 |     - name: Cache nimble
23 |       id: cache-nimble
24 |       uses: actions/cache@v2
25 |       with:
26 |         path: ~/.nimble
27 |         key: ${{ runner.os }}-nimble-1.0.6
28 | 
29 |     - name: Install Nim
30 |       if: steps.cache-choosenim.outputs.cache-hit != 'true' || steps.cache-nimble.outputs.cache-hit != 'true'
31 |       run: |
32 |         export CHOOSENIM_CHOOSE_VERSION="1.0.6"
33 |         curl https://nim-lang.org/choosenim/init.sh -sSf > init.sh
34 |         sh init.sh -y
35 | 
36 |     - name: Install project
37 |       run: |
38 |         export PATH=$HOME/.nimble/bin:$PATH
39 |         nimble install -y
40 |         echo $HOME/.nimble/bin >> $GITHUB_PATH
41 | 
42 |     - name: Unit testing
43 |       run: |
44 |         nimble test
45 |     - name: Build binaries
46 |       run: |
47 |         nimble make
48 | 
49 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | nimcache/
2 | nimblecache/
3 | htmldocs/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Kyohei Atarashi
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/benchmarks/ml100k/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmarks on MovieLens 100K dataset
 2 | [MovieLens 100K](https://grouplens.org/datasets/movielens/100k/) is a dataset
 3 | for the movie recommendation task. It has 943 users, 1,682 items,
 4 | and 100,000 ratings. It also provides additional side information that
 5 | factorization machines can leverage.
 6 | 
 7 | We provide code for some baseline methods [1], factorization machines [2],
 8 | and higher-order factorization machines [3] on the MovieLens 100K dataset.
 9 | The baseline methods implemented in this benchmark are expressed as
10 | factorization machines by changing the input features and
11 | hyperparameter settings (see [2] and the sketch below).
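To make that correspondence concrete, here is a minimal sketch (not one of the
benchmark files) of recovering matrix factorization from the generic FM
trainer by restricting the input to the user/item one-hot features. The
`fitLinear`/`fitIntercept` keyword arguments are an assumption here, mirroring
the `newConvexFactorizationMachine` constructor in `src/nimfm/model`:

    import nimfm/dataset, nimfm/model
    import nimfm/optimizer/cd

    var X: CSCDataset
    var y: seq[float64]
    # 943 users + 1,682 items = 2,625 one-hot columns.
    loadSVMLightFile("dataset/ml-100k_user_item_train.svm", X, y,
                     nFeatures=2625)
    # With only the two one-hot blocks as input, the pairwise term of a
    # second-order FM reduces to <p_u, q_i>, i.e. matrix factorization;
    # fitLinear/fitIntercept add the user/item/overall bias baseline.
    var fm = newFactorizationMachine(task=regression, fitLinear=true,
                                     fitIntercept=true)
    var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10)
    optim.fit(X, y, fm)
    echo("Train RMSE: ", fm.score(X, y))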
12 | 
13 | In addition, this benchmark provides factorization machines with sparse regularization [4,5,6,7].
14 | 
15 | ## Dependencies
16 | - [zip library in Nim](https://github.com/nim-lang/zip)
17 | 
18 | ## Usage
19 | 1. Compile and run `make_ml100k_dataset.nim` with the `-d:ssl` option:
20 | 
21 | 
22 |        nim c --run --d:ssl make_ml100k_dataset.nim
23 | 
24 | Then, `ml-100k.zip` will be downloaded and uncompressed, and the following
25 | files will be created in the `dataset` directory:
26 | - `ml-100k_user_item_all.svm`
27 | - `ml-100k_user_item_train.svm`
28 | - `ml-100k_user_item_test.svm`
29 | - `ml-100k_user_item_feature_all.svm`
30 | - `ml-100k_user_item_feature_train.svm`
31 | - `ml-100k_user_item_feature_test.svm`
32 | 
33 | The first three files are svmlight-format dataset files for
34 | `matrix_factorization.nim` and `user_item_bias.nim`.
35 | `matrix_factorization.nim` provides matrix factorization (MF) (a.k.a. latent
36 | factor (feature) model) methods [1]. `user_item_bias.nim` provides linear
37 | regression with user-id and item-id as input. It predicts the rating as
38 | overall_bias + user_bias + item_bias.
39 | 
40 | If you want to run the field-aware factorization machine examples (`ffm_sgd.nim` and `ffm_adagrad.nim`), please compile and run `make_ml100k_dataset_field.nim` first.
41 | 
42 | 2. Compile the other nim files with `-d:release` and `-d:danger`,
43 | and run them. For example,
44 | 
45 |        nim c --run --d:release --d:danger matrix_factorization.nim
46 | 
47 | `factorization_machine.nim`,
48 | `factorization_machine_sgd.nim`, `factorization_machine_adagrad.nim`, `higher_order_factorization_machine.nim`,
49 | and `linear_model.nim` use not only user-id and item-id but also
50 | - age, occupation, sex, and zipcode of the user (dimension: 49),
51 | - released year and genre of the item (dimension: 29).
52 | 
53 | For more details about the feature encoding, please see [3].
54 | 
55 | `factorization_machine_stream.nim` and `factorization_machine_sgd_stream.nim` are examples using our binary data format.
56 | Before running them, you must run `convert.nim`.
57 | It outputs binary versions of `ml-100k_user_item_feature_train.svm` and `ml-100k_user_item_feature_test.svm`.
58 | 
59 | The `sparse*` files provide factorization machines with sparse regularization [4,5,6,7] (see the sketch after the references).
60 | 
61 | When compiling `factorization_machine_sgd_multi`, `factorization_machine_sgd_stream_multi`, `factorization_machine_adagrad_multi`, `ffm_sgd`, or `ffm_adagrad`, use the `--threads:on` flag: they use multiple threads.
62 | 
63 | ## References
64 | 1. Y. Koren. Factorization meets the neighborhood: a multifaceted collaborative filtering model. In KDD, pp. 426--434, 2008.
65 | 
66 | 2. S. Rendle. Factorization machines. In ICDM, pp. 995--1000, 2010.
67 | 
68 | 3. M. Blondel, A. Fujino, N. Ueda, M. Ishihata. Higher-order factorization machines. In NeurIPS, pp. 3351--3359, 2016.
69 | 
70 | 4. Z. Pan, E. Chen, Q. Liu, T. Xu, H. Ma, and H. Lin. Sparse factorization machines for click-through rate prediction. In ICDM, pp. 400--409, 2016.
71 | 
72 | 5. J. Xu, K. Lin, P. N. Tan, and J. Zhou. Synergies that matter: Efficient interaction selection via sparse factorization machine. In SDM, pp. 1008--0116, 2016.
73 | 
74 | 6. H. Zhao, Q. Yao, J. Li, Y. Song, and D. L. Lee. Meta-graph based recommendation fusion over heterogeneous information networks. In KDD, pp. 635--644, 2017.
75 | 
76 | 7. K. Atarashi, S. Oyama, and M. Kurihara. Factorization machines with regularization for sparse feature interactions. preprint.
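As a concrete entry point to the `sparse*` examples, the core pattern looks
roughly as follows. This is a sketch, not one of the benchmark files: the
`nimfm/loss` and `nimfm/regularizer` import paths and the hyperparameter
values are assumptions based on the source tree and on the defaults in
`src/nimfm/optimizer/fista.nim`; see `sparse_fm_fista.nim` for the real thing.

    import nimfm/dataset, nimfm/model, nimfm/loss, nimfm/regularizer
    import nimfm/optimizer/fista

    var X: CSRDataset
    var y: seq[float64]
    loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", X, y,
                     nFeatures=2703)
    var sfm = newFactorizationMachine(task=regression)
    # gamma controls the strength of the sparsity-inducing penalty;
    # SquaredL12 is the sparse regularizer studied in [7].
    var optim = newFISTA(maxIter=100, gamma=1e-4, loss=newSquared(),
                         reg=newSquaredL12())
    optim.fit(X, y, sfm)
    echo("Train RMSE: ", sfm.score(X, y))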
77 | 
--------------------------------------------------------------------------------
/benchmarks/ml100k/convert.nim:
--------------------------------------------------------------------------------
 1 | import nimfm
 2 | 
 3 | 
 4 | when isMainModule:
 5 |   convertSVMLightFile("dataset/ml-100k_user_item_feature_train.svm",
 6 |                       "dataset/ml-100k_user_item_feature_train_samples",
 7 |                       "dataset/ml-100k_train_labels")
 8 |   convertSVMLightFile("dataset/ml-100k_user_item_feature_test.svm",
 9 |                       "dataset/ml-100k_user_item_feature_test_samples",
10 |                       "dataset/ml-100k_test_labels")
11 |   transposeFile("dataset/ml-100k_user_item_feature_train_samples",
12 |                 "dataset/ml-100k_user_item_feature_train_samples_csc",
13 |                 cachesize=5)
14 |   transposeFile("dataset/ml-100k_user_item_feature_train_samples_csc",
15 |                 "dataset/ml-100k_user_item_feature_train_samples_csc_transpose",
16 |                 cachesize=5)
--------------------------------------------------------------------------------
/benchmarks/ml100k/convex_factorization_machine.nim:
--------------------------------------------------------------------------------
 1 | import nimfm/dataset, nimfm/model
 2 | import nimfm/optimizer/hazan, nimfm/optimizer/greedy_cd
 3 | 
 4 | 
 5 | when isMainModule:
 6 |   var XTr, XTe: CSCDataset
 7 |   var yTr, yTe: seq[float64]
 8 |   loadSVMLightFile("dataset/ml-100k_user_item_train.svm",
 9 |                    XTr, yTr, nFeatures=2625)
10 |   loadSVMLightFile("dataset/ml-100k_user_item_test.svm",
11 |                    XTe, yTe, nFeatures=2625)
12 | 
13 |   var cfm = newConvexFactorizationMachine(
14 |     task=regression, maxComponents=50, ignoreDiag=true, fitLinear=true,
15 |     fitIntercept=true, warmStart=false)
16 | 
17 |   var optimGCD = newGreedyCD(
18 |     maxIter=30, maxIterInner=10, maxIterPower=100,
19 |     beta=1e-4, alpha0=1e-8, alpha=1e-8,
20 |     refitFully=false, nRefitting=10, verbose=1)
21 |   echo("Training CFM by GreedyCD.")
22 |   optimGCD.fit(XTr, yTr, cfm)
23 |   echo("Train RMSE: ", cfm.score(XTr, yTr))
24 |   echo("Test RMSE: ", cfm.score(XTe, yTe))
25 |   echo()
26 | 
27 |   var optimHazan = newHazan(
28 |     maxIter=100, maxIterPower=100, optimal=true, verbose=1,
29 |     eta=600, nTol=100)
30 |   echo("Training CFM by Hazan's Algorithm.")
31 |   optimHazan.fit(XTr, yTr, cfm)
32 |   echo("Train RMSE: ", cfm.score(XTr, yTr))
33 |   echo("Test RMSE: ", cfm.score(XTe, yTe))
--------------------------------------------------------------------------------
/benchmarks/ml100k/factorization_machine.nim:
--------------------------------------------------------------------------------
 1 | import nimfm/dataset, nimfm/model
 2 | import nimfm/optimizer/cd
 3 | 
 4 | 
 5 | when isMainModule:
 6 |   var XTr, XTe: CSCDataset
 7 |   var yTr, yTe: seq[float64]
 8 |   loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm",
 9 |                    XTr, yTr, nFeatures=2703)
10 |   loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm",
11 |                    XTe, yTe, nFeatures=2703)
12 | 
13 |   var fm = newFactorizationMachine(task=regression)
14 |   var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10)
15 |   optim.fit(XTr, yTr, fm)
16 | 
17 |   echo("Train RMSE: ", fm.score(XTr, yTr))
18 |   echo("Test RMSE: ", fm.score(XTe, yTe))
--------------------------------------------------------------------------------
/benchmarks/ml100k/factorization_machine_adagrad.nim:
--------------------------------------------------------------------------------
 1 | import nimfm/dataset, nimfm/model
 2 | import nimfm/optimizer/adagrad
 3 | 
 4 | 
 5 | when isMainModule:
 6 |   var XTr, XTe: CSRDataset  # Use CSRDataset
 7 |   var yTr, yTe: seq[float64]
 8 | 
loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression) 14 | var optim = newAdaGraD(eta0=0.1, maxIter=100, beta=1e-3, 15 | alpha0=1e-10, alpha=1e-10) 16 | optim.fit(Xtr, yTr, fm) 17 | 18 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_adagrad_multi.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model, nimfm/loss 2 | import nimfm/optimizer/adagrad_multi 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRDataset # Use CSRDataset 7 | var yTr, yTe: seq[float64] 8 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression) 14 | var optim = newAdaGrad(eta0=0.1, maxIter=100, beta=1e-3, 15 | alpha0=1e-10, alpha=1e-10) 16 | optim.fit(XTr, yTr, fm, maxThreads=4) 17 | 18 | echo("Train RMSE: ", fm.score(XTr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRDataset # Use CSRDataset for SGD solver 7 | var yTr, yTe: seq[float64] 8 | let scheduling: SchedulingKind = constant 9 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 10 | XTr, yTr, nFeatures=2703) 11 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 12 | XTe, yTe, nFeatures=2703) 13 | 14 | var fm = newFactorizationMachine(task=regression) 15 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 16 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=false) 17 | optim.fit(Xtr, yTr, fm) 18 | 19 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 20 | echo("Test RMSE: ", fm.score(Xte, yTe)) 21 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd_multi.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd_multi 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRDataset # Use CSRDataset for SGD solver 7 | var yTr, yTe: seq[float64] 8 | let scheduling: SchedulingKind = optimal 9 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 10 | XTr, yTr, nFeatures=2703) 11 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 12 | XTe, yTe, nFeatures=2703) 13 | var fm = newFactorizationMachine(task=regression) 14 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, tol = -10, 15 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=true) 16 | optim.fit(XTr, yTr, fm, maxThreads=4) 17 | 18 | echo("Train RMSE: ", fm.score(XTr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd_stream.nim: 
-------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd 3 | 4 | 5 | when isMainModule: 6 | var XTr = newStreamCSRDataset("dataset/ml-100k_user_item_feature_train_samples", cacheSize=5) 7 | var XTe = newStreamCSRDataset("dataset/ml-100k_user_item_feature_test_samples") 8 | var yTr = loadStreamLabel("dataset/ml-100k_train_labels") 9 | var yTe = loadStreamLabel("dataset/ml-100k_test_labels") 10 | var fm = newFactorizationMachine(task=regression) 11 | let scheduling: SchedulingKind = constant 12 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 13 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=false) 14 | 15 | optim.fit(Xtr, yTr, fm) 16 | 17 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 18 | echo("Test RMSE: ", fm.score(Xte, yTe)) 19 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_sgd_stream_multi.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/sgd_multi 3 | 4 | 5 | when isMainModule: 6 | var XTr = newStreamCSRDataset("dataset/ml-100k_user_item_feature_train_samples", 7 | cacheSize=5) 8 | var XTe = newStreamCSRDataset("dataset/ml-100k_user_item_feature_test_samples") 9 | var yTr = loadStreamLabel("dataset/ml-100k_train_labels") 10 | var yTe = loadStreamLabel("dataset/ml-100k_test_labels") 11 | var fm = newFactorizationMachine(task=regression) 12 | let scheduling: SchedulingKind = optimal 13 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 14 | beta=1e-3, alpha0=1e-10, alpha=1e-10, shuffle=false) 15 | 16 | optim.fit(XTr, yTr, fm, maxThreads=4) 17 | 18 | echo("Train RMSE: ", fm.score(XTr, yTr)) 19 | echo("Test RMSE: ", fm.score(Xte, yTe)) 20 | -------------------------------------------------------------------------------- /benchmarks/ml100k/factorization_machine_stream.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/cd 3 | 4 | 5 | when isMainModule: 6 | var XTr = newStreamCSCDataset("dataset/ml-100k_user_item_feature_train_samples_csc", 7 | cacheSize=5) 8 | var XTe = newStreamCSRDataset("dataset/ml-100k_user_item_feature_test_samples") 9 | var yTr = loadStreamLabel("dataset/ml-100k_train_labels") 10 | var yTe = loadStreamLabel("dataset/ml-100k_test_labels") 11 | var fm = newFactorizationMachine(task=regression) 12 | var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10) 13 | optim.fit(Xtr, yTr, fm) 14 | 15 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 16 | echo("Test RMSE: ", fm.score(Xte, yTe)) -------------------------------------------------------------------------------- /benchmarks/ml100k/ffm_adagrad.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model, nimfm/loss 2 | # import nimfm/optimizer/adagrad_ffm # single-threading fit 3 | import nimfm/optimizer/adagrad_ffm_multi 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRFieldDataset # Use CSRFieldDataset for AdaGrad solver 7 | var yTr, yTe: seq[float64] 8 | loadFFMFile("dataset/ml-100k_user_item_feature_train.ffm", 9 | XTr, yTr, nFeatures=2703, nFields=8) 10 | loadFFMFile("dataset/ml-100k_user_item_feature_test.ffm", 11 | XTe, yTe, nFeatures=2703, nFields=8) 12 | 13 | var ffm = newFieldAwareFactorizationMachine(task=regression) 14 | var optim = 
newAdaGrad(eta0=1.0, maxIter=100, beta=1e-3, alpha0=1e-10, 15 | alpha=1e-10) 16 | #optim.fit(XTr, yTr, ffm) # single-threading fit 17 | optim.fit(XTr, yTr, ffm, maxThreads=4) 18 | 19 | echo("Train RMSE: ", ffm.score(XTr, yTr)) 20 | echo("Test RMSE: ", ffm.score(Xte, yTe)) 21 | -------------------------------------------------------------------------------- /benchmarks/ml100k/ffm_sgd.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | #import nimfm/optimizer/sgd_ffm # single-threading fit 3 | import nimfm/optimizer/sgd_ffm_multi 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSRFieldDataset # Use CSRFieldDataset for SGD solver 7 | var yTr, yTe: seq[float64] 8 | let scheduling: SchedulingKind = optimal 9 | loadFFMFile("dataset/ml-100k_user_item_feature_train.ffm", 10 | XTr, yTr, nFeatures=2703, nFields=8) 11 | loadFFMFile("dataset/ml-100k_user_item_feature_test.ffm", 12 | XTe, yTe, nFeatures=2703, nFields=8) 13 | 14 | var ffm = newFieldAwareFactorizationMachine(task=regression) 15 | var optim = newSGD(eta0=0.01, scheduling=scheduling, maxIter=100, 16 | beta=1e-3, alpha0=1e-10, alpha=1e-10) 17 | # optim.fit(XTr, yTr, ffm) # single-threading fit 18 | optim.fit(XTr, yTr, ffm, maxThreads=4) 19 | echo("Train RMSE: ", ffm.score(XTr, yTr)) 20 | echo("Test RMSE: ", ffm.score(Xte, yTe)) 21 | -------------------------------------------------------------------------------- /benchmarks/ml100k/higher_order_factorization_machine.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/cd 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSCDataset 7 | var yTr, yTe: seq[float64] 8 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression, degree=3) 14 | var optim = newCD(maxIter=100, beta=1e-3, alpha0=1e-10, alpha=1e-10) 15 | optim.fit(Xtr, yTr, fm) 16 | 17 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 18 | echo("Test RMSE: ", fm.score(Xte, yTe)) 19 | -------------------------------------------------------------------------------- /benchmarks/ml100k/linear_model.nim: -------------------------------------------------------------------------------- 1 | import nimfm/dataset, nimfm/model 2 | import nimfm/optimizer/cd 3 | 4 | 5 | when isMainModule: 6 | var XTr, XTe: CSCDataset 7 | var yTr, yTe: seq[float64] 8 | loadSVMLightFile("dataset/ml-100k_user_item_feature_train.svm", 9 | XTr, yTr, nFeatures=2703) 10 | loadSVMLightFile("dataset/ml-100k_user_item_feature_test.svm", 11 | XTe, yTe, nFeatures=2703) 12 | 13 | var fm = newFactorizationMachine(task=regression, degree=1) 14 | var optim = newCD(maxIter=1000) 15 | optim.fit(Xtr, yTr, fm) 16 | 17 | echo("Train RMSE: ", fm.score(Xtr, yTr)) 18 | echo("Test RMSE: ", fm.score(Xte, yTe)) -------------------------------------------------------------------------------- /benchmarks/ml100k/make_ml100k_dataset.nim: -------------------------------------------------------------------------------- 1 | import httpclient, os, streams, tables, strutils, sequtils, parseutils 2 | import zip/zipfiles, random 3 | import nimfm/dataset, nimfm/utils 4 | 5 | const nUsers = 943 6 | const nItems = 1682 7 | const nRatings = 100_000 8 | const fileurl = "http://files.grouplens.org/datasets/movielens/ml-100k.zip" 9 | const nGenres = 19 10 | 
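# round(x) below bins an integer to the nearest multiple of 10, rounding ties
# up: round(23) == 20, round(25) == 30. Presumably this discretizes the
# user-age feature (ML-100K's only continuous user attribute) for the
# one-hot encoding.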
11 | 12 | proc round(x: int): int = 13 | result = ((x+5) div 10)*10 14 | 15 | 16 | proc createUserItemDataset(indices: openarray[int]) = 17 | var X: CSRDataset 18 | var y: seq[float64] 19 | loadUserItemRatingFile("ml-100k/u.data", X, y) 20 | echo(" Number of samples : ", X.nSamples) 21 | echo(" Number of features: ", X.nFeatures) 22 | let nTrain = int(8*X.nSamples/10) 23 | var 24 | XTr, XTe: CSRDataset 25 | yTr, yTe: seq[float64] 26 | 27 | (XTr, yTr) = shuffle(X, y, indices[0..= 1.0.6", "cligen >= 0.9.43", "nimlapack >= 0.2.0" 11 | 12 | # Compile and create binary in ./bin for end users 13 | task make, "builds nimfm": 14 | exec "mkdir -p bin" 15 | exec "nim c -o:bin/nimfm -d:release -d:danger --threads:on ./src/nimfm.nim" 16 | exec "nim c -o:bin/nimfm_cfm -d:release -d:danger --threads:on ./src/nimfm_cfm.nim" 17 | exec "nim c -o:bin/nimfm_sparsefm -d:release -d:danger --threads:on ./src/nimfm_sparsefm.nim" 18 | 19 | -------------------------------------------------------------------------------- /src/nimfm/extmath.nim: -------------------------------------------------------------------------------- 1 | import strformat, math 2 | import tensor/tensor, tensor/sparse, tensor/sparse_stream, dataset 3 | 4 | type 5 | RowData = RowDataset|RowMatrix|StreamRowMatrix 6 | 7 | ColData = ColDataset|ColMatrix|StreamColMatrix 8 | 9 | 10 | proc matmul*[T: RowData](D: Matrix, S: T, R: var Matrix) = 11 | let 12 | n1 = D.shape[1] 13 | n2 = S.shape[0] 14 | if n1 != n2: 15 | let msg = fmt"D.shape[1] {n1} != shape[0] {n2}." 16 | raise newException(ValueError, msg) 17 | 18 | R[0..^1, 0..^1] = 0.0 19 | for m in 0.. 0: result = -2*y*z 39 | else: result = 0.0 40 | 41 | 42 | proc ddloss*(self: SquaredHinge, y, p: float64): float64 = 43 | let z = 1-p*y 44 | if z > 0: result = 2.0 45 | else: result = 0.0 46 | 47 | 48 | proc mu*(self: SquaredHinge): float64 = 2.0 49 | 50 | 51 | proc newLogistic*(): Logistic = new(Logistic) 52 | 53 | 54 | proc loss*(self: Logistic, y, p: float64): float64 = 55 | let z = p * y 56 | if z > 0: 57 | result = ln(1+exp(-z)) 58 | else: 59 | result = ln(exp(z)+1) - z 60 | 61 | 62 | proc dloss*(self: Logistic, y, p: float64): float64 = 63 | let z = p * y 64 | if z > 0: 65 | result = -y * exp(-z) / (1+exp(-z)) 66 | else: 67 | result = -y / (exp(z)+1) 68 | 69 | 70 | proc ddloss*(self: Logistic, y, p: float64): float64 = 71 | let z = p*y 72 | if z > 0: 73 | result = exp(-z) / ((1+exp(-z))^2) 74 | else: 75 | result = exp(z) / ((1+exp(z))^2) 76 | 77 | 78 | proc mu*(self: Logistic): float64 = 0.25 79 | 80 | 81 | proc newHuber*(threshold=1.0): Huber = Huber(threshold: threshold) 82 | 83 | 84 | proc loss*(self: Huber, y, p: float64): float64 = 85 | let z = abs(y - p) 86 | if z < self.threshold: result = 0.5 * z^2 87 | else: result = self.threshold * (z - 0.5*self.threshold) 88 | 89 | 90 | proc dloss*(self: Huber, y, p: float64): float64 = 91 | let z = abs(y-p) 92 | if z < self.threshold: result = y - p 93 | else: result = self.threshold 94 | 95 | 96 | proc ddloss*(self: Huber, y, p: float64): float64 = 97 | let z = abs(y-p) 98 | if z < self.threshold: result = 1.0 99 | else: result = 0.0 100 | 101 | 102 | proc mu*(self: Huber): float64 = 1.0 103 | -------------------------------------------------------------------------------- /src/nimfm/metrics.nim: -------------------------------------------------------------------------------- 1 | import math, sequtils, algorithm, sugar 2 | import utils 3 | 4 | 5 | proc rmse*(yTrue, yScore: seq[float64]): float64 = 6 | ## Returns root mean squared error. 
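  ## Concretely, rmse(yTrue, yScore) = sqrt((1/n) * sum_i (yScore_i - yTrue_i)^2)
  ## with n = len(yTrue); lower is better.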
 7 |   if len(yTrue) != len(yScore):
 8 |     let msg = "len(yScore)=" & $len(yScore) & ", but len(yTrue)=" & $len(yTrue)
 9 |     raise newException(ValueError, msg)
10 |   result = 0.0
11 |   for (val1, val2) in zip(yScore, yTrue):
12 |     result += pow(val1-val2, 2)
13 |   result = sqrt(result / float(len(yTrue)))
14 | 
15 | 
16 | proc r2*(yTrue, yScore: seq[float64]): float64 =
17 |   ## Returns r2 score (the coefficient of determination).
18 |   if len(yTrue) != len(yScore):
19 |     let msg = "len(yScore)=" & $len(yScore) & ", but len(yTrue)=" & $len(yTrue)
20 |     raise newException(ValueError, msg)
21 | 
22 |   let nSamples = yTrue.len
23 |   var res = 0.0
24 |   for (target, score) in zip(yTrue, yScore):
25 |     res += (target-score)^2
26 | 
27 |   if res == 0:
28 |     result = 1.0
29 |   else:
30 |     let mean = sum(yTrue) / float(nSamples)
31 |     let tot = sum(yTrue.map(x=>(x-mean)^2))
32 |     if tot != 0.0:
33 |       result = 1.0 - res / tot
34 |     else:
35 |       echo("All instances have the same target value.")
36 |       result = 0.0
37 | 
38 | 
39 | proc accuracy*(yTrue, yPred: seq[int]): float64 =
40 |   ## Returns accuracy.
41 |   if len(yPred) != len(yTrue):
42 |     let msg = "len(yPred)=" & $len(yPred) & ", but len(yTrue)=" & $len(yTrue)
43 |     raise newException(ValueError, msg)
44 |   result = 0.0
45 |   for (val1, val2) in zip(yPred, yTrue):
46 |     result += float(val1 == val2)
47 |   result /= float(len(yPred))
48 | 
49 | 
50 | proc precisionRecallFscore*(yTrue, yPred: seq[int], pos=1):
51 |     tuple[prec, recall, fscore: float64] =
52 |   ## Returns precision, recall, and F1-score for "binary classification".
53 |   var
54 |     tp, fp, tn, fn: float64
55 |   if len(yPred) != len(yTrue):
56 |     let msg = "len(yPred)=" & $len(yPred) & ", but len(yTrue)=" & $len(yTrue)
57 |     raise newException(ValueError, msg)
58 | 
59 |   let nUnique = len(deduplicate(yTrue))
60 |   if nUnique > 2:
61 |     echo("yTrue has " & $nUnique & " unique values. " &
62 |          "All values that are not " & $pos & " are regarded as negative.")
63 |   for (target, pred) in zip(yTrue, yPred):
64 |     if target == pos:
65 |       if pred == pos: tp += 1.0
66 |       else: fn += 1.0
67 |     else:
68 |       if pred == pos: fp += 1.0
69 |       else: tn += 1.0
70 |   let prec = if (tp+fp) != 0: tp / (tp+fp) else: 0.0
71 |   let recall = if (tp+fn) != 0: tp / (tp+fn) else: 0.0
72 |   let fscore = if (prec+recall) != 0: 2*prec*recall/(prec+recall) else: 0.0
73 |   result = (prec, recall, fscore)
74 | 
75 | 
76 | proc rocauc*(yTrue: seq[int], yScore: seq[float64], pos:int = 1): float64 =
77 |   ## Returns the area under the receiver operating characteristic curve
78 |   ## for "binary classification".
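  ## The scores are swept in decreasing order as a moving threshold; the area
  ## under the resulting ROC staircase is accumulated by the trapezoidal rule,
  ## with tied scores resolved only when the score changes (so ties contribute
  ## one shared trapezoid). The sum is finally normalized by np*nn, the number
  ## of positive-negative pairs, giving the probability that a random positive
  ## instance is ranked above a random negative one.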
79 |   let indicesSorted = argsort(yScore, SortOrder.Descending)
80 |   result = 0.0
81 |   if len(yTrue) != len(yScore):
82 |     let msg = "len(yScore)=" & $len(yScore) & ", but len(yTrue)=" & $len(yTrue)
83 |     raise newException(ValueError, msg)
84 |   var
85 |     fp, tp, fpPrev, tpPrev: int
86 |     scorePrev: float64 = NegInf
87 |     np, nn: int
88 |   for i in indicesSorted:
89 |     if yScore[i] != scorePrev:
90 |       result += float((fp - fpPrev) * (tp + tpPrev)) / 2.0
91 |       scorePrev = yScore[i]
92 |       fpPrev = fp
93 |       tpPrev = tp
94 | 
95 |     if yTrue[i] == pos:
96 |       np += 1
97 |       tp += 1
98 |     else:
99 |       nn += 1
100 |       fp += 1
101 | 
102 |   result += float((fp - fpPrev) * (tp + tpPrev)) / 2.0
103 |   result /= float(nn*np)
104 | 
--------------------------------------------------------------------------------
/src/nimfm/model.nim:
--------------------------------------------------------------------------------
1 | import ./model/models
2 | export models
--------------------------------------------------------------------------------
/src/nimfm/model/convex_factorization_machine.nim:
--------------------------------------------------------------------------------
 1 | import ../tensor/tensor, ../kernels, fm_base
 2 | import strutils, parseutils, sequtils, algorithm, typetraits
 3 | 
 4 | 
 5 | type
 6 |   ConvexFactorizationMachineObj* = object
 7 |     task*: TaskKind  ## regression or classification.
 8 |     degree*: int  ## Degree of the polynomial; always 2.
 9 |     maxComponents*: int  ## Maximum number of basis vectors.
10 |     fitIntercept*: bool  ## Whether to fit intercept (a.k.a. bias) term.
11 |     fitLinear*: bool  ## Whether to fit linear term.
12 |     ignoreDiag*: bool  ## Whether to ignore the diagonal (FM) or not (PN).
13 |     warmStart*: bool  ## Whether to do warm-start fitting.
14 |     isInitialized*: bool
15 |     P*: Matrix  ## Weights for the polynomial.
16 |                 ## shape (nComponents, nFeatures)
17 |     lams*: Vector  ## Weight for vectors in basis.
18 |                    ## shape: (nComponents)
19 |     w*: Vector  ## Weights for linear term, shape: (nFeatures)
20 |     intercept*: float64  ## Intercept term.
21 | 
22 |   ConvexFactorizationMachine* = ref ConvexFactorizationMachineObj
23 | 
24 | 
25 | proc newConvexFactorizationMachine*(
26 |   task: TaskKind, maxComponents = 30, fitIntercept = true, fitLinear = true,
27 |   ignoreDiag=true, warmStart = false): ConvexFactorizationMachine =
28 |   ## Create a new ConvexFactorizationMachine.
29 |   ## task: classification or regression.
30 |   ## maxComponents: Maximum number of basis vectors.
31 |   ## fitIntercept: Whether to fit intercept (a.k.a. bias) term or not.
32 |   ## fitLinear: Whether to fit linear term or not.
33 |   ## warmStart: Whether to do warm-start fitting or not.
34 |   new(result)
35 |   result.task = task
36 |   result.degree = 2
37 |   if maxComponents < 1:
38 |     raise newException(ValueError, "maxComponents < 1.")
39 |   result.maxComponents = maxComponents
40 |   result.fitIntercept = fitIntercept
41 |   result.fitLinear = fitLinear
42 |   result.ignoreDiag = ignoreDiag
43 |   result.warmStart = warmStart
44 |   result.degree = 2
45 |   result.isInitialized = false
46 |   result.lams = zeros([0])
47 | 
48 | 
49 | proc init*[Dataset](self: ConvexFactorizationMachine, X: Dataset,
50 |                     force=false) =
51 |   ## Initializes the factorization machine.
52 |   ## self is not re-initialized when force=false, warmStart=true,
53 |   ## and self is already initialized.
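  ## For example, init(cfm, X, force=true) always re-initializes cfm,
  ## discarding any previously learned lams, P, w, and intercept.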
54 | if force or not (self.warmStart and self.isInitialized): 55 | let nFeatures: int = X.nFeatures 56 | self.w = zeros([nFeatures]) 57 | self.P = zeros([0, nFeatures]) 58 | self.lams = zeros([0]) 59 | self.intercept = 0.0 60 | self.isInitialized = true 61 | 62 | 63 | proc decisionFunction*[Dataset](self: ConvexFactorizationMachine, 64 | X: Dataset): seq[float64] = 65 | ## Returns the model outputs as seq[float64]. 66 | self.checkInitialized() 67 | let nSamples: int = X.nSamples 68 | let nFeatures = X.nFeatures 69 | var A = zeros([nSamples, 3]) 70 | result = newSeqWith(nSamples, 0.0) 71 | 72 | linear(X, self.w, result) 73 | for i in 0..sgn(x)) 21 | 22 | 23 | proc predictProba*[Dataset, FM](self: FM, X: Dataset): seq[float64] = 24 | ## Returns probabilities that each instance belongs to positive class. 25 | ## It shoud be used only when task=classification. 26 | result = self.decisionFunction(X).map(expit) 27 | 28 | 29 | proc checkTarget*[FM](self: FM, y: seq[SomeNumber]): seq[float64] = 30 | ## Transforms targets vector to float for regression or 31 | ## to sign for classification. 32 | case self.task 33 | of classification: 34 | result = y.map(x => float(sgn(x))) 35 | of regression: 36 | result = y.map(x => float(x)) 37 | 38 | 39 | proc score*[FM, Dataset](self: FM, X: Dataset, y: seq[float64]): float64 = 40 | ## Returns the score between the model outputs and true targets. 41 | ## Computes root mean squared error when task=regression (lower is better). 42 | ## Computes accuracy when task=classification (higher is better). 43 | let yPred = self.decisionFunction(X) 44 | case self.task 45 | of regression: 46 | result = rmse(y, yPred) 47 | of classification: 48 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) -------------------------------------------------------------------------------- /src/nimfm/model/models.nim: -------------------------------------------------------------------------------- 1 | import 2 | factorization_machine, convex_factorization_machine, fm_base, 3 | field_aware_factorization_machine 4 | export 5 | factorization_machine, convex_factorization_machine, fm_base, 6 | field_aware_factorization_machine 7 | -------------------------------------------------------------------------------- /src/nimfm/model/params.nim: -------------------------------------------------------------------------------- 1 | import ../tensor/tensor 2 | 3 | 4 | type 5 | Params* = ref object 6 | P*: Tensor 7 | w*: Vector 8 | intercept*: float64 9 | fitLinear*: bool 10 | fitIntercept*: bool 11 | 12 | 13 | proc newParams*(shape_P: array[3, int], len_w: int, 14 | fitLinear, fitIntercept: bool): Params = 15 | new(result) 16 | result.fitLinear = fitLinear 17 | result.fitIntercept = fitIntercept 18 | result.P = zeros(shape_P) 19 | result.w = zeros([len_w]) 20 | result.intercept = 0.0 21 | 22 | 23 | proc newParams*(P: var Tensor, w: var Vector, intercept: float64, 24 | fitLinear, fitIntercept: bool): Params = 25 | new(result) 26 | result.fitLinear = fitLinear 27 | result.fitIntercept = fitIntercept 28 | result.P = P 29 | result.w = w 30 | result.intercept =intercept 31 | 32 | 33 | proc add*(self: Params, grad: Params, eta_intercept, eta_w, eta_P: float64) = 34 | if self.P.shape != grad.P.shape: 35 | raise newException(ValueError, "self.P.shape != grad.P.shape.") 36 | for i in 0..void = nil) = 14 | ## Fits the factorization machine on X and y by stochastic gradient descent. 
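  ## AdaGrad here is SGD with per-coordinate adaptive step sizes: a running
  ## sum of squared gradients is kept for every parameter (see updateG/update
  ## in adagrad.nim) and each step is scaled roughly as eta0/sqrt(that sum),
  ## so frequently-updated coordinates take smaller and smaller steps.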
15 | ffm.init(X) 16 | 17 | let y = ffm.checkTarget(y) 18 | let 19 | nSamples = X.nSamples 20 | fitLinear = ffm.fitLinear 21 | fitIntercept = ffm.fitIntercept 22 | var 23 | indices = toSeq(0.. 0 and self.it mod self.nCalls == 0: 45 | if not callback.isNil: 46 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) 47 | callback(self, ffm) 48 | inc(self.it) 49 | 50 | # one epoch done 51 | runningLoss /= float(nSamples) 52 | if not callback.isNil: 53 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) 54 | callback(self, ffm) 55 | 56 | let isContinue = stoppingCriterion( 57 | ffm.P, ffm.w, ffm.intercept, self.alpha0, self.alpha, self.beta, 58 | runningLoss, viol, self.tol, self.verbose, epoch, self.maxIter, 59 | isConverged) 60 | if not isContinue: break 61 | 62 | if not isConverged and self.verbose > 0: 63 | echo("Objective did not converge. Increase maxIter.") 64 | 65 | # finalize 66 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) -------------------------------------------------------------------------------- /src/nimfm/optimizer/adagrad_ffm_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../loss 2 | import ../model/field_aware_factorization_machine, ../model/params 3 | from ../model/fm_base import checkTarget 4 | from sgd import stoppingCriterion 5 | from adagrad import AdaGrad, newAdaGrad, init, finalize, updateG, update 6 | export adagrad.AdaGrad, adagrad.newAdaGrad 7 | from sgd_ffm import predictWithGrad 8 | from sgd_multi import nThreads 9 | import sequtils, math, random, sugar, threadpool 10 | 11 | 12 | var 13 | dA {.threadvar.}: Tensor # zeros(P.shape) 14 | 15 | 16 | proc epochSub[L](self: ptr AdaGrad[L], X: ptr RowDataset, P: ptr Tensor, 17 | w: ptr Vector, intercept: ptr float64, y: ptr Vector, 18 | nAugments: int, fitLinear, fitIntercept: bool, 19 | indices: ptr seq[int], s, t: int): (float64, float64) = 20 | if dA.isNil or dA.shape != P[].shape: 21 | dA = zeros(P[].shape) 22 | 23 | for ii in s..void = nil) = 39 | ## Fits the factorization machine on X and y by stochastic gradient descent. 40 | ffm.init(X) 41 | 42 | let y = ffm.checkTarget(y) 43 | let 44 | nSamples = X.nSamples 45 | fitLinear = ffm.fitLinear 46 | fitIntercept = ffm.fitIntercept 47 | nThreads = nThreads(maxThreads) 48 | var 49 | indices = toSeq(0.. 0: 69 | X.readCache(nSamples-nRest) 70 | borders[0] = nSamples - nRest 71 | for th in 0.. 0: 101 | echo("Objective did not converge. 
Increase maxIter.") 102 | 103 | # finalize 104 | finalize(self, ffm.P, ffm.w, ffm.intercept, fitLinear, fitIntercept) -------------------------------------------------------------------------------- /src/nimfm/optimizer/adagrad_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../model/factorization_machine 2 | import ../model/params, ../loss 3 | from ../model/fm_base import checkTarget 4 | from sgd import predictWithGrad, stoppingCriterion, transpose 5 | from sgd_multi import nThreads 6 | from adagrad import AdaGrad, newAdaGrad, updateG, update, finalize, init 7 | export adagrad.AdaGrad, adagrad.newAdaGrad 8 | import sequtils, math, random, sugar, threadpool 9 | 10 | var 11 | A {.threadvar.}: Matrix # zeros([nComponents, degree+1]) 12 | dA {.threadvar.}: Tensor # zeros(P.shape) 13 | 14 | 15 | proc epochSub[L](self: ptr AdaGrad[L], X: ptr RowDataset, P: ptr Tensor, 16 | w: ptr Vector, intercept: ptr float64, 17 | y: ptr Vector, nComponents, degree, nAugments: int, 18 | fitLinear, fitIntercept: bool, 19 | indices: ptr seq[int], s, t: int): (float64, float64) = 20 | if A.isNil or A.shape != [nComponents, degree+1]: 21 | A = zeros([nComponents, degree+1]) 22 | if dA.isNil or dA.shape != P[].shape: 23 | dA = zeros(P[].shape) 24 | 25 | for ii in s..void = nil) = 42 | ## Fits the factorization machine on X and y by stochastic gradient descent. 43 | fm.init(X) 44 | 45 | let y = fm.checkTarget(y) 46 | let 47 | nSamples = X.nSamples 48 | nComponents = fm.P.shape[1] 49 | nOrders = fm.P.shape[0] 50 | degree = fm.degree 51 | nAugments = fm.nAugments 52 | fitLinear = fm.fitLinear 53 | fitIntercept = fm.fitIntercept 54 | nThreads = nThreads(maxThreads) 55 | var 56 | indices = toSeq(0.. 0: 79 | X.readCache(nSamples-nRest) 80 | borders[0] = nSamples - nRest 81 | for th in 0.. 0: 111 | echo("Objective did not converge. Increase maxIter.") 112 | 113 | # finalize 114 | finalize(self, P, fm.w, fm.intercept, fitLinear, fitIntercept) 115 | transpose(fm.P, P) -------------------------------------------------------------------------------- /src/nimfm/optimizer/fista.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../loss 2 | import ../model/factorization_machine, ../model/fm_base, ../model/params 3 | import optimizer_base, utils 4 | from ../regularizer/regularizers import newSquaredL12 5 | from pgd import predictAll, predictAllWithGrad, linesearch, finalize 6 | import math, sugar 7 | 8 | 9 | type 10 | FISTA*[L, R] = ref object of BaseCSROptimizer 11 | gamma*: float64 12 | loss*: L 13 | reg*: R 14 | rho: float64 15 | sigma: float64 16 | maxSearch: int 17 | t: float64 18 | 19 | 20 | proc newFISTA*[L, R](maxIter=100, alpha0=1e-6, alpha=1e-3, beta=1e-4, 21 | gamma=1e-4, loss: L=newSquared(), reg: R=newSquaredL12(), 22 | rho=0.5, sigma=1.0, maxSearch = -1, verbose = 1, 23 | tol = 1e-6): FISTA[L, R] = 24 | ## Creates new FISTA. 25 | ## maxIter: Maximum number of iteration. At each iteration, 26 | ## all parameters are updated once by using all samples. 27 | ## alpha0: Regularization-strength for intercept. 28 | ## alpha: Regularization-strength for linear term. 29 | ## beta: Regularization-strength for higher-order weights. 30 | ## gamma: Sparsity-inducing-regularization-strength for higher-order weights. 31 | ## loss: Loss function. It must have mu: float64 field and 32 | ## loss/dloss proc: (float64, float64)->float64. 
33 | ## reg: Sparsity-inducing regularization. 34 | ## rho: Paraneter for line search. (0, 1) 35 | ## sigma: Parameter for line search. (0, 1] 36 | ## maxSearch: Maximum number of iterations in line search. If <= 0, 37 | ## line search runs until the stopping condition is satisfied. 38 | ## verbose: Whether to print information on optimization processes. 39 | ## tol: Tolerance hyperparameter for stopping criterion. 40 | result = FISTA[L, R]( 41 | maxIter: maxIter, alpha0: alpha0, alpha: alpha, beta: beta, gamma: gamma, 42 | loss: loss, reg: reg, rho: rho, sigma: sigma, maxSearch: maxSearch, 43 | tol: tol, verbose: verbose, t: 0) 44 | 45 | 46 | proc extrapolate*(z_params, params, old_params: Params, coef: float64) = 47 | z_params <- params 48 | z_params.add(params, coef) 49 | z_params.add(old_params, -coef) 50 | 51 | 52 | proc fit*[L, R](self: FISTA[L, R], X: RowDataset, y: seq[float64], 53 | sfm: FactorizationMachine, 54 | callback: (FISTA[L, R], FactorizationMachine)->void = nil) = 55 | ## Fits the sparse factorization machine on X and y by accelerated pgd. 56 | sfm.init(X) 57 | let y = sfm.checkTarget(y) 58 | let 59 | nSamples = X.nSamples 60 | nFeatures = X.nFeatures 61 | nComponents = sfm.P.shape[1] 62 | nOrders = sfm.P.shape[0] 63 | degree = sfm.degree 64 | fitLinear = sfm.fitLinear 65 | fitIntercept = sfm.fitIntercept 66 | nAugments = sfm.nAugments 67 | var 68 | yPred: Vector = zeros([nSamples]) 69 | dL: Vector = zeros([nSamples]) 70 | P: Tensor = zeros([nOrders, sfm.P.shape[2], nComponents]) 71 | params: Params 72 | old_params = newParams(P.shape, sfm.w.len, fitLinear, fitIntercept) 73 | z_params = newParams(P.shape, sfm.w.len, fitLinear, fitIntercept) 74 | grads = newParams(P.shape, sfm.w.len, fitLinear, fitIntercept) 75 | A: Matrix = zeros([nComponents, degree+1]) 76 | dA: Tensor = zeros(P.shape) 77 | isConverged = false 78 | 79 | # copy for fast training 80 | for order in 0.. 0: # echo header 94 | echoHeader(self.maxIter, viol=true) 95 | 96 | # perform optimization 97 | var lossVal = Inf 98 | var regVal = Inf 99 | for it in 0.. 0 or self.verbose > 0: 127 | viol = computeViol(params, old_params) 128 | 129 | if self.verbose > 0: 130 | echoInfo(it+1, self.maxIter, viol, lossVal, regVal) 131 | 132 | if viol < self.tol: 133 | if self.verbose > 0: echo("Converged at epoch ", it+1, ".") 134 | isConverged = true 135 | break 136 | 137 | if not isConverged and self.verbose > 0: 138 | echo("Objective did not converge. Increase maxIter.") 139 | 140 | # finalize 141 | finalize(sfm, params) -------------------------------------------------------------------------------- /src/nimfm/optimizer/fit_linear.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../loss 2 | import math 3 | 4 | 5 | proc fitLinearCD*[L](w: var Vector, X: ColDataset, y: seq[float64], 6 | yPred: var Vector, colNormSq: Vector, 7 | alpha: float64, loss: L): float64 = 8 | result = 0.0 9 | let nFeatures = X.nFeatures 10 | var 11 | update = 0.0 12 | invStepSize = 0.0 13 | 14 | for j in 0..void = nil) = 52 | ## Fits the factorization machine on X and y by stochastic gradient descent. 53 | ffm.init(X) 54 | 55 | let y = ffm.checkTarget(y) 56 | let 57 | nSamples = X.nSamples 58 | fitLinear = ffm.fitLinear 59 | fitIntercept = ffm.fitIntercept 60 | var 61 | scaling_w = 1.0 62 | scaling_P = 1.0 63 | scalings_w = ones([len(ffm.w)]) 64 | scalings_P = ones([ffm.P.shape[1]]) 65 | indices = toSeq(0.. 
0 and self.it mod self.nCalls == 0: 83 | if not callback.isNil: 84 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, 85 | fitLinear) 86 | callback(self, ffm) 87 | inc(self.it) 88 | 89 | # one epoch done 90 | runningLoss /= float(nSamples) 91 | if not callback.isNil and self.nCalls <= 0: 92 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, 93 | fitLinear) 94 | callback(self, ffm) 95 | let isContinue = stoppingCriterion( 96 | ffm.P, ffm.w, ffm.intercept, self.alpha0, self.alpha, self.beta, 97 | runningLoss, viol, self.tol, self.verbose, epoch, self.maxIter, 98 | isConverged) 99 | if not isContinue: break 100 | 101 | if not isConverged and self.verbose > 0: 102 | echo("Objective did not converge. Increase maxIter.") 103 | 104 | # finalize 105 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, 106 | fitLinear) -------------------------------------------------------------------------------- /src/nimfm/optimizer/sgd_ffm_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor 2 | import ../model/field_aware_factorization_machine 3 | from ../model/fm_base import checkTarget 4 | import sequtils, math, random, sugar, threadpool 5 | from sgd import 6 | SGD, newSGD, finalize, stoppingCriterion, init, SchedulingKind 7 | export sgd.SGD, sgd.newSGD, sgd.SchedulingKind 8 | from sgd_multi import nThreads 9 | from sgd_ffm import step 10 | 11 | var 12 | dA {.threadvar.}: Tensor # zeros(P.shape) 13 | 14 | 15 | proc epochSub[L](self: ptr SGD[L], X: ptr RowDataset, P: ptr Tensor, 16 | w: ptr Vector, intercept: ptr float64, 17 | y: ptr Vector, scaling_P, scaling_w: ptr float64, 18 | scalings_P, scalings_w: ptr Vector, 19 | nAugments: int, fitLinear, fitIntercept: bool, 20 | indices: ptr seq[int], s, t: int): (float64, float64) = 21 | if dA.isNil or dA.shape != P[].shape: 22 | dA = zeros(P[].shape) 23 | for ii in s..void = nil) = 34 | ## Fits the factorization machine on X and y by stochastic gradient descent. 35 | ffm.init(X) 36 | 37 | let y = ffm.checkTarget(y) 38 | let 39 | nSamples = X.nSamples 40 | fitLinear = ffm.fitLinear 41 | fitIntercept = ffm.fitIntercept 42 | nThreads = nThreads(maxThreads) 43 | var 44 | scaling_w = 1.0 45 | scaling_P = 1.0 46 | scalings_w = ones([len(ffm.w)]) 47 | scalings_P = ones([ffm.P.shape[1]]) 48 | indices = toSeq(0.. 0: 67 | X.readCache(nSamples-nRest) 68 | borders[0] = nSamples - nRest 69 | for th in 0.. 0: 101 | echo("Objective did not converge. 
Increase maxIter.") 102 | # finalize 103 | finalize(ffm.P, ffm.w, scaling_P, scaling_w, scalings_P, scalings_w, fitLinear) -------------------------------------------------------------------------------- /src/nimfm/optimizer/sgd_multi.nim: -------------------------------------------------------------------------------- 1 | import ../dataset, ../tensor/tensor, ../model/factorization_machine 2 | from ../model/fm_base import checkTarget 3 | import sequtils, math, random, sugar, threadpool, cpuinfo 4 | from sgd import 5 | SGD, init, stoppingCriterion, transpose, finalize, step, SchedulingKind 6 | export sgd.SGD, sgd.newSGD, sgd.SchedulingKind 7 | 8 | var 9 | A {.threadvar.}: Matrix # zeros([nComponents, degree+1]) 10 | dA {.threadvar.}: Tensor # zeros(P.shape) 11 | 12 | 13 | proc nThreads*(maxThreads: int):int = 14 | if maxThreads < 0: 15 | result = countProcessors() * 2 16 | else: 17 | result = maxThreads 18 | result = min(result, MaxThreadPoolSize) 19 | 20 | 21 | proc epochSub[L](self: ptr SGD[L], X: ptr RowDataset, P: ptr Tensor, 22 | w: ptr Vector, intercept: ptr float64, 23 | y: ptr Vector, scaling_P, scaling_w: ptr float64, 24 | scalings_P, scalings_w: ptr Vector, 25 | nComponents, degree, nAugments: int, 26 | fitLinear, fitIntercept: bool, 27 | indices: ptr seq[int], s, t: int): (float64, float64) = 28 | if A.isNil or A.shape != [nComponents, degree+1]: 29 | A = zeros([nComponents, degree+1]) 30 | if dA.isNil or dA.shape != P[].shape: 31 | dA = zeros(P[].shape) 32 | for ii in s..void = nil) = 43 | ## Fits the factorization machine on X and y by stochastic gradient descent. 44 | fm.init(X) 45 | 46 | let y = fm.checkTarget(y) 47 | let 48 | nSamples = X.nSamples 49 | nComponents = fm.P.shape[1] 50 | nOrders = fm.P.shape[0] 51 | degree = fm.degree 52 | nAugments = fm.nAugments 53 | fitLinear = fm.fitLinear 54 | fitIntercept = fm.fitIntercept 55 | nThreads = nThreads(maxThreads) 56 | var 57 | scaling_w = 1.0 58 | scaling_P = 1.0 59 | scalings_w = ones([len(fm.w)]) 60 | scalings_P = ones([fm.P.shape[2]]) 61 | indices = toSeq(0.. 0: 84 | X.readCache(nSamples-nRest) 85 | borders[0] = nSamples - nRest 86 | for th in 0.. 0: 117 | echo("Objective did not converge. 
Increase maxIter.") 118 | # finalize 119 | finalize(P, fm.w, scaling_P, scaling_w, scalings_P, scalings_w, fitLinear) 120 | transpose(fm.P, P) -------------------------------------------------------------------------------- /src/nimfm/optimizer/utils.nim: -------------------------------------------------------------------------------- 1 | import ../tensor/tensor, ../model/params, ../loss 2 | import math, strformat, strutils 3 | 4 | 5 | proc computeViol*(P, old_P: Tensor, w, old_w: Vector, 6 | intercept, intercept_old: float64, 7 | fitLinear, fitIntercept: bool): float64 {.inline.} = 8 | result = 0.0 9 | for order in 0..= 0: 47 | stdout.write(fmt" {viol:<10.4e}") 48 | if loss >= 0: 49 | stdout.write(fmt" {loss:<10.4e}") 50 | if regul >= 0: 51 | stdout.write(fmt" {regul:<10.4e}") 52 | stdout.write("\n") 53 | stdout.flushFile() 54 | 55 | 56 | proc regularization*[T](P: T, w: Vector, intercept: float64, 57 | alpha0, alpha, beta: float64): float64 = 58 | result = 0.5 * alpha0 * intercept^2 + 0.5 * alpha * norm(w, 2)^2 59 | result += 0.5 * beta * norm(P, 2)^2 60 | 61 | 62 | proc regularization*(params: Params, alpha0, alpha, beta: float64): float64 = 63 | result = regularization(params.P, params.w, params.intercept, 64 | alpha0, alpha, beta) 65 | 66 | 67 | proc objective*[L, T](y: seq[float64], yPred: Vector, P: T, w: Vector, 68 | intercept: float64, alpha0, alpha, beta: float64, 69 | loss: L): (float64, float64) = 70 | result[0] = 0.0 71 | let nSamples = len(y) 72 | for i in 0.. lam: pj *= (1.0 - lam / norm) 28 | else: 29 | pj[0.. ^1] = 0.0 30 | 31 | 32 | # for psgd/pgd/minibatch-psgd 33 | proc prox*(self: L21, P: var Matrix, gamma: float64, degree: int) {.inline.} = 34 | for j in 0.. lam*self.dcache[degree]: 83 | pj *= 1.0 - lam*self.dcache[degree] / norm 84 | else: 85 | pj[0..^1] = 0.0 86 | -------------------------------------------------------------------------------- /src/nimfm/regularizer/omegati.nim: -------------------------------------------------------------------------------- 1 | import ../tensor/tensor, utils, math 2 | 3 | 4 | type 5 | OmegaTI* = ref object 6 | dcache: Vector 7 | cache: Vector 8 | absp: Vector 9 | p: Vector 10 | value*: float64 11 | 12 | 13 | proc newOmegaTI*(): OmegaTI = 14 | new(result) 15 | 16 | 17 | proc eval*(self: OmegaTI, P: Matrix, degree: int): float64 = 18 | let nFeatures = P.shape[0] 19 | let nComponents = P.shape[1] 20 | var cache = zeros([degree+1, nComponents]) 21 | cache[0, 0..^1] = 1.0 22 | for j in 0.. lamScaled: 41 | pj *= 1.0 - lamScaled / norm 42 | else: 43 | pj[.. ^1] = 0.0 44 | 45 | 46 | # for pgd/psgd/minibatch-psgd 47 | # P.shape: [nFeatures, nComponents] 48 | proc prox*(self: SquaredL21, P: var Matrix, 49 | lam: float64, degree: int) {.inline.} = 50 | if not self.transpose: 51 | norm(P, self.norms, 2, 1) 52 | for i in 0..= pivot: 25 | cumsum_cache += v[j] 26 | candidates[nG] = j 27 | nG += 1 28 | else: 29 | candidates[n+nL] = j 30 | nL += 1 31 | # discard greaters from candidates 32 | if ((cumsum + cumsumCache) - float(rho+nG)*pivot) < z: 33 | nCandidates = nL 34 | offset = n 35 | cumsum += cumsum_cache + pivot 36 | candidates[nG] = pivot_idx 37 | nG += 1 38 | rho += nG 39 | else: # discard lessers from candidates 40 | nCandidates = nG 41 | offset = 0 42 | 43 | theta = (cumsum - z) / float(rho) 44 | for i in 0.. 
a[result]: result = i 31 | 32 | 33 | proc expit*(x: float64): float64 = exp(min(0.0, x)) / (1.0 + exp(-abs(x))) 34 | 35 | 36 | proc expit*(a: openarray[float64]): seq[float64] = a.map(expit) 37 | 38 | 39 | proc newLabelEncoder*[T](): LabelEncoder[T] = 40 | result = new(LabelEncoder[T]) 41 | result.table = newTable[T, int]() 42 | result.invTable = newTable[int, T]() 43 | result.classes = newSeq[T]() 44 | 45 | 46 | proc fit*[T](le: LabelEncoder[T], y: openArray[T]) = 47 | # initialization 48 | var nClasses: int = 0 49 | clear(le.table) 50 | clear(le.invTable) 51 | le.classes.setLen(0) 52 | 53 | for val in y: 54 | if not le.table.hasKey(val): 55 | le.table[val] = nClasses 56 | le.classes.add(val) 57 | inc(nClasses) 58 | le.classes.sort() 59 | # sorted transformation 60 | for i, val in le.classes: 61 | le.table[val] = i 62 | le.invTable[i] = val 63 | 64 | 65 | proc transform*[T](le: LabelEncoder[T], y: openArray[T], yEnc: var seq[int]) = 66 | yEnc = newSeq[int](len(y)) 67 | for i, val in y: 68 | if not le.table.haskey(val): 69 | raise newException(KeyError, "Key " & $val & " is unknown.") 70 | yEnc[i] = le.table[val] 71 | 72 | 73 | proc transformed*[T](le: LabelEncoder[T], y: openArray[T]): seq[int] = 74 | transform(le, y, result) 75 | 76 | 77 | proc inverseTransform*[T](le: LabelEncoder[T], y: openArray[int], 78 | yEnc: var seq[T]) = 79 | yEnc.setLen(len(y)) 80 | for i, val in y: 81 | if not le.invTable.haskey(val): 82 | raise newException(KeyError, "Label " & $val & " is unknown.") 83 | yEnc[i] = le.invTable[val] 84 | 85 | 86 | proc inverseTransformed*[T](le: LabelEncoder[T], y: openArray[int]): seq[T] = 87 | inverseTransform(le, y, result) 88 | -------------------------------------------------------------------------------- /tests/comb.nim: -------------------------------------------------------------------------------- 1 | proc comb*(n, m: int, k=0): seq[seq[int]] = 2 | result = @[] 3 | if m == 1: 4 | for i in k..sgn(x)) 70 | 71 | 72 | proc checkTarget*(self: CFMSlow, y: seq[SomeNumber]): seq[float64] = 73 | case self.task 74 | of classification: 75 | result = y.map(x => float(sgn(x))) 76 | of regression: 77 | result = y.map(x => float(x)) 78 | 79 | 80 | proc score*(self: CFMSlow, X: Matrix, y: seq[float64]): float64 = 81 | let yPred = self.decisionFunction(X) 82 | case self.task 83 | of regression: 84 | result = rmse(y, yPred) 85 | of classification: 86 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) -------------------------------------------------------------------------------- /tests/model/ffm_slow.nim: -------------------------------------------------------------------------------- 1 | import nimfm/tensor/tensor, nimfm/metrics 2 | import nimfm/model/fm_base 3 | from nimfm/model/field_aware_factorization_machine import 4 | FieldAwareFactorizationMachineObj 5 | import sugar, random, sequtils, math 6 | 7 | 8 | type 9 | FFMSlow* = ref FieldAwareFactorizationMachineObj 10 | 11 | NotFittedError = object of Exception 12 | 13 | 14 | proc checkInitialized*(self: FFMSlow) = 15 | if not self.isInitialized: 16 | raise newException(NotFittedError, "Factorization machines is not fitted.") 17 | 18 | 19 | proc nAugments*(self: FFMSlow): int = 0 20 | 21 | 22 | proc newFFMSlow*(task: TaskKind, n_components = 10, fitIntercept = true, 23 | fitLinear = true, warmStart = false, randomState = 1, 24 | scale = 0.01): FFMSlow = 25 | new(result) 26 | result.task = task 27 | if n_components < 1: 28 | raise newException(ValueError, "nComponents < 1.") 29 | result.n_components = n_components 30 | 
result.fitIntercept = fitIntercept 31 | result.fitLinear = fitLinear 32 | result.warmStart = warmStart 33 | result.randomState = randomState 34 | result.scale = scale 35 | result.isInitialized = false 36 | 37 | 38 | proc decisionFunction*(self: FFMSlow, X: Matrix, fields: seq[int], 39 | i: int): float64 = 40 | let nFeatures = X.shape[1] 41 | let nFields = max(fields) + 1 42 | let nAugments = self.nAugments 43 | 44 | result = self.intercept 45 | for j in 0..sgn(x)) 73 | 74 | 75 | proc init*(self: FFMSlow, X: Matrix, fields: seq[int]) = 76 | if not (self.warmStart and self.isInitialized): 77 | let nFeatures: int = X.shape[1] 78 | randomize(self.randomState) 79 | let nFields = max(fields) + 1 80 | let nAugments = self.nAugments 81 | self.w = zeros([nFeatures]) 82 | if nAugments > 0: 83 | self.P = randomNormal([nFields + 1, nFeatures+nAugments, self.nComponents], 84 | scale = self.scale) 85 | else: 86 | self.P = randomNormal([nFields, nFeatures+nAugments, self.nComponents], 87 | scale = self.scale) 88 | 89 | self.intercept = 0.0 90 | self.isInitialized = true 91 | 92 | 93 | proc checkTarget*(self: FFMSlow, y: seq[SomeNumber]): seq[float64] = 94 | case self.task 95 | of classification: 96 | result = y.map(x => float(sgn(x))) 97 | of regression: 98 | result = y.map(x => float(x)) 99 | 100 | 101 | proc score*(self: FFMSlow, X: Matrix, fields: seq[int], y: seq[float64]): float64 = 102 | let yPred = self.decisionFunction(X, fields) 103 | case self.task 104 | of regression: 105 | result = rmse(y, yPred) 106 | of classification: 107 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) 108 | 109 | 110 | proc computeGrad*(self: FFMSlow, X: Matrix, fields: seq[int], i: int, 111 | dL: float64, grad: var Tensor) = 112 | let 113 | nFeatures = X.shape[1] 114 | nAugments = self.nAugments 115 | nComponents = self.nComponents 116 | nFields = max(fields)+1 117 | 118 | for j1 in 0..<(nFeatures+nAugments): 119 | let f1 = if j1 < nFeatures: fields[j1] else: nFields 120 | let val1 = if j1 < nFeatures: X[i, j1] else: 1.0 121 | for j2 in (j1+1)..<(nFeatures+nAugments): 122 | let f2 = if j2 < nFeatures: fields[j2] else: nFields 123 | let val2 = if j2 < nFeatures: X[i, j2] else: 1.0 124 | let interaction = val1 * val2 125 | for s in 0..sgn(x)) 78 | 79 | 80 | proc init*(self: FMSlow, X: Matrix) = 81 | if not (self.warmStart and self.isInitialized): 82 | let nFeatures: int = X.shape[1] 83 | randomize(self.randomState) 84 | 85 | self.w = zeros([nFeatures]) 86 | let nOrders = self.nOrders 87 | let nAugments = self.nAugments 88 | self.P = randomNormal([nOrders, self.nComponents, nFeatures+nAugments], 89 | scale = self.scale) 90 | self.intercept = 0.0 91 | self.isInitialized = true 92 | 93 | 94 | proc checkTarget*(self: FMSlow, y: seq[SomeNumber]): seq[float64] = 95 | case self.task 96 | of classification: 97 | result = y.map(x => float(sgn(x))) 98 | of regression: 99 | result = y.map(x => float(x)) 100 | 101 | 102 | proc score*(self: FMSlow, X: Matrix, y: seq[float64]): float64 = 103 | let yPred = self.decisionFunction(X) 104 | case self.task 105 | of regression: 106 | result = rmse(y, yPred) 107 | of classification: 108 | result = accuracy(y.map(x=>sgn(x)), yPred.map(x=>sgn(x))) 109 | 110 | 111 | proc computeGrad*(self: FMSlow, X: Matrix, i: int, dL: float64, 112 | grad: var Tensor) = 113 | let 114 | nFeatures = X.shape[1] 115 | nComponents = self.P.shape[1] 116 | nAugments = self.nAugments 117 | 118 | for order in 0..= cfm.maxComponents: 109 | break 110 | 111 | predict(yPredQuad, yPredLinear, K, X, cfm.P, cfm.lams, 
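# ----------------------------------------------------------------------
# The pairwise term decisionFunction accumulates above, restated for one
# dense row: feature j1 meets feature j2 through the embedding j1 keeps
# for j2's field and vice versa. ffmPairwise and its seq-of-seqs layout
# are illustrative stand-ins for nimfm's Tensor-based code.
proc ffmPairwise(P: seq[seq[seq[float64]]],  # [field][feature][component]
                 x: seq[float64], fields: seq[int]): float64 =
  for j1 in 0..<x.len:
    for j2 in (j1 + 1)..<x.len:
      let (f1, f2) = (fields[j1], fields[j2])
      var dot = 0.0
      for s in 0..<P[f2][j1].len:
        dot += P[f2][j1][s] * P[f1][j2][s]
      result += dot * x[j1] * x[j2]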
cfm.w, 112 | cfm.intercept, ignoreDiag) 113 | residual = y - yPredQuad - yPredLinear 114 | 115 | # fit P 116 | var gradJ = matmul(X.T*residual, X) 117 | if ignoreDiag: 118 | for i in 0..= cfm.maxComponents: # replace old p 127 | s = argmin(cfm.lams) # replace old p whose lambda is minimum 128 | cfm.P[s] = p 129 | 130 | for i in 0.. self.eta: 154 | cfm.lams *= self.eta / sum(cfm.lams) 155 | 156 | # fit w 157 | predict(yPredQuad, yPredLinear, K, X, cfm.P, cfm.lams, cfm.w, 158 | cfm.intercept, ignoreDiag) 159 | residual = y - yPredQuad 160 | yPredLinear <- 0.0 161 | if fitLinear: 162 | vmmul(residual, Z, resZ) 163 | let maggrad = norm(resZ, 1) 164 | let tolCG = 1e-5 * maggrad 165 | w *= colNormSq # since we use left-right preconditioning 166 | cg(ZTZ, resZ, w, maxIter=1000, init=false, tol=tolCG, 167 | preconditioner=Preconditioner) 168 | cfm.w = w[0..= self.maxSearch: break 54 | # update! 55 | alpha *= self.rho 56 | for s in 0.. lam: 35 | shrink = 1.0 - lam / norm 36 | for s in 0.. strength: 43 | shrink = 1.0 - strength / norm 44 | for s in 0..abs(x)), order=Descending) 13 | var S = 2.0 * lam * cumsummed(absp) 14 | for i in 0.. 2: 62 | raise newException(ValueError, "degree > 2") 63 | if self.transpose: 64 | raise newException(ValueError, "transpose=true is not supported in PBCD.") 65 | else: 66 | proxSquaredL12Slow(P[j], lam, degree) 67 | 68 | 69 | # for gd/sgd 70 | # P.shape: [nComponents, nFeatures] 71 | proc prox*(self: SquaredL12Slow, P: var Matrix, 72 | lam: float64, degree: int) {.inline.} = 73 | if degree > 2: 74 | raise newException(ValueError, "degree > 2") 75 | 76 | if self.transpose: 77 | for s in 0.. lamScaled: 41 | shrink = 1.0 - lamScaled / norms[j] 42 | for s in 0..= pivot: 25 | cumsum_cache += v[j] 26 | candidates[nG] = j 27 | nG += 1 28 | else: 29 | candidates[n+nL] = j 30 | nL += 1 31 | # discard greaters from candidates 32 | if ((cumsum + cumsumCache) - float(rho+nG)*pivot) < z: 33 | nCandidates = nL 34 | offset = n 35 | cumsum += cumsum_cache + pivot 36 | candidates[nG] = pivot_idx 37 | nG += 1 38 | rho += nG 39 | else: # discard lessers from candidates 40 | nCandidates = nG 41 | offset = 0 42 | 43 | theta = (cumsum - z) / float(rho) 44 | for i in 0..toFloat(x))) == 1 25 | check rocauc(yTrue, zeros) == 0.5 26 | check rocauc(yTrue, ones) == 0.5 27 | check rocauc(yTrue, inverse.map(x=>toFloat(x))) == 0 28 | check rocauc(yTrue, inverse01.map(x=>toFloat(x))) == 0 29 | 30 | check rocauc(yTrue01, yScore1) == 0.75 31 | check rocauc(yTrue01, yScore2) == 0.5 32 | check rocauc(yTrue01, yTrue.map(x=>toFloat(x))) == 1 33 | check rocauc(yTrue01, zeros) == 0.5 34 | check rocauc(yTrue01, ones) == 0.5 35 | check rocauc(yTrue01, inverse.map(x=>toFloat(x))) == 0 36 | check rocauc(yTrue01, inverse01.map(x=>toFloat(x))) == 0 37 | 38 | 39 | test "Test accuracy": 40 | check accuracy(yTrue, yScore1.map(x=>sgn(x-0.5))) == 0.75 41 | check accuracy(yTrue, yScore2.map(x=>sgn(x))) == 0.5 42 | check accuracy(yTrue, yTrue) == 1.0 43 | check accuracy(yTrue, zeros.map(x=>toInt(x))) == 0 44 | check accuracy(yTrue, ones.map(x=>toInt(x))) == 0.5 45 | 46 | check accuracy(yTrue01, yScore1.map(x=>int((sgn(x-0.5)+1)/2))) == 0.75 47 | check accuracy(yTrue01, yScore2.map(x=>int((sgn(x)+1)/2))) == 0.5 48 | check accuracy(yTrue01, yTrue01) == 1 49 | check accuracy(yTrue, zeros.map(x=>toInt(x))) == 0 50 | check accuracy(yTrue, ones.map(x=>toInt(x))) == 0.5 51 | 52 | 53 | test "Test precision, recall, f-score": 54 | var actual: (float, float, float) 55 | actual = precisionRecallFscore(yTrue, 
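# ----------------------------------------------------------------------
# The per-row subproblem behind proxSquaredL12Slow above is
#   argmin_x 0.5*||x - p||^2 + lam*(sum_j |x_j|)^2,
# solved by soft-thresholding with the data-dependent threshold
#   theta = 2*lam*S_k / (1 + 2*lam*k),
# where S_k sums the k largest |p_j| that stay nonzero; the
# sort-and-cumsum loop searches for that k. A hedged standalone sketch
# (the closed form is reconstructed here, since the slow version's loop
# body is truncated above):
import algorithm, math

proc proxSquaredL1(p: var seq[float64], lam: float64) =
  var absp = p
  for v in absp.mitems: v = abs(v)
  absp.sort(Descending)
  var s = 0.0
  var theta = 0.0
  for k in 1..absp.len:
    s += absp[k-1]
    let cand = 2.0 * lam * s / (1.0 + 2.0 * lam * float(k))
    if absp[k-1] > cand: theta = cand
    else: break
  for v in p.mitems:
    v = float(sgn(v)) * max(abs(v) - theta, 0.0)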
yScore1.map(x=>sgn(x-0.5))) 56 | check actual == (prec1, recall1, fscore1) 57 | actual = precisionRecallFscore(yTrue, yScore2.map(x=>sgn(x))) 58 | check actual == (prec2, recall2, fscore2) 59 | actual = precisionRecallFscore(yTrue, yTrue) 60 | check actual == (1.0, 1.0, 1.0) 61 | actual = precisionRecallFscore(yTrue, zeros.map(x=>toInt(x))) 62 | check actual == (0.0, 0.0, 0.0) 63 | actual = precisionRecallFscore(yTrue, ones.map(x=>toInt(x))) 64 | check actual == (0.5, 1.0, 1.0/1.5) 65 | actual = precisionRecallFscore(yTrue01, yScore1.map(x=>sgn(x-0.5))) 66 | check actual == (prec1, recall1, fscore1) 67 | actual = precisionRecallFscore(yTrue01, yScore2.map(x=>sgn(x))) 68 | check actual == (prec2, recall2, fscore2) 69 | check precisionRecallFscore(yTrue01, yTrue) == (1.0, 1.0, 1.0) 70 | actual = precisionRecallFscore(yTrue01, zeros.map(x=>toInt(x))) 71 | check actual == (0.0, 0.0, 0.0) 72 | actual = precisionRecallFscore(yTrue01, ones.map(x=>toInt(x))) 73 | check actual == (0.5, 1.0, 1.0/1.5) 74 | actual = precisionRecallFscore( 75 | zeros.map(x=>toInt(x)), zeros.map(x=>toInt(x))) 76 | check actual == (0.0, 0.0, 0.0) 77 | actual = precisionRecallFscore( 78 | ones.map(x=>toInt(x)), ones.map(x=>toInt(x))) 79 | check actual == (1.0, 1.0, 1.0) 80 | actual = precisionRecallFscore( 81 | zeros.map(x=>toInt(x)), zeros.map(x=>toInt(x)), 82 | pos=0) 83 | check actual == (1.0, 1.0, 1.0) 84 | actual = precisionRecallFscore( 85 | ones.map(x=>toInt(x)), ones.map(x=>toInt(x)), 86 | pos=0) 87 | check actual == (0.0, 0.0, 0.0) 88 | 89 | test "Test RMSE": 90 | check rmse(yScore1, yScore2) == 0.5984354601792912 91 | check rmse(yScore2, yScore1) == 0.5984354601792912 92 | check rmse(yScore1, yScore1) == 0.0 93 | check rmse(yScore2, yScore2) == 0.0 94 | check rmse(zeros, ones) == 1.0 95 | check rmse(ones, zeros) == 1.0 96 | check rmse(ones, ones) == 0.0 97 | check rmse(zeros, zeros) == 0.0 98 | 99 | 100 | test "Test r2 score": 101 | check r2(yScore1, yScore2) == -4.687344913151364 102 | check r2(yScore2, yScore1) == -0.9163879598662203 103 | check r2(yScore1, yScore1) == 1.0 104 | check r2(yScore2, yScore2) == 1.0 105 | check r2(zeros, yScore1) == 0.0 106 | check r2(zeros, yScore2) == 0.0 107 | check r2(zeros, zeros) == 1.0 108 | check r2(ones, yScore1) == 0.0 109 | check r2(ones, yScore2) == 0.0 110 | check r2(ones, ones) == 1.0 -------------------------------------------------------------------------------- /tests/test_sgd_ffm.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import utils, optimizer/sgd_ffm_slow 3 | import nimfm/loss, nimfm/dataset, nimfm/tensor/tensor 4 | import nimfm/model/field_aware_factorization_machine, nimfm/model/fm_base 5 | import nimfm/optimizer/sgd_ffm 6 | import model/ffm_slow 7 | 8 | 9 | suite "Test stochastic gradient descent for FFM": 10 | let 11 | n = 80 12 | d = 20 13 | nFields = 5 14 | nComponents = 4 15 | 16 | test "Test fitLinear": 17 | for fitIntercept in [true, false]: 18 | var 19 | X: CSRFieldDataset 20 | y: seq[float64] 21 | fieldDict: seq[int] 22 | createFFMDataset(X, y, fieldDIct, n, d, nFields, nComponents, 42, 23 | false, fitIntercept) 24 | # fit fast version 25 | var ffm = newFieldAwareFactorizationMachine( 26 | task = regression, nComponents = nComponents, 27 | fitLinear = false, 28 | fitIntercept = fitIntercept, randomState = 1) 29 | var sgd = newSGD(maxIter = 10, verbose = 0, tol = 0) 30 | sgd.fit(X, y, ffm) 31 | for j in 0..= normStrong 
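# ----------------------------------------------------------------------
# Reference definitions assumed by the metric checks above (nimfm's real
# implementations live in src/nimfm/metrics.nim, which also handles the
# zero-variance target edge cases these plain forms do not):
#   rmse = sqrt(mean((y - yPred)^2))
#   r2   = 1 - sum((y - yPred)^2) / sum((y - mean(y))^2)
import math

proc rmseRef(yTrue, yPred: seq[float64]): float64 =
  var se = 0.0
  for i in 0..<yTrue.len:
    se += (yTrue[i] - yPred[i])^2
  result = sqrt(se / float(yTrue.len))

proc r2Ref(yTrue, yPred: seq[float64]): float64 =
  let mean = sum(yTrue) / float(yTrue.len)
  var ssRes = 0.0
  var ssTot = 0.0
  for i in 0..<yTrue.len:
    ssRes += (yTrue[i] - yPred[i])^2
    ssTot += (yTrue[i] - mean)^2
  result = 1.0 - ssRes / ssTot

# e.g. rmseRef(zeros, ones) == 1.0, matching `check rmse(zeros, ones) == 1.0`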
--------------------------------------------------------------------------------
/tests/test_squaredl12.nim:
--------------------------------------------------------------------------------
 1 | import unittest, sequtils
 2 | import nimfm/regularizer/regularizers
 3 | import regularizer/squaredl12_slow, random
 4 | 
 5 | 
 6 | suite "Test proximal operator of SquaredL12":
 7 |   let
 8 |     d = 100
 9 | 
10 |   test "Test proximal operation":
11 |     var
12 |       reg = newSquaredL12()
13 | 
14 |     reg.initSGD(nFeatures = d, nComponents=4, degree=2)
15 |     var q = newSeqWith(d, 0.0)
16 |     for i in 0..<1000:
17 |       for j in 0..
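# ----------------------------------------------------------------------
# The test above follows the repo-wide pattern: run the optimized prox
# and a naive reference on the same random inputs and check agreement.
# A self-contained illustration of that pattern with plain L1
# soft-thresholding (both procs here are illustrative, not nimfm API):
import unittest, random, math

proc softThr(v: seq[float64], t: float64): seq[float64] =
  result = newSeq[float64](v.len)
  for i in 0..<v.len:
    result[i] = float(sgn(v[i])) * max(abs(v[i]) - t, 0.0)

proc softThrSlow(v: seq[float64], t: float64): seq[float64] =
  # case-by-case solution of argmin_x 0.5*(x - v_i)^2 + t*|x|
  result = newSeq[float64](v.len)
  for i in 0..<v.len:
    if v[i] > t: result[i] = v[i] - t
    elif v[i] < -t: result[i] = v[i] + t
    else: result[i] = 0.0

suite "fast vs slow prox (illustrative)":
  test "agree on random inputs":
    randomize(1)
    for trial in 0..<100:
      var v = newSeq[float64](20)
      for x in v.mitems: x = rand(2.0) - 1.0
      let a = softThr(v, 0.3)
      let b = softThrSlow(v, 0.3)
      for i in 0..<v.len:
        check abs(a[i] - b[i]) < 1e-12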