├── .formatter.exs
├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── benchmarks
│   └── knn.exs
├── config
│   └── config.exs
├── images
│   ├── scholar.png
│   └── scholar_simplified.png
├── lib
│   └── scholar
│       ├── cluster
│       │   ├── affinity_propagation.ex
│       │   ├── dbscan.ex
│       │   ├── gmm.ex
│       │   ├── hierarchical.ex
│       │   ├── k_means.ex
│       │   └── optics.ex
│       ├── covariance
│       │   ├── ledoit_wolf.ex
│       │   ├── shrunk_covariance.ex
│       │   └── utils.ex
│       ├── cross_decomposition
│       │   └── pls_svd.ex
│       ├── decomposition
│       │   ├── pca.ex
│       │   ├── truncated_svd.ex
│       │   └── utils.ex
│       ├── feature_extraction
│       │   └── count_vectorizer.ex
│       ├── impute
│       │   ├── knn_imputter.ex
│       │   └── simple_imputer.ex
│       ├── integrate
│       │   └── integrate.ex
│       ├── interpolation
│       │   ├── bezier_spline.ex
│       │   ├── cubic_spline.ex
│       │   └── linear.ex
│       ├── linear
│       │   ├── bayesian_ridge_regression.ex
│       │   ├── isotonic_regression.ex
│       │   ├── linear_helpers.ex
│       │   ├── linear_regression.ex
│       │   ├── logistic_regression.ex
│       │   ├── polynomial_regression.ex
│       │   ├── ridge_regression.ex
│       │   └── svm.ex
│       ├── manifold
│       │   ├── mds.ex
│       │   ├── trimap.ex
│       │   └── tsne.ex
│       ├── metrics
│       │   ├── classification.ex
│       │   ├── clustering.ex
│       │   ├── distance.ex
│       │   ├── neighbors.ex
│       │   ├── ranking.ex
│       │   ├── regression.ex
│       │   └── similarity.ex
│       ├── model_selection.ex
│       ├── naive_bayes
│       │   ├── bernoulli.ex
│       │   ├── complement.ex
│       │   ├── gaussian.ex
│       │   └── multinomial.ex
│       ├── neighbors
│       │   ├── brute_knn.ex
│       │   ├── kd_tree.ex
│       │   ├── knn_classifier.ex
│       │   ├── knn_regressor.ex
│       │   ├── large_vis.ex
│       │   ├── nn_descent.ex
│       │   ├── random_projection_forest.ex
│       │   ├── rnn_classifier.ex
│       │   ├── rnn_regressor.ex
│       │   └── utils.ex
│       ├── options.ex
│       ├── preprocessing.ex
│       ├── preprocessing
│       │   ├── binarizer.ex
│       │   ├── max_abs_scaler.ex
│       │   ├── min_max_scaler.ex
│       │   ├── normalizer.ex
│       │   ├── one_hot_encoder.ex
│       │   ├── ordinal_encoder.ex
│       │   ├── robust_scaler.ex
│       │   └── standard_scaler.ex
│       ├── shared.ex
│       └── stats.ex
├── mix.exs
├── mix.lock
├── notebooks
│   ├── cv_gradient_boosting_tree.livemd
│   ├── files
│   │   ├── knn_gyms.png
│   │   ├── mammoth.png
│   │   ├── rbf_and_kdtree.png
│   │   └── rpKNN.png
│   ├── hierarchical_clustering.livemd
│   ├── k_means.livemd
│   ├── k_nearest_neighbors.livemd
│   ├── linear_regression.livemd
│   ├── manifold_learning.livemd
│   ├── mds.livemd
│   └── nearest_neighbors.livemd
└── test
    ├── data
    │   └── pima.csv
    ├── scholar
    │   ├── cluster
    │   │   ├── affinity_propagation_test.exs
    │   │   ├── dbscan_test.exs
    │   │   ├── gmm_test.exs
    │   │   ├── hierarchical_test.exs
    │   │   └── k_means_test.exs
    │   ├── covariance
    │   │   ├── ledoit_wolf_test.exs
    │   │   └── shrunk_covariance_test.exs
    │   ├── cross_decomposition
    │   │   └── pls_svd_test.exs
    │   ├── decomposition
    │   │   ├── pca_test.exs
    │   │   └── truncated_svd_test.exs
    │   ├── feature_extraction
    │   │   └── count_vectorizer_test.exs
    │   ├── impute
    │   │   ├── knn_imputter_test.exs
    │   │   └── simple_imputer_test.exs
    │   ├── integrate
    │   │   └── integrate_test.exs
    │   ├── interpolation
    │   │   ├── bezier_spline_test.exs
    │   │   ├── cubic_spline_test.exs
    │   │   └── linear_test.exs
    │   ├── linear
    │   │   ├── bayesian_ridge_regression_test.exs
    │   │   ├── isotonic_regression_test.exs
    │   │   ├── linear_regression_test.exs
    │   │   ├── logistic_regression_test.exs
    │   │   ├── polynomial_regression_test.exs
    │   │   ├── ridge_regression_test.exs
    │   │   └── svm_test.exs
    │   ├── manifold
    │   │   ├── mds_test.exs
    │   │   ├── trimap_test.exs
    │   │   └── tsne_test.exs
    │   ├── metrics
    │   │   ├── classification_test.exs
    │   │   ├── clustering_test.exs
    │   │   ├── distance_test.exs
    │   │   ├── neighbors_test.exs
    │   │   ├── ranking_test.exs
    │   │   ├── regression_test.exs
    │   │   └── similarity_test.exs
    │   ├── model_selection_test.exs
    │   ├── naive_bayes
    │   │   ├── bernoulli_test.exs
    │   │   ├── complement_test.exs
    │   │   ├── gaussian_test.exs
    │   │   └── multinomial_test.exs
    │   ├── neighbors
    │   │   ├── brute_knn_test.exs
    │   │   ├── kd_tree_test.exs
    │   │   ├── knn_classifier_test.exs
    │   │   ├── knn_regressor_test.exs
    │   │   ├── large_vis_test.exs
    │   │   ├── nn_descent_test.exs
    │   │   ├── random_projection_forest_test.exs
    │   │   ├── rnn_classifier_test.exs
    │   │   └── rnn_regressor_test.exs
    │   ├── preprocessing
    │   │   ├── binarizer_test.exs
    │   │   ├── max_abs_scaler_test.exs
    │   │   ├── min_max_scaler_test.exs
    │   │   ├── normalizer_test.exs
    │   │   ├── one_hot_encoder_test.exs
    │   │   ├── ordinal_encoder_test.exs
    │   │   ├── robust_scaler_test.exs
    │   │   └── standard_scaler_test.exs
    │   ├── preprocessing_test.exs
    │   └── stats_test.exs
    ├── support
    │   ├── diabetes_data_raw.csv
    │   ├── diabetes_target.csv
    │   └── scholar_case.ex
    └── test_helper.exs
/.formatter.exs:
--------------------------------------------------------------------------------
1 | # Used by "mix format"
2 | [
3 | import_deps: [:nx],
4 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
5 | ]
6 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 | pull_request:
4 | push:
5 | branches:
6 | - main
7 | jobs:
8 | main:
9 | runs-on: ubuntu-latest
10 | strategy:
11 | fail-fast: false
12 | matrix:
13 | include:
14 | - elixir: "1.18.1"
15 | otp: "27.1"
16 | lint: true
17 | - elixir: "1.15.8"
18 | otp: "26.1"
19 | steps:
20 | - uses: actions/checkout@v2
21 |
22 | - uses: erlef/setup-beam@v1
23 | with:
24 | otp-version: ${{matrix.otp}}
25 | elixir-version: ${{matrix.elixir}}
26 |
27 | - uses: actions/cache@v2
28 | with:
29 | path: |
30 | deps
31 | _build
32 | key: ${{ runner.os }}-mix-${{matrix.elixir}}-${{matrix.otp}}-${{ hashFiles('**/mix.lock') }}
33 | restore-keys: |
34 | ${{ runner.os }}-mix-
35 |
36 | - run: mix deps.get
37 |
38 | - run: mix format --check-formatted
39 | if: ${{ matrix.lint }}
40 |
41 | - run: mix deps.unlock --check-unused
42 | if: ${{ matrix.lint }}
43 |
44 | - run: mix deps.compile
45 |
46 | - run: mix compile --warnings-as-errors
47 | if: ${{ matrix.lint }}
48 |
49 | - run: mix test
50 |
51 | - run: USE_EXLA_AT_COMPILE_TIME=1 mix compile --force
52 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # The directory Mix will write compiled artifacts to.
2 | /_build/
3 |
4 | # If you run "mix test --cover", coverage assets end up here.
5 | /cover/
6 |
 8 | # The directory Mix downloads your dependencies' sources to.
8 | /deps/
9 |
10 | # Where third-party dependencies like ExDoc output generated docs.
11 | /doc/
12 |
13 | # Ignore .fetch files in case you like to edit your project deps locally.
14 | /.fetch
15 |
16 | # If the VM crashes, it generates a dump, let's ignore it too.
17 | erl_crash.dump
18 |
19 | # Also ignore archive artifacts (built via "mix archive.build").
20 | *.ez
21 |
22 | # Ignore package tarball (built via "mix hex.build").
23 | scholar-*.tar
24 |
25 | # Temporary files, for example, from tests.
26 | /tmp/
27 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## v0.4.0 (2025-01-15)
4 |
5 | * Require Nx `~> 0.9`
6 | * Add batching to regression metrics
7 | * Add `Scholar.Cluster.OPTICS`
8 | * Add `Scholar.Covariance.LedoitWolf`
9 | * Add `Scholar.Covariance.ShrunkCovariance`
10 | * Add `Scholar.CrossDecomposition.PLSSVD`
11 | * Add `Scholar.Decomposition.TruncatedSVD`
12 | * Add `Scholar.Impute.KNNImputter`
13 | * Add `Scholar.NaiveBayes.Bernoulli`
14 | * Add `Scholar.Preprocessing.Binarizer`
15 | * Add `Scholar.Preprocessing.RobustScaler`
16 | * Add `partial_fit/2` and `incremental_fit/2` to PCA
17 | * Split `RNN` into `Scholar.Neighbors.RadiusNNClassifier` and `Scholar.Neighbors.RadiusNNRegressor`
18 | * Unify shape checks across all APIs
19 |
20 | ## v0.3.1 (2024-06-18)
21 |
22 | ### Enhancements
23 |
24 | * Add a notebook about manifold learning
25 | * Make knn algorithm configurable on Trimap
26 | * Add `d2_pinball_score` and `d2_absolute_error_score`
27 |
28 | ## v0.3.0 (2024-05-29)
29 |
30 | ### Enhancements
31 |
32 | * Add LargeVis for visualization of large-scale and high-dimensional data in a low-dimensional (typically 2D or 3D) space
33 | * Add `Scholar.Neighbors.KDTree` and `Scholar.Neighbors.RandomProjectionForest`
34 | * Add `Scholar.Metrics.Neighbors`
35 | * Add `Scholar.Linear.BayesianRidgeRegression`
36 | * Add `Scholar.Cluster.Hierarchical`
37 | * Add `Scholar.Manifold.Trimap`
38 | * Add Mean Pinball Loss function
39 | * Add Matthews Correlation Coefficient function
40 | * Add D2 Tweedie Score function
41 | * Add Mean Tweedie Deviance function
42 | * Add Discounted Cumulative Gain function
43 | * Add Precision Recall f-score function
44 | * Add f-beta score function
45 | * Add convergence check to AffinityPropagation
46 | * Default Affinity Propagation preference to `reduce_min` and make it customizable
47 | * Move preprocessing functionality to their own modules with `fit` and `fit_transform` callbacks
48 |
49 | ### Breaking changes
50 |
51 | * Split `KNearestNeighbors` into `KNNClassifier` and `KNNRegressor` with custom algorithm support
52 |
53 | ## v0.2.1 (2023-08-30)
54 |
55 | ### Enhancements
56 |
57 | * Remove `VegaLite.Data` in favour of future use of `Tucan`
58 | * Do not use EXLA at compile time in `Metrics`
59 |
60 | ## v0.2.0 (2023-08-29)
61 |
62 | This version requires Elixir v1.14+.
63 |
64 | ### Enhancements
65 |
66 | * Update notebooks
67 | * Add support for `:f16` and `:bf16` types in `SVD`
68 | * Add `Affinity Propagation`
69 | * Add `t-SNE`
70 | * Add `Polynomial Regression`
71 | * Replace seeds with `Random.key`
72 | * Add 'unrolling loops' option
73 | * Add support for custom optimizers in `Logistic Regression`
74 | * Add `Trapezoidal Integration`
75 | * Add `AUC-ROC`, `AUC`, and `ROC Curve`
76 | * Add `Simpson rule integration`
77 | * Unify tests
78 | * Add `Radius Nearest Neighbors`
79 | * Add `DBSCAN`
80 | * Add classification metrics: `Average Precision Score`, `Balanced Accuracy Score`,
81 | `Cohen Kappa Score`, `Brier Score Loss`, `Zero-One Loss`, `Top-k Accuracy Score`
82 | * Add regression metrics: `R2 Score`, `MSLE`, `MAPE`, `Maximum Residual Error`
83 | * Add support for axes in `Confusion Matrix`
84 | * Add support for broadcasting in `Metrics.Distances`
85 | * Update CI
86 | * Add `Gaussian Mixtures`
87 | * Add Model selection functionalities: `K-fold`, `K-fold Cross Validation`, `Grid Search`
88 | * Change structure of metrics in `Scholar`
89 | * Add a guide with `Cross-Validation` and `Grid Search`
90 |
91 | ## v0.1.0 (2023-03-29)
92 |
93 | First release.
94 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!-- Scholar logo and HTML badges omitted -->
10 | Traditional machine learning tools built on top of Nx. Scholar implements
11 | several algorithms for classification, regression, clustering, dimensionality
12 | reduction, metrics, and preprocessing.
13 |
14 | For deep learning, see [Axon](https://github.com/elixir-nx/axon).
15 | For decision trees/forests, see [EXGBoost](https://github.com/acalejos/exgboost).
16 |
17 | ## Installation
18 |
19 | ### Mix projects
20 |
21 | Add to your `mix.exs`:
22 |
23 | ```elixir
24 | def deps do
25 | [
26 |     {:scholar, "~> 0.4.0"}
27 | ]
28 | end
29 | ```
30 |
31 | Besides Scholar, you will most likely want to use an existing Nx compiler/backend,
32 | such as EXLA:
33 |
34 | ```elixir
35 | def deps do
36 | [
37 |     {:scholar, "~> 0.4.0"},
38 | {:exla, ">= 0.0.0"}
39 | ]
40 | end
41 | ```
42 |
43 | And then in your `config/config.exs` file:
44 |
45 | ```elixir
46 | import Config
47 | config :nx, :default_backend, EXLA.Backend
48 | # Client can also be set to :cuda / :rocm
49 | config :nx, :default_defn_options, [compiler: EXLA, client: :host]
50 | ```
51 |
52 | > #### JIT required! {: .warning}
53 | >
54 | > It is important you set the `default_defn_options` as shown in the snippet above,
55 | > as many algorithms in Scholar use loops which are much more memory efficient when
56 | > JIT compiled.
57 | >
58 | > If for some reason you cannot set a default `defn` compiler, you can explicitly
59 | > JIT any function, for example: `EXLA.jit(&Scholar.Cluster.AffinityPropagation.fit/1)`.
60 |
61 | ### Notebooks
62 |
63 | To use Scholar inside code notebooks, run:
64 |
65 | ```elixir
66 | Mix.install([
67 |   {:scholar, "~> 0.4.0"},
68 | {:exla, ">= 0.0.0"}
69 | ])
70 |
71 | Nx.global_default_backend(EXLA.Backend)
72 | # Client can also be set to :cuda / :rocm
73 | Nx.Defn.global_default_options(compiler: EXLA, client: :host)
74 | ```
75 |
76 | > #### JIT required! {: .warning}
77 | >
78 | > It is important you set the `Nx.Defn.global_default_options/1` as shown in the snippet
79 | > above, as many algorithms in Scholar use loops which are much more memory efficient
80 | > when JIT compiled.
81 | >
82 | > If for some reason you cannot set a default `defn` compiler, you can explicitly
83 | > JIT any function, for example: `EXLA.jit(&Scholar.Cluster.AffinityPropagation.fit/1)`.
84 |
85 | ## License
86 |
87 | Copyright (c) 2022 The Machine Learning Working Group of the Erlang Ecosystem Foundation
88 |
89 | Licensed under the Apache License, Version 2.0 (the "License");
90 | you may not use this file except in compliance with the License.
91 | You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)
92 |
93 | Unless required by applicable law or agreed to in writing, software
94 | distributed under the License is distributed on an "AS IS" BASIS,
95 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
96 | See the License for the specific language governing permissions and
97 | limitations under the License.
98 |
--------------------------------------------------------------------------------
/benchmarks/knn.exs:
--------------------------------------------------------------------------------
1 | # mix run benchmarks/knn.exs
2 | Nx.global_default_backend(EXLA.Backend)
3 | Nx.Defn.global_default_options(compiler: EXLA)
4 |
5 | key = Nx.Random.key(System.os_time())
6 |
7 | inputs_knn = %{
8 | "100x10" => elem(Nx.Random.uniform(key, 0, 100, shape: {100, 10}), 0),
9 | "1000x10" => elem(Nx.Random.uniform(key, 0, 1000, shape: {1000, 10}), 0),
10 | "10000x10" => elem(Nx.Random.uniform(key, 0, 10000, shape: {10000, 10}), 0)
11 | }
12 |
13 | Benchee.run(
14 | %{
15 |     "kdtree" => fn x ->
16 |       kdtree = Scholar.Neighbors.KDTree.fit(x, num_neighbors: 4)
17 |       Scholar.Neighbors.KDTree.predict(kdtree, x)
18 |     end,
19 |     "brute force knn" => fn x ->
20 |       model = Scholar.Neighbors.BruteKNN.fit(x, num_neighbors: 4)
21 |       Scholar.Neighbors.BruteKNN.predict(model, x)
22 |     end
23 |   },
24 |   time: 10,
25 |   memory_time: 2,
26 |   inputs: inputs_knn
27 | )
28 |
--------------------------------------------------------------------------------
/config/config.exs:
--------------------------------------------------------------------------------
1 | import Config
2 |
3 | config :exla, :add_backend_on_inspect, config_env() != :test
4 |
5 | if System.get_env("USE_EXLA_AT_COMPILE_TIME") do
6 | config :nx, :default_backend, EXLA.Backend
7 | end
8 |
--------------------------------------------------------------------------------
/images/scholar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elixir-nx/scholar/48aa36e95dd14fa51e3ad5e23fc72c3a3eed3877/images/scholar.png
--------------------------------------------------------------------------------
/images/scholar_simplified.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elixir-nx/scholar/48aa36e95dd14fa51e3ad5e23fc72c3a3eed3877/images/scholar_simplified.png
--------------------------------------------------------------------------------
/lib/scholar/covariance/ledoit_wolf.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Covariance.LedoitWolf do
2 | @moduledoc """
3 | Ledoit-Wolf is a particular form of shrinkage covariance estimator,
4 | where the shrinkage coefficient is computed using O. Ledoit and M. Wolf’s formula.
5 |
 6 |   The formula is as
7 | described in "A Well-Conditioned Estimator for Large-Dimensional
8 | Covariance Matrices", Ledoit and Wolf, Journal of Multivariate
9 | Analysis, Volume 88, Issue 2, February 2004, pages 365-411.
10 | """
11 | import Nx.Defn
12 |
13 | @derive {Nx.Container, containers: [:covariance, :shrinkage, :location]}
14 | defstruct [:covariance, :shrinkage, :location]
15 |
16 | opts_schema = [
17 | assume_centered?: [
18 | default: false,
19 | type: :boolean,
20 | doc: """
21 | If `true`, data will not be centered before computation.
22 | Useful when working with data whose mean is almost, but not exactly
23 | zero.
24 | If `false`, data will be centered before computation.
25 | """
26 | ]
27 | ]
28 |
29 | @opts_schema NimbleOptions.new!(opts_schema)
30 | @doc """
31 | Estimate the shrunk Ledoit-Wolf covariance matrix.
32 |
33 | ## Options
34 |
35 | #{NimbleOptions.docs(@opts_schema)}
36 |
37 | ## Return Values
38 |
39 | The function returns a struct with the following parameters:
40 |
41 | * `:covariance` - Tensor of shape `{num_features, num_features}`. Estimated covariance matrix.
42 |
43 | * `:shrinkage` - Coefficient in the convex combination used for the computation of the shrunken estimate. Range is `[0, 1]`.
44 |
45 | * `:location` - Tensor of shape `{num_features,}`.
46 | Estimated location, i.e. the estimated mean.
47 |
48 | ## Examples
49 |
50 | iex> key = Nx.Random.key(0)
51 | iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0]), Nx.tensor([[0.4, 0.2], [0.2, 0.8]]), shape: {50}, type: :f32)
52 | iex> model = Scholar.Covariance.LedoitWolf.fit(x)
53 | iex> model.covariance
54 | #Nx.Tensor<
55 | f32[2][2]
56 | [
57 | [0.3557686507701874, 0.17340737581253052],
58 | [0.17340737581253052, 1.0300586223602295]
59 | ]
60 | >
61 | iex> model.shrinkage
62 | #Nx.Tensor<
63 | f32
64 | 0.15034137666225433
65 | >
66 | iex> model.location
67 | #Nx.Tensor<
68 | f32[2]
69 | [0.17184630036354065, 0.3276958167552948]
70 | >
71 |
72 | iex> key = Nx.Random.key(0)
73 | iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0, 0.0]), Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]), shape: {10}, type: :f32)
74 | iex> model = Scholar.Covariance.LedoitWolf.fit(x)
75 | iex> model.covariance
76 | #Nx.Tensor<
77 | f32[3][3]
78 | [
79 | [2.5945029258728027, 1.5078359842300415, 1.1623677015304565],
80 | [1.5078359842300415, 2.106797456741333, 1.1812156438827515],
81 | [1.1623677015304565, 1.1812156438827515, 1.4606266021728516]
82 | ]
83 | >
84 | iex> model.shrinkage
85 | #Nx.Tensor<
86 | f32
87 | 0.1908363401889801
88 | >
89 | iex> model.location
90 | #Nx.Tensor<
91 | f32[3]
92 | [1.1228725910186768, 0.5419300198554993, 0.8678852319717407]
93 | >
94 |
95 | iex> key = Nx.Random.key(0)
96 | iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0, 0.0]), Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]), shape: {10}, type: :f32)
97 | iex> cov = Scholar.Covariance.LedoitWolf.fit(x, assume_centered?: true)
98 | iex> cov.covariance
99 | #Nx.Tensor<
100 | f32[3][3]
101 | [
102 | [3.8574986457824707, 2.2048025131225586, 2.1504499912261963],
103 | [2.2048025131225586, 2.4572863578796387, 1.7215262651443481],
104 | [2.1504499912261963, 1.7215262651443481, 2.154898166656494]
105 | ]
106 | >
107 | """
108 |
109 | deftransform fit(x, opts \\ []) do
110 | fit_n(x, NimbleOptions.validate!(opts, @opts_schema))
111 | end
112 |
113 | defnp fit_n(x, opts) do
114 | {x, location} = Scholar.Covariance.Utils.center(x, opts[:assume_centered?])
115 |
116 | {covariance, shrinkage} =
117 | ledoit_wolf(x)
118 |
119 | %__MODULE__{
120 | covariance: covariance,
121 | shrinkage: shrinkage,
122 | location: location
123 | }
124 | end
125 |
126 | defnp ledoit_wolf(x) do
127 | case Nx.shape(x) do
128 | {_n, 1} ->
129 | {Nx.mean(x ** 2) |> Nx.reshape({1, 1}), 0.0}
130 |
131 | _ ->
132 | ledoit_wolf_shrinkage(x)
133 | end
134 | end
135 |
136 | defnp ledoit_wolf_shrinkage(x) do
137 | case Nx.shape(x) do
138 | {_, 1} ->
139 | 0
140 |
141 | {n} ->
142 | Nx.reshape(x, {1, n})
143 | |> ledoit_wolf_shrinkage_complex()
144 |
145 | _ ->
146 | ledoit_wolf_shrinkage_complex(x)
147 | end
148 | end
149 |
150 | defnp ledoit_wolf_shrinkage_complex(x) do
151 | {num_samples, num_features} = Nx.shape(x)
152 | emp_cov = Scholar.Covariance.Utils.empirical_covariance(x)
153 |
154 | emp_cov_trace = Scholar.Covariance.Utils.trace(emp_cov)
155 | mu = Nx.sum(emp_cov_trace) / num_features
156 |
157 | flatten_delta = Nx.flatten(emp_cov)
158 |
159 | indices =
160 | Nx.shape(flatten_delta)
161 | |> Nx.iota()
162 |
163 | subtract = Nx.select(Nx.remainder(indices, num_features + 1) == 0, mu, 0)
164 |
165 | delta =
166 | (flatten_delta - subtract)
167 | |> Nx.pow(2)
168 | |> Nx.sum()
169 |
170 | delta = delta / num_features
171 |
172 | x2 = Nx.pow(x, 2)
173 |
174 | beta =
175 | (Nx.dot(x2, [0], x2, [0]) / num_samples - emp_cov ** 2)
176 | |> Nx.sum()
177 | |> Nx.divide(num_features * num_samples)
178 |
179 | beta = Nx.min(beta, delta)
180 | shrinkage = beta / delta
181 |
182 | shrunk_cov = (1.0 - shrinkage) * emp_cov
183 | mask = Nx.iota(Nx.shape(shrunk_cov))
184 | selector = Nx.remainder(mask, num_features + 1) == 0
185 | shrunk_cov = Nx.select(selector, shrunk_cov + shrinkage * mu, shrunk_cov)
186 |
187 | {shrunk_cov, shrinkage}
188 | end
189 | end
190 |
--------------------------------------------------------------------------------
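In formula form, `ledoit_wolf_shrinkage_complex/1` above computes the convex combination

$$\Sigma_{\mathrm{LW}} = (1 - \delta)\,\hat{\Sigma} + \delta \mu I, \qquad \mu = \frac{\operatorname{tr}(\hat{\Sigma})}{p},$$

where $\hat{\Sigma}$ is the empirical covariance, $p$ is `num_features`, and the shrinkage $\delta = \min(\beta, \Delta) / \Delta \in [0, 1]$ is estimated from the data (`beta` and `delta` in the code).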
/lib/scholar/covariance/shrunk_covariance.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Covariance.ShrunkCovariance do
2 | @moduledoc """
3 | Covariance estimator with shrinkage.
4 | """
5 | import Nx.Defn
6 |
7 | @derive {Nx.Container, containers: [:covariance, :location]}
8 | defstruct [:covariance, :location]
9 |
10 | opts_schema = [
11 | assume_centered?: [
12 | default: false,
13 | type: :boolean,
14 | doc: """
15 | If `true`, data will not be centered before computation.
16 | Useful when working with data whose mean is almost, but not exactly
17 | zero.
18 | If `false`, data will be centered before computation.
19 | """
20 | ],
21 | shrinkage: [
22 | default: 0.1,
23 | type: :float,
24 | doc: "Coefficient in the convex combination used for the computation
25 | of the shrunk estimate. Range is [0, 1]."
26 | ]
27 | ]
28 |
29 | @opts_schema NimbleOptions.new!(opts_schema)
30 | @doc """
31 | Fit the shrunk covariance model to `x`.
32 |
33 | ## Options
34 |
35 | #{NimbleOptions.docs(@opts_schema)}
36 |
37 | ## Return Values
38 |
39 | The function returns a struct with the following parameters:
40 |
41 | * `:covariance` - Tensor of shape `{num_features, num_features}`. Estimated covariance matrix.
42 | * `:location` - Tensor of shape `{num_features,}`.
43 | Estimated location, i.e. the estimated mean.
44 |
45 | ## Examples
46 |
47 | iex> key = Nx.Random.key(0)
48 | iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0]), Nx.tensor([[0.8, 0.3], [0.2, 0.4]]), shape: {10}, type: :f32)
49 | iex> model = Scholar.Covariance.ShrunkCovariance.fit(x)
50 | iex> model.covariance
51 | #Nx.Tensor<
52 | f32[2][2]
53 | [
54 | [0.7721845507621765, 0.19141492247581482],
55 | [0.19141492247581482, 0.33952537178993225]
56 | ]
57 | >
58 | iex> model.location
59 | #Nx.Tensor<
60 | f32[2]
61 | [0.18202415108680725, -0.09216632694005966]
62 | >
63 | iex> key = Nx.Random.key(0)
64 | iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0]), Nx.tensor([[0.8, 0.3], [0.2, 0.4]]), shape: {10}, type: :f32)
65 | iex> model = Scholar.Covariance.ShrunkCovariance.fit(x, shrinkage: 0.4)
66 | iex> model.covariance
67 | #Nx.Tensor<
68 | f32[2][2]
69 | [
70 | [0.7000747323036194, 0.1276099532842636],
71 | [0.1276099532842636, 0.41163527965545654]
72 | ]
73 | >
74 | iex> model.location
75 | #Nx.Tensor<
76 | f32[2]
77 | [0.18202415108680725, -0.09216632694005966]
78 | >
79 | """
80 |
81 | deftransform fit(x, opts \\ []) do
82 | fit_n(x, NimbleOptions.validate!(opts, @opts_schema))
83 | end
84 |
85 | defnp fit_n(x, opts) do
86 | shrinkage = opts[:shrinkage]
87 |
88 | if shrinkage < 0 or shrinkage > 1 do
89 | raise ArgumentError,
90 | """
91 | expected :shrinkage option to be in [0, 1] range, \
92 |       got shrinkage: #{inspect(shrinkage)}\
93 | """
94 | end
95 |
96 | {x, location} = Scholar.Covariance.Utils.center(x, opts[:assume_centered?])
97 |
98 | covariance =
99 | Scholar.Covariance.Utils.empirical_covariance(x)
100 | |> shrunk_covariance(shrinkage)
101 |
102 | %__MODULE__{
103 | covariance: covariance,
104 | location: location
105 | }
106 | end
107 |
108 | defnp shrunk_covariance(emp_cov, shrinkage) do
109 | num_features = Nx.axis_size(emp_cov, 1)
110 | shrunk_cov = (1.0 - shrinkage) * emp_cov
111 | emp_cov_trace = Scholar.Covariance.Utils.trace(emp_cov)
112 | mu = Nx.sum(emp_cov_trace) / num_features
113 |
114 | mask = Nx.iota(Nx.shape(shrunk_cov))
115 | selector = Nx.remainder(mask, num_features + 1) == 0
116 |
117 | shrunk_cov + shrinkage * mu * selector
118 | end
119 | end
120 |
--------------------------------------------------------------------------------
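The diagonal update in `shrunk_covariance/2` uses a flat-index trick: in a $p \times p$ matrix laid out row-major, the diagonal entries are exactly the flat indices divisible by $p + 1$. A minimal sketch of the mask outside `defn` (note `Nx.equal/2`; plain `==` is structural comparison outside `defn`):

```elixir
p = 3
# Selects flat indices 0, 4 and 8, i.e. the diagonal of a 3x3 matrix.
mask = Nx.equal(Nx.remainder(Nx.iota({p, p}), p + 1), 0)
```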
/lib/scholar/covariance/utils.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Covariance.Utils do
2 | @moduledoc false
3 | import Nx.Defn
4 | require Nx
5 |
6 | defn center(x, assume_centered? \\ false) do
7 | x =
8 | case Nx.shape(x) do
9 | {_} -> Nx.new_axis(x, 1)
10 | _ -> x
11 | end
12 |
13 | location =
14 | if assume_centered? do
15 | 0
16 | else
17 | Nx.mean(x, axes: [0])
18 | end
19 |
20 | {x - location, location}
21 | end
22 |
23 | defn empirical_covariance(x) do
24 | n = Nx.axis_size(x, 0)
25 |
26 | covariance = Nx.dot(x, [0], x, [0]) / n
27 |
28 | case Nx.shape(covariance) do
29 | {} -> Nx.reshape(covariance, {1, 1})
30 | _ -> covariance
31 | end
32 | end
33 |
34 | defn trace(x) do
35 | x
36 | |> Nx.take_diagonal()
37 | |> Nx.sum()
38 | end
39 | end
40 |
--------------------------------------------------------------------------------
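Note that `empirical_covariance/1` divides by $n$ rather than $n - 1$: after centering, it computes the maximum-likelihood estimate

$$\hat{\Sigma} = \frac{1}{n} X^\top X = \frac{1}{n} \sum_{i=1}^{n} (x_i - \bar{x})(x_i - \bar{x})^\top.$$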
/lib/scholar/decomposition/utils.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Decomposition.Utils do
2 | @moduledoc false
3 | import Nx.Defn
4 | require Nx
5 |
6 | defn flip_svd(u, v, u_based \\ true) do
7 | base =
8 | if u_based do
9 | u
10 | else
11 | Nx.transpose(v)
12 | end
13 |
14 | max_abs_cols_idx = base |> Nx.abs() |> Nx.argmax(axis: 0, keep_axis: true)
15 | signs = base |> Nx.take_along_axis(max_abs_cols_idx, axis: 0) |> Nx.sign() |> Nx.squeeze()
16 | u = u * signs
17 | v = v * Nx.new_axis(signs, -1)
18 | {u, v}
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
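`flip_svd/3` resolves the sign ambiguity of the SVD so decompositions are deterministic: each component's sign is chosen so that its largest-magnitude entry is positive. A hedged usage sketch of this internal helper (the input tensor is illustrative):

```elixir
x = Nx.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
{u, _s, vt} = Nx.LinAlg.svd(x)
# Same subspaces, now with a deterministic sign convention.
{u, vt} = Scholar.Decomposition.Utils.flip_svd(u, vt)
```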
/lib/scholar/feature_extraction/count_vectorizer.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.FeatureExtraction.CountVectorizer do
2 | @moduledoc """
 3 |   A `CountVectorizer` converts an already indexed collection of text documents to a matrix of token counts.
4 | """
5 | import Nx.Defn
6 |
7 | opts_schema = [
8 | max_token_id: [
9 | type: :pos_integer,
10 | required: true,
11 | doc: ~S"""
12 | Maximum token id in the input tensor.
13 | """
14 | ]
15 | ]
16 |
17 | @opts_schema NimbleOptions.new!(opts_schema)
18 |
19 | @doc """
20 | Generates a count matrix where each row corresponds to a document in the input corpus,
21 | and each column corresponds to a unique token in the vocabulary of the corpus.
22 |
23 | The input must be a 2D tensor where:
24 |
25 | * Each row represents a document.
26 | * Each document has integer values representing tokens.
27 |
28 | The same number represents the same token in the vocabulary. Tokens should start from 0
29 | and be consecutive. Negative values are ignored, making them suitable for padding.
30 |
31 | ## Options
32 |
33 | #{NimbleOptions.docs(@opts_schema)}
34 |
35 | ## Examples
36 |
37 | iex> t = Nx.tensor([[0, 1, 2], [1, 3, 4]])
38 | iex> Scholar.FeatureExtraction.CountVectorizer.fit_transform(t, max_token_id: Scholar.FeatureExtraction.CountVectorizer.max_token_id(t))
39 | Nx.tensor([
40 | [1, 1, 1, 0, 0],
41 | [0, 1, 0, 1, 1]
42 | ])
43 |
44 | With padding:
45 |
46 | iex> t = Nx.tensor([[0, 1, -1], [1, 3, 4]])
47 | iex> Scholar.FeatureExtraction.CountVectorizer.fit_transform(t, max_token_id: Scholar.FeatureExtraction.CountVectorizer.max_token_id(t))
48 | Nx.tensor([
49 | [1, 1, 0, 0, 0],
50 | [0, 1, 0, 1, 1]
51 | ])
52 | """
53 | deftransform fit_transform(tensor, opts \\ []) do
54 | fit_transform_n(tensor, NimbleOptions.validate!(opts, @opts_schema))
55 | end
56 |
57 | @doc """
58 |   Computes the `:max_token_id` option from the given tensor.
59 |
60 | This function cannot be called inside `defn` (and it will raise
61 | if you try to do so).
62 |
63 | ## Examples
64 |
65 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
66 | iex> Scholar.FeatureExtraction.CountVectorizer.max_token_id(t)
67 | 2
68 | """
69 | def max_token_id(tensor) do
70 | tensor |> Nx.reduce_max() |> Nx.to_number()
71 | end
72 |
73 | defnp fit_transform_n(tensor, opts) do
74 | check_for_rank(tensor)
75 | counts = Nx.broadcast(0, {Nx.axis_size(tensor, 0), opts[:max_token_id] + 1})
76 |
77 | {_, counts} =
78 | while {{i = 0, tensor}, counts}, Nx.less(i, Nx.axis_size(tensor, 0)) do
79 | {_, counts} =
80 | while {{j = 0, i, tensor}, counts}, Nx.less(j, Nx.axis_size(tensor, 1)) do
81 | index = tensor[i][j]
82 |
83 | counts =
84 | if Nx.any(Nx.less(index, 0)),
85 | do: counts,
86 | else: Nx.indexed_add(counts, Nx.stack([i, index]), 1)
87 |
88 | {{j + 1, i, tensor}, counts}
89 | end
90 |
91 | {{i + 1, tensor}, counts}
92 | end
93 |
94 | counts
95 | end
96 |
97 | defnp check_for_rank(tensor) do
98 | if Nx.rank(tensor) != 2 do
99 | raise ArgumentError,
100 | """
101 | expected tensor to have shape {num_documents, num_tokens}, \
102 | got tensor with shape: #{inspect(Nx.shape(tensor))}\
103 | """
104 | end
105 | end
106 | end
107 |
--------------------------------------------------------------------------------
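A minimal sketch (not part of the library) of preparing raw documents for `fit_transform/2`: map tokens to consecutive ids starting at 0 and pad shorter documents with `-1`, which the vectorizer ignores:

```elixir
docs = ["hello world", "world of nx"]
tokenized = Enum.map(docs, &String.split/1)
# Assign consecutive ids starting at 0, in order of first appearance.
vocab = tokenized |> List.flatten() |> Enum.uniq() |> Enum.with_index() |> Map.new()
max_len = tokenized |> Enum.map(&length/1) |> Enum.max()

indexed =
  Enum.map(tokenized, fn tokens ->
    ids = Enum.map(tokens, &vocab[&1])
    ids ++ List.duplicate(-1, max_len - length(ids))
  end)

t = Nx.tensor(indexed)
alias Scholar.FeatureExtraction.CountVectorizer
CountVectorizer.fit_transform(t, max_token_id: CountVectorizer.max_token_id(t))
```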
/lib/scholar/linear/linear_helpers.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Linear.LinearHelpers do
2 | require Nx
3 | import Nx.Defn
4 | import Scholar.Shared
5 |
6 | @moduledoc false
7 |
8 | defp valid_column_vector?(y, n_samples) do
9 | Nx.shape(y) == {n_samples, 1} and Nx.rank(y) == 2
10 | end
11 |
12 | @doc false
13 | def flatten_column_vector(y, n_samples) do
14 | is_column_vector? = valid_column_vector?(y, n_samples)
15 |
16 | if is_column_vector? do
17 | y |> Nx.flatten()
18 | else
19 | y
20 | end
21 | end
22 |
23 | @doc false
24 | def validate_y_shape(y, n_samples, module_name) do
25 | y = flatten_column_vector(y, n_samples)
26 | is_valid_target? = Nx.rank(y) == 1
27 |
28 | if not is_valid_target? do
29 | message =
30 | "#{inspect(module_name)} expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
31 |
32 | raise ArgumentError, message
33 | else
34 | y
35 | end
36 | end
37 |
38 | @doc false
39 | def build_sample_weights(x, opts) do
40 | x_type = to_float_type(x)
41 | {num_samples, _} = Nx.shape(x)
42 | default_sample_weights = Nx.broadcast(Nx.as_type(1.0, x_type), {num_samples})
43 | {sample_weights, _} = Keyword.pop(opts, :sample_weights, default_sample_weights)
44 |
45 | # this is required for ridge regression
46 | sample_weights =
47 | if Nx.is_tensor(sample_weights),
48 | do: Nx.as_type(sample_weights, x_type),
49 | else: Nx.tensor(sample_weights, type: x_type)
50 |
51 | sample_weights
52 | end
53 |
54 | @doc false
55 | defn preprocess_data(x, y, sample_weights, opts) do
56 | if opts[:sample_weights_flag],
57 | do:
58 | {Nx.weighted_mean(x, sample_weights, axes: [0]),
59 | Nx.weighted_mean(y, sample_weights, axes: [0])},
60 | else: {Nx.mean(x, axes: [0]), Nx.mean(y, axes: [0])}
61 | end
62 |
63 | @doc false
64 | defn set_intercept(coeff, x_offset, y_offset, fit_intercept?) do
65 | if fit_intercept? do
66 | y_offset - Nx.dot(coeff, [-1], x_offset, [-1])
67 | else
68 | Nx.tensor(0.0, type: Nx.type(coeff))
69 | end
70 | end
71 |
72 | # Implements sample weighting by rescaling inputs and
73 | # targets by sqrt(sample_weight).
74 | @doc false
75 | defn rescale(x, y, sample_weights) do
76 | factor = Nx.sqrt(sample_weights)
77 |
78 | x_scaled =
79 | case Nx.shape(factor) do
80 | {} -> factor * x
81 | _ -> x * Nx.new_axis(factor, -1)
82 | end
83 |
84 | y_scaled =
85 | case Nx.rank(y) do
86 | 1 -> factor * y
87 | _ -> y * Nx.new_axis(factor, -1)
88 | end
89 |
90 | {x_scaled, y_scaled}
91 | end
92 | end
93 |
--------------------------------------------------------------------------------
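`rescale/3` above relies on the standard reduction of weighted least squares to ordinary least squares: minimizing the weighted objective is equivalent to an unweighted fit on inputs and targets scaled by $\sqrt{w_i}$,

$$\sum_{i=1}^{n} w_i \left(y_i - x_i^\top \beta\right)^2 = \sum_{i=1}^{n} \left(\sqrt{w_i}\, y_i - \left(\sqrt{w_i}\, x_i\right)^\top \beta\right)^2.$$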
/lib/scholar/linear/linear_regression.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Linear.LinearRegression do
2 | @moduledoc """
3 | Ordinary least squares linear regression.
4 |
 5 |   Time complexity of linear regression is $O(K^2 \cdot (K + N))$ where $N$ is the number of samples
6 | and $K$ is the number of features.
7 | """
8 | require Nx
9 | import Nx.Defn
10 | import Scholar.Shared
11 | alias Scholar.Linear.LinearHelpers
12 |
13 | @derive {Nx.Container, containers: [:coefficients, :intercept]}
14 | defstruct [:coefficients, :intercept]
15 |
16 | opts = [
17 | sample_weights: [
18 | type: {:custom, Scholar.Options, :weights, []},
19 | doc: """
20 | The weights for each observation. If not provided,
21 | all observations are assigned equal weight.
22 | """
23 | ],
24 | fit_intercept?: [
25 | type: :boolean,
26 | default: true,
27 | doc: """
28 | If set to `true`, a model will fit the intercept. Otherwise,
29 | the intercept is set to `0.0`. The intercept is an independent term
30 | in a linear model. Specifically, it is the expected mean value
31 | of targets for a zero-vector on input.
32 | """
33 | ]
34 | ]
35 |
36 | @opts_schema NimbleOptions.new!(opts)
37 |
38 | @doc """
39 | Fits a linear regression model for sample inputs `x` and
40 | sample targets `y`.
41 |
42 | ## Options
43 |
44 | #{NimbleOptions.docs(@opts_schema)}
45 |
46 | ## Return Values
47 |
48 | The function returns a struct with the following parameters:
49 |
50 | * `:coefficients` - Estimated coefficients for the linear regression problem.
51 |
52 | * `:intercept` - Independent term in the linear model.
53 |
54 | ## Examples
55 |
56 | iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
57 | iex> y = Nx.tensor([4.0, 3.0, -1.0])
58 | iex> model = Scholar.Linear.LinearRegression.fit(x, y)
59 | iex> model.coefficients
60 | #Nx.Tensor<
61 | f32[2]
62 | [-0.49724647402763367, -0.7010394930839539]
63 | >
64 | iex> model.intercept
65 | #Nx.Tensor<
66 | f32
67 | 5.8964691162109375
68 | >
69 | """
70 | deftransform fit(x, y, opts \\ []) do
71 | {n_samples, _} = Nx.shape(x)
72 | y = LinearHelpers.flatten_column_vector(y, n_samples)
73 | opts = NimbleOptions.validate!(opts, @opts_schema)
74 |
75 | opts =
76 | [
77 | sample_weights_flag: opts[:sample_weights] != nil
78 | ] ++
79 | opts
80 |
81 | sample_weights = LinearHelpers.build_sample_weights(x, opts)
82 | {n_samples, _} = Nx.shape(x)
83 | y = LinearHelpers.flatten_column_vector(y, n_samples)
84 |
85 | fit_n(x, y, sample_weights, opts)
86 | end
87 |
88 | defnp fit_n(a, b, sample_weights, opts) do
89 | a = to_float(a)
90 | b = to_float(b)
91 |
92 | {a_offset, b_offset} =
93 | if opts[:fit_intercept?] do
94 | LinearHelpers.preprocess_data(a, b, sample_weights, opts)
95 | else
96 | a_offset_shape = Nx.axis_size(a, 1)
97 | b_reshaped = if Nx.rank(b) > 1, do: b, else: Nx.reshape(b, {:auto, 1})
98 | b_offset_shape = Nx.axis_size(b_reshaped, 1)
99 |
100 | {Nx.broadcast(Nx.tensor(0.0, type: Nx.type(a)), {a_offset_shape}),
101 | Nx.broadcast(Nx.tensor(0.0, type: Nx.type(b)), {b_offset_shape})}
102 | end
103 |
104 | {a, b} = {a - a_offset, b - b_offset}
105 |
106 | {a, b} =
107 | if opts[:sample_weights_flag] do
108 | LinearHelpers.rescale(a, b, sample_weights)
109 | else
110 | {a, b}
111 | end
112 |
113 | {coeff, intercept} = lstsq(a, b, a_offset, b_offset, opts[:fit_intercept?])
114 | %__MODULE__{coefficients: coeff, intercept: intercept}
115 | end
116 |
117 | @doc """
118 | Makes predictions with the given `model` on input `x`.
119 |
120 | Output predictions have shape `{n_samples}` when train target is shaped either `{n_samples}` or `{n_samples, 1}`.
121 | Otherwise, predictions match train target shape.
122 |
123 | ## Examples
124 |
125 | iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
126 | iex> y = Nx.tensor([4.0, 3.0, -1.0])
127 | iex> model = Scholar.Linear.LinearRegression.fit(x, y)
128 | iex> Scholar.Linear.LinearRegression.predict(model, Nx.tensor([[2.0, 1.0]]))
129 | Nx.tensor(
130 | [4.200936794281006]
131 | )
132 | """
133 | defn predict(%__MODULE__{coefficients: coeff, intercept: intercept} = _model, x) do
134 | Nx.dot(x, [-1], coeff, [-1]) + intercept
135 | end
136 |
137 | # Implements ordinary least-squares by estimating the
138 | # solution A to the equation A.X = b.
139 | defnp lstsq(a, b, a_offset, b_offset, fit_intercept?) do
140 | pinv = Nx.LinAlg.pinv(a)
141 | coeff = Nx.dot(b, [0], pinv, [1])
142 | intercept = LinearHelpers.set_intercept(coeff, a_offset, b_offset, fit_intercept?)
143 | {coeff, intercept}
144 | end
145 | end
146 |
--------------------------------------------------------------------------------
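A short usage sketch combining the options documented above (the weight values are illustrative only):

```elixir
x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
y = Nx.tensor([4.0, 3.0, -1.0])

# The second observation counts twice as much as the first.
model =
  Scholar.Linear.LinearRegression.fit(x, y,
    sample_weights: Nx.tensor([1.0, 2.0, 0.5]),
    fit_intercept?: true
  )

Scholar.Linear.LinearRegression.predict(model, Nx.tensor([[2.0, 1.0]]))
```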
/lib/scholar/metrics/clustering.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.Clustering do
2 | @moduledoc """
3 | Metrics related to clustering algorithms.
4 | """
5 |
6 | import Nx.Defn
7 | import Scholar.Shared
8 |
9 | opts = [
10 | num_clusters: [
11 | required: true,
12 | type: :pos_integer,
13 | doc: "Number of clusters in clustering."
14 | ]
15 | ]
16 |
17 | @opts_schema NimbleOptions.new!(opts)
18 |
19 | @doc """
20 | Compute the Silhouette Coefficient for each sample.
21 |
22 | The silhouette value is a measure of how similar an object is to its own cluster (cohesion)
23 | compared to other clusters (separation). The silhouette ranges from −1 to +1, where a high
24 | value indicates that the object is well matched to its own cluster and poorly
25 | matched to neighboring clusters. If most objects have a high value, then the
26 | clustering configuration is appropriate. If many points have a low or negative
27 | value, then the clustering configuration may have too many or too few clusters.
28 |
29 | Time complexity of silhouette score is $O(N^2)$ where $N$ is the number of samples.
30 |
31 | ## Options
32 |
33 | #{NimbleOptions.docs(@opts_schema)}
34 |
35 | ## Examples
36 |
37 | iex> x = Nx.tensor([[0, 0], [1, 0], [1, 1], [3, 3], [4, 4.5]])
38 | iex> labels = Nx.tensor([0, 0, 0, 1, 1])
39 | iex> Scholar.Metrics.Clustering.silhouette_samples(x, labels, num_clusters: 2)
40 | #Nx.Tensor<
41 | f32[5]
42 | [0.7647753357887268, 0.7781199216842651, 0.6754303574562073, 0.49344196915626526, 0.6627992987632751]
43 | >
44 |
45 | iex> x = Nx.tensor([[0.1, 0], [0, 1], [22, 65], [42, 3], [4.2, 51]])
46 | iex> labels = Nx.tensor([0, 1, 2, 1, 1])
47 | iex> Scholar.Metrics.Clustering.silhouette_samples(x, labels, num_clusters: 3)
48 | #Nx.Tensor<
49 | f32[5]
50 | [0.0, -0.9782054424285889, 0.0, -0.18546827137470245, -0.5929659008979797]
51 | >
52 | """
53 | deftransform silhouette_samples(x, labels, opts \\ []) do
54 | silhouette_samples_n(x, labels, NimbleOptions.validate!(opts, @opts_schema))
55 | end
56 |
57 | defnp silhouette_samples_n(x, labels, opts) do
58 | verify_num_clusters(x, opts)
59 | {inner, alone?, outer} = inner_and_outer_dist(x, labels, opts)
60 | result = (outer - inner) / Nx.max(outer, inner)
61 | Nx.select(alone?, 0, result)
62 | end
63 |
64 | @doc """
65 | Compute the mean Silhouette Coefficient of all samples.
66 |
67 | ## Options
68 |
69 | #{NimbleOptions.docs(@opts_schema)}
70 |
71 | ## Examples
72 |
73 | iex> x = Nx.tensor([[0, 0], [1, 0], [1, 1], [3, 3], [4, 4.5]])
74 | iex> labels = Nx.tensor([0, 0, 0, 1, 1])
75 | iex> Scholar.Metrics.Clustering.silhouette_score(x, labels, num_clusters: 2)
76 | #Nx.Tensor<
77 | f32
78 | 0.6749133467674255
79 | >
80 |
81 | iex> x = Nx.tensor([[0.1, 0], [0, 1], [22, 65], [42, 3], [4.2, 51]])
82 | iex> labels = Nx.tensor([0, 1, 2, 1, 1])
83 | iex> Scholar.Metrics.Clustering.silhouette_score(x, labels, num_clusters: 3)
84 | #Nx.Tensor<
85 | f32
86 | -0.35132792592048645
87 | >
88 | """
89 | deftransform silhouette_score(x, labels, opts \\ []) do
90 | silhouette_score_n(x, labels, NimbleOptions.validate!(opts, @opts_schema))
91 | end
92 |
93 | defnp silhouette_score_n(x, labels, opts) do
94 | Nx.mean(silhouette_samples(x, labels, opts))
95 | end
96 |
97 | defnp inner_and_outer_dist(x, labels, opts) do
98 | num_clusters = opts[:num_clusters]
99 | num_samples = Nx.axis_size(x, 0)
100 | inf = Nx.Constants.infinity(to_float_type(x))
101 | pairwise_dist = Scholar.Metrics.Distance.pairwise_euclidean(x)
102 | membership_mask = Nx.reshape(labels, {num_samples, 1}) == Nx.iota({1, num_clusters})
103 | cluster_size = membership_mask |> Nx.sum(axes: [0]) |> Nx.reshape({1, num_clusters})
104 | dist_in_cluster = Nx.dot(pairwise_dist, membership_mask)
105 | mean_dist_in_cluster = dist_in_cluster / cluster_size
106 |
107 | alone? = (cluster_size == 1) |> Nx.squeeze() |> Nx.take(labels)
108 |
109 | inner_dist =
110 | (dist_in_cluster / Nx.max(cluster_size - 1, 1))
111 | |> Nx.take_along_axis(Nx.reshape(labels, {num_samples, 1}), axis: 1)
112 | |> Nx.squeeze(axes: [1])
113 |
114 | outer_dist =
115 | membership_mask
116 | |> Nx.select(inf, mean_dist_in_cluster)
117 | |> Nx.reduce_min(axes: [1])
118 |
119 | {inner_dist, alone?, outer_dist}
120 | end
121 |
122 | deftransformp verify_num_clusters(x, opts) do
123 | {num_samples, _} = Nx.shape(x)
124 |
125 | unless opts[:num_clusters] <= num_samples do
126 | raise ArgumentError,
127 |         "expected :num_clusters to be a positive integer in range 1 to #{inspect(num_samples)}, got: #{inspect(opts[:num_clusters])}"
128 | end
129 | end
130 | end
131 |
--------------------------------------------------------------------------------
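In the notation of `inner_and_outer_dist/3`, with $a(i)$ the mean intra-cluster distance (`inner_dist`) and $b(i)$ the mean distance to the nearest other cluster (`outer_dist`), each sample's coefficient is

$$s(i) = \frac{b(i) - a(i)}{\max\{a(i),\, b(i)\}},$$

and $s(i)$ is set to 0 for samples that are alone in their cluster.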
/lib/scholar/metrics/neighbors.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.Neighbors do
2 | @moduledoc """
3 | Metrics for evaluating the results of approximate k-nearest neighbor search algorithms.
4 | """
5 |
6 | import Nx.Defn
7 |
8 | @doc """
9 | Computes the recall of predicted k-nearest neighbors given the true k-nearest neighbors.
10 | Recall is defined as the average fraction of nearest neighbors the algorithm predicted correctly.
11 |
12 | ## Examples
13 |
14 | iex> neighbors_true = Nx.tensor([[0, 1], [1, 2], [2, 1]])
15 | iex> Scholar.Metrics.Neighbors.recall(neighbors_true, neighbors_true)
16 | #Nx.Tensor<
17 | f32
18 | 1.0
19 | >
20 |
21 | iex> neighbors_true = Nx.tensor([[0, 1], [1, 2], [2, 1]])
22 | iex> neighbors_pred = Nx.tensor([[0, 1], [1, 0], [2, 0]])
23 | iex> Scholar.Metrics.Neighbors.recall(neighbors_true, neighbors_pred)
24 | #Nx.Tensor<
25 | f32
26 | 0.6666666865348816
27 | >
28 | """
29 | defn recall(neighbors_true, neighbors_pred) do
30 | if Nx.rank(neighbors_true) != 2 do
31 | raise ArgumentError,
32 | """
33 | expected true neighbors to have shape {num_samples, num_neighbors}, \
34 | got tensor with shape: #{inspect(Nx.shape(neighbors_true))}\
35 | """
36 | end
37 |
38 | if Nx.rank(neighbors_pred) != 2 do
39 | raise ArgumentError,
40 | """
41 | expected predicted neighbors to have shape {num_samples, num_neighbors}, \
42 | got tensor with shape: #{inspect(Nx.shape(neighbors_pred))}\
43 | """
44 | end
45 |
46 | if Nx.axis_size(neighbors_true, 0) != Nx.axis_size(neighbors_pred, 0) do
47 | raise ArgumentError,
48 | """
49 | expected true and predicted neighbors to have the same axis 0 size, \
50 | got #{inspect(Nx.axis_size(neighbors_true, 0))} and #{inspect(Nx.axis_size(neighbors_pred, 0))}\
51 | """
52 | end
53 |
54 | if Nx.axis_size(neighbors_true, 1) != Nx.axis_size(neighbors_pred, 1) do
55 | raise ArgumentError,
56 | """
57 | expected true and predicted neighbors to have the same axis 1 size, \
58 | got #{inspect(Nx.axis_size(neighbors_true, 1))} and #{inspect(Nx.axis_size(neighbors_pred, 1))}\
59 | """
60 | end
61 |
62 | {n, k} = Nx.shape(neighbors_true)
63 | concatenated = Nx.concatenate([neighbors_true, neighbors_pred], axis: 1) |> Nx.sort(axis: 1)
64 | duplicate_mask = concatenated[[.., 0..(2 * k - 2)]] == concatenated[[.., 1..(2 * k - 1)]]
65 | duplicate_mask |> Nx.sum() |> Nx.divide(n * k)
66 | end
67 | end
68 |
--------------------------------------------------------------------------------
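The final lines of `recall/2` compute set intersections without gathers: after concatenating and sorting each row, an index present in both the true and predicted lists appears as an adjacent duplicate (this assumes the indices within each row are distinct). A small illustration:

```elixir
row_true = Nx.tensor([[1, 2]])
row_pred = Nx.tensor([[2, 0]])
sorted = Nx.sort(Nx.concatenate([row_true, row_pred], axis: 1), axis: 1)
# sorted is [[0, 1, 2, 2]]: one adjacent duplicate, so this row contributes 1/2.
```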
/lib/scholar/metrics/ranking.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.Ranking do
2 | @moduledoc """
3 | Provides metrics and calculations related to ranking quality.
4 |
5 | Ranking metrics evaluate the quality of ordered lists of items,
6 | often used in information retrieval and recommendation systems.
7 |
8 | This module currently supports the following ranking metrics:
9 | * Discounted Cumulative Gain (DCG)
10 | """
11 |
12 | import Nx.Defn
13 | import Scholar.Shared
14 | require Nx
15 |
16 | @dcg_opts [
17 | k: [
18 | type: {:custom, Scholar.Options, :positive_number, []},
19 | doc: "Truncation parameter to consider only the top-k elements."
20 | ]
21 | ]
22 |
23 | @dcg_opts_schema NimbleOptions.new!(@dcg_opts)
24 |
25 | deftransform dcg(y_true, y_score, opts \\ []) do
26 | dcg_n(y_true, y_score, NimbleOptions.validate!(opts, @dcg_opts_schema))
27 | end
28 |
29 |   @doc """
30 |   Computes the DCG based on true relevance scores (`y_true`) and their respective predicted scores (`y_score`).
31 |
32 |   ## Options
33 |   #{NimbleOptions.docs(@dcg_opts_schema)}
34 |   """
35 | defn dcg_n(y_true, y_score, opts) do
36 | y_true_shape = Nx.shape(y_true)
37 | y_score_shape = Nx.shape(y_score)
38 |
39 | check_shape(y_true_shape, y_score_shape)
40 |
41 | {adjusted_y_true, adjusted_y_score} = handle_ties(y_true, y_score)
42 |
43 | sorted_indices = Nx.argsort(adjusted_y_score, axis: 0, direction: :desc)
44 | sorted_y_true = Nx.take(adjusted_y_true, sorted_indices)
45 |
46 | truncated_y_true = truncate_at_k(sorted_y_true, opts)
47 | dcg_value(truncated_y_true)
48 | end
49 |
50 | defnp check_shape(y_true, y_pred) do
51 | assert_same_shape!(y_true, y_pred)
52 | end
53 |
54 | defnp handle_ties(y_true, y_score) do
55 | sorted_indices = Nx.argsort(y_score, axis: 0, direction: :desc)
56 |
57 | sorted_y_true = Nx.take(y_true, sorted_indices)
58 | sorted_y_score = Nx.take(y_score, sorted_indices)
59 |
60 | tie_sorted_indices = Nx.argsort(sorted_y_true, axis: 0, direction: :desc)
61 | adjusted_y_true = Nx.take(sorted_y_true, tie_sorted_indices)
62 | adjusted_y_score = Nx.take(sorted_y_score, tie_sorted_indices)
63 |
64 | {adjusted_y_true, adjusted_y_score}
65 | end
66 |
67 | defnp dcg_value(y_true) do
68 | float_y_true = Nx.as_type(y_true, :f32)
69 |
70 | log_tensor =
71 | y_true
72 | |> Nx.shape()
73 | |> Nx.iota()
74 | |> Nx.as_type(:f32)
75 | |> Nx.add(2.0)
76 | |> Nx.log2()
77 |
78 | div_result = Nx.divide(float_y_true, log_tensor)
79 |
80 | Nx.sum(div_result)
81 | end
82 |
83 | defnp truncate_at_k(tensor, opts) do
84 | case opts[:k] do
85 | nil ->
86 | tensor
87 |
88 | _ ->
89 | if opts[:k] > Nx.axis_size(tensor, 0) do
90 | tensor
91 | else
92 | {top_k, _rest} = Nx.split(tensor, opts[:k], axis: 0)
93 | top_k
94 | end
95 | end
96 | end
97 |
98 | @doc """
99 | Computes the normalized discounted cumulative gain (NDCG) based on true relevance scores `y_true` and their respective predicted scores `y_score`.
100 |
101 | ## Options
102 |
103 | #{NimbleOptions.docs(@dcg_opts_schema)}
104 |
105 | ## Examples
106 |
107 | iex> true_relevance = Nx.tensor([10, 0, 0, 1, 5])
108 | iex> scores = Nx.tensor([0.1, 0.2, 0.3, 4, 70])
109 | iex> Scholar.Metrics.Ranking.ndcg_n(true_relevance, scores)
110 | #Nx.Tensor<
111 | f32
112 | 0.6956940293312073
113 | >
114 | iex> scores = Nx.tensor([0.05, 1.1, 1.0, 0.5, 0.0])
115 | iex> Scholar.Metrics.Ranking.ndcg_n(true_relevance, scores)
116 | #Nx.Tensor<
117 | f32
118 | 0.4936802089214325
119 | >
120 | iex> scores = Nx.tensor([0.05, 1.1, 1.0, 0.5, 0.0])
121 | iex> Scholar.Metrics.Ranking.ndcg_n(true_relevance, scores, k: 4)
122 | #Nx.Tensor<
123 | f32
124 | 0.352024108171463
125 | >
126 | iex> Scholar.Metrics.Ranking.ndcg_n(true_relevance, true_relevance, k: 4)
127 | #Nx.Tensor<
128 | f32
129 | 1.0
130 | >
131 | """
132 | defn ndcg_n(y_true, y_score, opts \\ []) do
133 | dcg_n(y_true, y_score, opts) / dcg_n(y_true, y_true, opts)
134 | end
135 | end
136 |
--------------------------------------------------------------------------------
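In formula form, over relevances sorted by predicted score (ties broken toward higher true relevance) and truncated to the top $k$, `dcg_value/1` computes

$$\mathrm{DCG@k} = \sum_{i=1}^{k} \frac{rel_i}{\log_2(i + 1)}, \qquad \mathrm{NDCG@k} = \frac{\mathrm{DCG@k}}{\mathrm{IDCG@k}},$$

where $\mathrm{IDCG@k}$ is the DCG of the ideal ordering, i.e. `dcg_n(y_true, y_true, opts)` in `ndcg_n/3`.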
/lib/scholar/neighbors/knn_regressor.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.KNNRegressor do
2 | @moduledoc """
3 | K-Nearest Neighbors Regressor.
4 |
5 | Performs regression by computing the (weighted) mean of k-nearest neighbor labels.
6 | """
7 | import Nx.Defn
8 | require Nx
9 |
10 | @derive {Nx.Container, keep: [:weights], containers: [:algorithm, :labels]}
11 | defstruct [:algorithm, :weights, :labels]
12 |
13 | opts = [
14 | algorithm: [
15 | type: :atom,
16 | default: :brute,
17 | doc: """
18 | Algorithm used to compute the k-nearest neighbors. Possible values:
19 |
20 | * `:brute` - Brute-force search. See `Scholar.Neighbors.BruteKNN` for more details.
21 |
22 | * `:kd_tree` - k-d tree. See `Scholar.Neighbors.KDTree` for more details.
23 |
24 | * `:random_projection_forest` - Random projection forest. See `Scholar.Neighbors.RandomProjectionForest` for more details.
25 |
26 |       * Module implementing `fit(data, opts)` and `predict(model, query)`. `predict/2` must return a tuple containing the indices
27 |       of the k-nearest neighbors of the query points as well as the distances between the query points and their k-nearest neighbors.
28 | """
29 | ],
30 | weights: [
31 | type: {:in, [:uniform, :distance]},
32 | default: :uniform,
33 | doc: """
34 | Weight function used in prediction. Possible values:
35 |
36 | * `:uniform` - uniform weights. All points in each neighborhood are weighted equally.
37 |
38 |       * `:distance` - weight points by the inverse of their distance. In this case, closer neighbors of
39 | a query point will have a greater influence than neighbors which are further away.
40 | """
41 | ]
42 | ]
43 |
44 | @opts_schema NimbleOptions.new!(opts)
45 |
46 | @doc """
47 | Fits a k-NN regressor model.
48 |
49 | ## Options
50 |
51 | #{NimbleOptions.docs(@opts_schema)}
52 |
53 | Algorithm-specific options (e.g. `:num_neighbors`, `:metric`) should be provided together with the regressor options.
54 |
55 | ## Examples
56 |
57 | iex> x = Nx.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
58 | iex> y = Nx.tensor([[1], [2], [3], [4], [5]])
59 | iex> model = Scholar.Neighbors.KNNRegressor.fit(x, y, num_neighbors: 3)
60 | iex> model.algorithm
61 | Scholar.Neighbors.BruteKNN.fit(x, num_neighbors: 3)
62 | iex> model.labels
63 | Nx.tensor([[1], [2], [3], [4], [5]])
64 |
65 | iex> x = Nx.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
66 | iex> y = Nx.tensor([[1], [2], [3], [4], [5]])
67 | iex> model = Scholar.Neighbors.KNNRegressor.fit(x, y, algorithm: :kd_tree, num_neighbors: 3, metric: {:minkowski, 1})
68 | iex> model.algorithm
69 | Scholar.Neighbors.KDTree.fit(x, num_neighbors: 3, metric: {:minkowski, 1})
70 | iex> model.labels
71 | Nx.tensor([[1], [2], [3], [4], [5]])
72 |
73 | iex> x = Nx.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
74 | iex> y = Nx.tensor([[1], [2], [3], [4], [5]])
75 | iex> key = Nx.Random.key(12)
76 | iex> model = Scholar.Neighbors.KNNRegressor.fit(x, y, algorithm: :random_projection_forest, num_neighbors: 2, num_trees: 4, key: key)
77 | iex> model.algorithm
78 | Scholar.Neighbors.RandomProjectionForest.fit(x, num_neighbors: 2, num_trees: 4, key: key)
79 | iex> model.labels
80 | Nx.tensor([[1], [2], [3], [4], [5]])
81 | """
82 | deftransform fit(x, y, opts) do
83 | if Nx.rank(x) != 2 do
84 | raise ArgumentError,
85 | """
86 | expected x to have shape {num_samples, num_features_in}, \
87 | got tensor with shape: #{inspect(Nx.shape(x))}
88 | """
89 | end
90 |
91 | if Nx.rank(y) != 2 do
92 | raise ArgumentError,
93 | """
94 | expected y to have shape {num_samples, num_features_out}, \
95 | got tensor with shape: #{inspect(Nx.shape(y))}
96 | """
97 | end
98 |
99 | if Nx.axis_size(x, 0) != Nx.axis_size(y, 0) do
100 | raise ArgumentError,
101 | """
102 | expected x and y to have the same first dimension, \
103 | got #{Nx.axis_size(x, 0)} and #{Nx.axis_size(y, 0)}
104 | """
105 | end
106 |
107 | {opts, algorithm_opts} = Keyword.split(opts, [:algorithm, :weights])
108 | opts = NimbleOptions.validate!(opts, @opts_schema)
109 |
110 | algorithm_module =
111 | case opts[:algorithm] do
112 | :brute ->
113 | Scholar.Neighbors.BruteKNN
114 |
115 | :kd_tree ->
116 | Scholar.Neighbors.KDTree
117 |
118 | :random_projection_forest ->
119 | Scholar.Neighbors.RandomProjectionForest
120 |
121 | module when is_atom(module) ->
122 | module
123 | end
124 |
125 | algorithm = algorithm_module.fit(x, algorithm_opts)
126 |
127 | %__MODULE__{
128 | algorithm: algorithm,
129 | labels: y,
130 | weights: opts[:weights]
131 | }
132 | end
133 |
134 | @doc """
135 | Predicts labels using a k-NN regressor model.
136 |
137 | ## Examples
138 |
139 | iex> x_train = Nx.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
140 | iex> y_train = Nx.tensor([[1], [2], [3], [4], [5]])
141 | iex> model = Scholar.Neighbors.KNNRegressor.fit(x_train, y_train, num_neighbors: 3)
142 | iex> x = Nx.tensor([[1, 3], [4, 2], [3, 6]])
143 | iex> Scholar.Neighbors.KNNRegressor.predict(model, x)
144 | Nx.tensor([[2.0], [2.0], [4.0]])
145 | """
146 | defn predict(model, x) do
147 | {neighbors, distances} = compute_knn(model.algorithm, x)
148 | neighbor_labels = Nx.take(model.labels, neighbors)
149 |
150 | case model.weights do
151 | :uniform ->
152 | Nx.mean(neighbor_labels, axes: [1])
153 |
154 | :distance ->
155 | weights =
156 | Scholar.Neighbors.Utils.check_weights(distances)
157 | |> Nx.new_axis(2)
158 | |> Nx.broadcast(neighbor_labels)
159 |
160 | Nx.weighted_mean(neighbor_labels, weights, axes: [1])
161 | end
162 | end
163 |
164 | deftransformp compute_knn(algorithm, x) do
165 | algorithm.__struct__.predict(algorithm, x)
166 | end
167 | end
168 |
--------------------------------------------------------------------------------
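The `:algorithm` option also accepts any module following the `fit/2` / `predict/2` contract described above. A hedged sketch (the module name and the brute-force distance computation are illustrative, not part of Scholar; deriving `Nx.Container` lets the struct be carried through `defn`):

```elixir
defmodule MyExactKNN do
  @moduledoc false
  @derive {Nx.Container, containers: [:data], keep: [:k]}
  defstruct [:data, :k]

  def fit(data, opts), do: %__MODULE__{data: data, k: opts[:num_neighbors]}

  # Returns {neighbor_indices, distances}, as KNNRegressor.predict/2 expects.
  def predict(%__MODULE__{data: data, k: k}, query) do
    # Pairwise Euclidean distances between query points and the fitted data.
    dist =
      query
      |> Nx.new_axis(1)
      |> Nx.subtract(Nx.new_axis(data, 0))
      |> Nx.pow(2)
      |> Nx.sum(axes: [2])
      |> Nx.sqrt()

    {Nx.slice_along_axis(Nx.argsort(dist, axis: 1), 0, k, axis: 1),
     Nx.slice_along_axis(Nx.sort(dist, axis: 1), 0, k, axis: 1)}
  end
end

x = Nx.tensor([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = Nx.tensor([[1], [2], [3], [4], [5]])
model = Scholar.Neighbors.KNNRegressor.fit(x, y, algorithm: MyExactKNN, num_neighbors: 3)
```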
/lib/scholar/neighbors/large_vis.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.LargeVis do
2 | @moduledoc """
3 | LargeVis algorithm for approximate k-nearest neighbor (k-NN) graph construction.
4 |
 5 |   The algorithm works in the following way. First, the approximate k-NN graph is constructed
6 | using a random projection forest. Then, the graph is refined by looking at the neighbors of
7 | neighbors of every point for a fixed number of iterations. This step is called NN-expansion.
8 |
9 | ## References
10 |
11 | * [Visualizing Large-scale and High-dimensional Data](https://arxiv.org/abs/1602.00370).
12 | """
13 |
14 | import Nx.Defn
15 | import Scholar.Shared
16 | require Nx
17 | alias Scholar.Neighbors.RandomProjectionForest
18 | alias Scholar.Neighbors.Utils
19 |
20 | opts = [
21 | num_neighbors: [
22 | required: true,
23 | type: :pos_integer,
24 | doc: "The number of neighbors in the graph."
25 | ],
26 | metric: [
27 | type: {:in, [:squared_euclidean, :euclidean]},
28 | default: :euclidean,
29 | doc: "The function that measures distance between two points."
30 | ],
31 | min_leaf_size: [
32 | type: :pos_integer,
33 | doc: """
34 | The minimum number of points in every leaf.
35 |       Must be at least `num_neighbors`.
36 | If not provided, it is set based on the number of neighbors.
37 | """
38 | ],
39 | num_trees: [
40 | type: :pos_integer,
41 | doc: """
42 | The number of trees in random projection forest.
43 | If not provided, it is set based on the dataset size.
44 | """
45 | ],
46 | num_iters: [
47 | type: :non_neg_integer,
48 | default: 3,
49 | doc: "The number of times to perform neighborhood expansion."
50 | ],
51 | key: [
52 | type: {:custom, Scholar.Options, :key, []},
53 | doc: """
54 | Used for random number generation in parameter initialization.
55 | If the key is not provided, it is set to `Nx.Random.key(System.system_time())`.
56 | """
57 | ]
58 | ]
59 |
60 | @opts_schema NimbleOptions.new!(opts)
61 |
62 | @doc """
63 | Constructs the approximate k-NN graph with LargeVis.
64 |
65 | Returns neighbor indices and distances.
66 |
67 | ## Examples
68 |
69 | iex> key = Nx.Random.key(12)
70 | iex> tensor = Nx.iota({5, 2})
71 | iex> {graph, distances} = Scholar.Neighbors.LargeVis.fit(tensor, num_neighbors: 2, metric: :squared_euclidean, min_leaf_size: 2, num_trees: 3, key: key)
72 | iex> graph
73 | #Nx.Tensor<
74 | u32[5][2]
75 | [
76 | [0, 1],
77 | [1, 0],
78 | [2, 1],
79 | [3, 2],
80 | [4, 3]
81 | ]
82 | >
83 | iex> distances
84 | #Nx.Tensor<
85 | f32[5][2]
86 | [
87 | [0.0, 8.0],
88 | [0.0, 8.0],
89 | [0.0, 8.0],
90 | [0.0, 8.0],
91 | [0.0, 8.0]
92 | ]
93 | >
94 | """
95 | deftransform fit(tensor, opts) do
96 | if Nx.rank(tensor) != 2 do
97 | raise ArgumentError,
98 | """
99 | expected input tensor to have shape {num_samples, num_features}, \
100 | got tensor with shape: #{inspect(Nx.shape(tensor))}\
101 | """
102 | end
103 |
104 | opts = NimbleOptions.validate!(opts, @opts_schema)
105 | k = opts[:num_neighbors]
106 |
107 | metric =
108 | case opts[:metric] do
109 | :euclidean -> &Scholar.Metrics.Distance.euclidean/2
110 | :squared_euclidean -> &Scholar.Metrics.Distance.squared_euclidean/2
111 | end
112 |
113 | min_leaf_size = opts[:min_leaf_size] || max(10, 2 * k)
114 |
115 | size = Nx.axis_size(tensor, 0)
116 | num_trees = opts[:num_trees] || 5 + round(:math.pow(size, 0.25))
117 | key = Keyword.get_lazy(opts, :key, fn -> Nx.Random.key(System.system_time()) end)
118 |
119 | fit_n(
120 | tensor,
121 | key,
122 | num_neighbors: k,
123 | metric: metric,
124 | min_leaf_size: min_leaf_size,
125 | num_trees: num_trees,
126 | num_iters: opts[:num_iters]
127 | )
128 | end
129 |
130 | defnp fit_n(tensor, key, opts) do
131 | forest =
132 | RandomProjectionForest.fit(tensor,
133 | num_neighbors: opts[:num_neighbors],
134 | min_leaf_size: opts[:min_leaf_size],
135 | num_trees: opts[:num_trees],
136 | key: key
137 | )
138 |
139 | {graph, _} = RandomProjectionForest.predict(forest, tensor)
140 | expand(graph, tensor, metric: opts[:metric], num_iters: opts[:num_iters])
141 | end
142 |
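# Runs NN-expansion num_iters times inside a defn while loop. The varying
# state is the {graph, distances} pair; the input tensor and the iteration
# counter ride along in the state tuple, with only the counter changing.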
143 | defn expand(graph, tensor, opts) do
144 | num_iters = opts[:num_iters]
145 | {n, k} = Nx.shape(graph)
146 |
147 | {result, _} =
148 | while {
149 | {
150 | graph,
151 | _distances = Nx.broadcast(Nx.tensor(:nan, type: to_float_type(tensor)), {n, k})
152 | },
153 | {tensor, iter = 0}
154 | },
155 | iter < num_iters do
156 | {expansion_iter(graph, tensor, metric: opts[:metric]), {tensor, iter + 1}}
157 | end
158 |
159 | result
160 | end
161 |
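# A single NN-expansion step: each point's candidate set is its current
# k neighbors plus the k * k neighbors of those neighbors, and the best k
# candidates under the metric are kept. Duplicates among candidates are
# resolved inside brute_force_search_with_candidates/4.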
162 | defnp expansion_iter(graph, tensor, opts) do
163 | {size, k} = Nx.shape(graph)
164 | candidate_indices = Nx.take(graph, graph) |> Nx.reshape({size, k * k})
165 | candidate_indices = Nx.concatenate([graph, candidate_indices], axis: 1)
166 |
167 | Utils.brute_force_search_with_candidates(tensor, tensor, candidate_indices,
168 | num_neighbors: k,
169 | metric: opts[:metric]
170 | )
171 | end
172 | end
173 |
--------------------------------------------------------------------------------
/lib/scholar/neighbors/utils.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.Utils do
2 | @moduledoc false
3 | import Nx.Defn
4 | require Nx
5 |
6 | def metric(:cosine), do: {:ok, &Scholar.Metrics.Distance.cosine/2}
7 |
8 | def metric({:minkowski, p}) when p == :infinity or (is_number(p) and p > 0) do
9 | {:ok, &Scholar.Metrics.Distance.minkowski(&1, &2, p: p)}
10 | end
11 |
12 | def metric(metric) when is_function(metric, 2), do: {:ok, metric}
13 |
14 | def metric(metric) do
15 | {:error,
16 | "expected metric to be a 2-arity function, :cosine, tuple {:minkowski, p} where p is a positive number or :infinity, got: #{inspect(metric)}"}
17 | end
18 |
19 | def pairwise_metric(:cosine), do: {:ok, &Scholar.Metrics.Distance.pairwise_cosine/2}
20 |
21 | def pairwise_metric({:minkowski, p}) when p == :infinity or (is_number(p) and p > 0) do
22 | {:ok, &Scholar.Metrics.Distance.pairwise_minkowski(&1, &2, p: p)}
23 | end
24 |
25 | def pairwise_metric(:euclidean), do: {:ok, &Scholar.Metrics.Distance.pairwise_euclidean/2}
26 |
27 | def pairwise_metric(:squared_euclidean),
28 | do: {:ok, &Scholar.Metrics.Distance.pairwise_squared_euclidean/2}
29 |
30 | def pairwise_metric(:manhattan),
31 | do: {:ok, &Scholar.Metrics.Distance.pairwise_minkowski(&1, &2, p: 1)}
32 |
33 | def pairwise_metric(metric) when is_function(metric, 2), do: {:ok, metric}
34 |
35 | def pairwise_metric(metric) do
36 | {:error,
37 | "expected metric to be a 2-arity function, :cosine or tuple {:minkowski, p} where p is a positive number or :infinity, got: #{inspect(metric)}"}
38 | end
39 |
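# Finds the k nearest neighbors of each query point among the given candidate
# indices only. Duplicate candidates are detected by sorting the indices
# row-wise, marking repeats, and mapping the mask back through the inverse
# permutation; their distances are then set to :infinity so that a point is
# counted at most once before the top k are taken.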
40 | defn brute_force_search_with_candidates(data, query, candidate_indices, opts) do
41 | k = opts[:num_neighbors]
42 | metric = opts[:metric]
43 | dim = Nx.axis_size(data, 1)
44 | {size, length} = Nx.shape(candidate_indices)
45 |
46 | x =
47 | query
48 | |> Nx.new_axis(1)
49 | |> Nx.broadcast({size, length, dim})
50 | |> Nx.vectorize([:query, :candidates])
51 |
52 | y = Nx.take(data, candidate_indices) |> Nx.vectorize([:query, :candidates])
53 | distances = metric.(x, y) |> Nx.devectorize() |> Nx.rename(nil)
54 |
55 | distances =
56 | if length > 1 do
57 | sorted_indices = Nx.argsort(candidate_indices, axis: 1, stable: true)
58 | inverse = inverse_permutation(sorted_indices)
59 | sorted = Nx.take_along_axis(candidate_indices, sorted_indices, axis: 1)
60 |
61 | duplicate_mask =
62 | Nx.concatenate(
63 | [
64 | Nx.broadcast(0, {size, 1}),
65 | Nx.equal(sorted[[.., 0..-2//1]], sorted[[.., 1..-1//1]])
66 | ],
67 | axis: 1
68 | )
69 | |> Nx.take_along_axis(inverse, axis: 1)
70 |
71 | Nx.select(duplicate_mask, :infinity, distances)
72 | else
73 | distances
74 | end
75 |
76 | indices = Nx.argsort(distances, axis: 1) |> Nx.slice_along_axis(0, k, axis: 1)
77 |
78 | neighbor_indices =
79 | Nx.take(
80 | Nx.vectorize(candidate_indices, :samples),
81 | Nx.vectorize(indices, :samples)
82 | )
83 | |> Nx.devectorize()
84 | |> Nx.rename(nil)
85 |
86 | neighbor_distances = Nx.take_along_axis(distances, indices, axis: 1)
87 |
88 | {neighbor_indices, neighbor_distances}
89 | end
90 |
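# Row-wise inverse of a permutation: if indices[i][j] == p, then the result
# satisfies result[i][p] == j. Built with a scatter-add of positions into a
# zero tensor, which keeps the computation inside defn.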
91 | defnp inverse_permutation(indices) do
92 | {size, length} = Nx.shape(indices)
93 | target = Nx.broadcast(Nx.u32(0), {size, length})
94 | samples = Nx.iota({size, length, 1}, axis: 0)
95 |
96 | target_indices =
97 | Nx.concatenate([samples, Nx.new_axis(indices, 2)], axis: 2)
98 | |> Nx.reshape({size * length, 2})
99 |
100 | updates = Nx.iota({size, length}, axis: 1) |> Nx.reshape({size * length})
101 | Nx.indexed_add(target, target_indices, updates)
102 | end
103 |
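# Converts distances into inverse-distance weights. Zero distances would
# divide by zero, so in any row containing an exact match (distance 0) the
# exact matches get weight 1 and all other entries get weight 0. A sketch of
# the resulting behavior (values here are illustrative):
#
#     check_weights(Nx.tensor([[0.0, 2.0], [1.0, 4.0]]))
#     #=> Nx.tensor([[1.0, 0.0], [1.0, 0.25]])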
104 | defn check_weights(weights) do
105 | zero_mask = weights == 0
106 | zero_rows = zero_mask |> Nx.any(axes: [1], keep_axes: true) |> Nx.broadcast(weights)
107 | weights = Nx.select(zero_mask, 1, weights)
108 | weights_inv = 1 / weights
109 | Nx.select(zero_rows, zero_mask, weights_inv)
110 | end
111 | end
112 |
--------------------------------------------------------------------------------
/lib/scholar/options.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Options do
2 | # Useful NimbleOptions validations.
3 | @moduledoc false
4 |
5 | require Nx
6 |
7 | def optimizer(value) do
8 | error =
9 | {:error,
10 | "expected :optimizer to be either a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple"}
11 |
12 | case value do
13 | {init_fn, update_fn} when is_function(init_fn, 1) and is_function(update_fn, 3) ->
14 | {:ok, value}
15 |
16 | atom when is_atom(atom) ->
17 | mod = Polaris.Optimizers
18 |
19 | if Code.ensure_loaded(mod) == {:module, mod} and function_exported?(mod, atom, 0) do
20 | {:ok, atom}
21 | else
22 | error
23 | end
24 |
25 | _ ->
26 | error
27 | end
28 | end
29 |
30 | def axes(axes) do
31 | # Axes are further validated by Nx, including against the tensor.
32 | if axes == nil or is_list(axes) do
33 | {:ok, axes}
34 | else
35 | {:error, "expected :axes to be a list positive integers as axis"}
36 | end
37 | end
38 |
39 | def axis(axis) do
40 | # Axis is further validated by Nx, including against the tensor.
41 | if axis == nil or is_integer(axis) or is_atom(axis) do
42 | {:ok, axis}
43 | else
44 | {:error, "expected :axis to be an integers, atom or nil"}
45 | end
46 | end
47 |
48 | def type(type) do
49 | {:ok, Nx.Type.normalize!(type)}
50 | end
51 |
52 | def positive_number(num) do
53 | if is_number(num) and num > 0 do
54 | {:ok, num}
55 | else
56 | {:error, "expected positive number, got: #{inspect(num)}"}
57 | end
58 | end
59 |
60 | def non_negative_number(num) do
61 | if is_number(num) and num >= 0 do
62 | {:ok, num}
63 | else
64 | {:error, "expected a non-negative number, got: #{inspect(num)}"}
65 | end
66 | end
67 |
68 | def non_negative_integer(num) do
69 | if is_integer(num) and num >= 0 do
70 | {:ok, num}
71 | else
72 | {:error, "expected a non-negative integer, got: #{inspect(num)}"}
73 | end
74 | end
75 |
76 | def weights(weights) do
77 | if is_nil(weights) or
78 | (Nx.is_tensor(weights) and Nx.rank(weights) in 0..1) or
79 | (is_list(weights) and Enum.all?(weights, &is_number/1)) do
80 | {:ok, weights}
81 | else
82 | {:error, "expected weights to be a flat tensor or a flat list, got: #{inspect(weights)}"}
83 | end
84 | end
85 |
86 | def multi_weights(weights) do
87 | if is_nil(weights) or
88 | (Nx.is_tensor(weights) and Nx.rank(weights) > 1) do
89 | {:ok, weights}
90 | else
91 | {:error,
92 | "expected weights to be a tensor with rank greater than 1, got: #{inspect(weights)}"}
93 | end
94 | end
95 |
96 | def key(key) do
97 | if Nx.is_tensor(key) and Nx.type(key) == {:u, 32} and Nx.shape(key) == {2} do
98 | {:ok, key}
99 | else
100 | {:error, "expected key to be a key (use Nx.Random.key/1), got: #{inspect(key)}"}
101 | end
102 | end
103 |
104 | def beta(beta) do
105 | if (is_number(beta) and beta >= 0) or (Nx.is_tensor(beta) and Nx.rank(beta) == 0) do
106 | {:ok, beta}
107 | else
108 | {:error, "expected 'beta' to be in the range [0, inf]"}
109 | end
110 | end
111 |
112 | def quantile_range(value) do
113 | case value do
114 | {q_min, q_max}
115 | when is_number(q_min) and is_number(q_max) and 0.0 < q_min and q_min < q_max and
116 | q_max < 100.0 ->
117 | {:ok, {q_min, q_max}}
118 |
119 | _ ->
120 | {:error,
121 | "expected :quantile_range to be a tuple {q_min, q_max} such that 0.0 < q_min < q_max < 100.0, got: #{inspect(value)}"}
122 | end
123 | end
124 | end
125 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing do
2 | @moduledoc """
3 | Set of functions for preprocessing data.
4 | """
5 |
6 | import Nx.Defn
7 |
8 | binarize_schema = [
9 | type: [
10 | type: {:custom, Scholar.Options, :type, []},
11 | default: :f32,
12 | doc: """
13 | Type of the resultant tensor.
14 | """
15 | ],
16 | threshold: [
17 | type: {:or, [:integer, :float]},
18 | default: 0,
19 | doc: """
20 | Feature values below or equal to this are replaced by 0, above it by 1.
21 | """
22 | ]
23 | ]
24 |
25 | @binarize_schema NimbleOptions.new!(binarize_schema)
26 |
27 | @doc """
28 | Standardizes the tensor by removing the mean and scaling to unit variance.
29 |
30 | It is a shortcut for `Scholar.Preprocessing.StandardScaler.fit_transform/2`.
31 | See `Scholar.Preprocessing.StandardScaler` for more information.
32 |
33 | ## Examples
34 |
35 | iex> Scholar.Preprocessing.standard_scale(Nx.tensor([1, 2, 3]))
36 | #Nx.Tensor<
37 | f32[3]
38 | [-1.2247447967529297, 0.0, 1.2247447967529297]
39 | >
40 |
41 | """
42 | defn standard_scale(tensor, opts \\ []) do
43 | Scholar.Preprocessing.StandardScaler.fit_transform(tensor, opts)
44 | end
45 |
46 | @doc """
47 | Scales a tensor by dividing each sample in the batch by the maximum absolute value in the batch.
48 |
49 | It is a shortcut for `Scholar.Preprocessing.MaxAbsScaler.fit_transform/2`.
50 | See `Scholar.Preprocessing.MaxAbsScaler` for more information.
51 |
52 | ## Examples
53 |
54 | iex> Scholar.Preprocessing.max_abs_scale(Nx.tensor([1, 2, 3]))
55 | #Nx.Tensor<
56 | f32[3]
57 | [0.3333333432674408, 0.6666666865348816, 1.0]
58 | >
59 |
60 | iex> Scholar.Preprocessing.max_abs_scale(Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]]), axes: [0])
61 | #Nx.Tensor<
62 | f32[4][3]
63 | [
64 | [0.3333333432674408, -0.3333333432674408, 1.0],
65 | [1.0, 0.0, 0.0],
66 | [0.0, 0.3333333432674408, -0.5],
67 | [0.6666666865348816, 1.0, 0.5]
68 | ]
69 | >
70 |
71 | iex> Scholar.Preprocessing.max_abs_scale(42)
72 | #Nx.Tensor<
73 | f32
74 | 1.0
75 | >
76 | """
77 | defn max_abs_scale(tensor, opts \\ []) do
78 | Scholar.Preprocessing.MaxAbsScaler.fit_transform(tensor, opts)
79 | end
80 |
81 | @doc """
82 | Scales a tensor to a given range.
83 |
84 | It is a shortcut for `Scholar.Preprocessing.MinMaxScaler.fit_transform/2`.
85 | See `Scholar.Preprocessing.MinMaxScaler` for more information.
86 |
87 | ## Examples
88 |
89 | iex> Scholar.Preprocessing.min_max_scale(Nx.tensor([1, 2, 3]))
90 | #Nx.Tensor<
91 | f32[3]
92 | [0.0, 0.5, 1.0]
93 | >
94 |
95 | iex> Scholar.Preprocessing.min_max_scale(42)
96 | #Nx.Tensor<
97 | f32
98 | 0.0
99 | >
100 | """
101 | defn min_max_scale(tensor, opts \\ []) do
102 | Scholar.Preprocessing.MinMaxScaler.fit_transform(tensor, opts)
103 | end
104 |
105 | @doc """
106 | Converts a tensor into binary values based on the given threshold.
107 |
108 | ## Options
109 |
110 | #{NimbleOptions.docs(@binarize_schema)}
111 |
112 | ## Examples
113 |
114 | iex> Scholar.Preprocessing.binarize(Nx.tensor([[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]]))
115 | #Nx.Tensor<
116 | f32[3][3]
117 | [
118 | [1.0, 0.0, 1.0],
119 | [1.0, 0.0, 0.0],
120 | [0.0, 1.0, 0.0]
121 | ]
122 | >
123 |
124 | iex> Scholar.Preprocessing.binarize(Nx.tensor([[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]]), threshold: 1.3, type: {:u, 8})
125 | #Nx.Tensor<
126 | u8[3][3]
127 | [
128 | [0, 0, 1],
129 | [1, 0, 0],
130 | [0, 0, 0]
131 | ]
132 | >
133 | """
134 | deftransform binarize(tensor, opts \\ []) do
135 | binarize_n(tensor, NimbleOptions.validate!(opts, @binarize_schema))
136 | end
137 |
138 | defnp binarize_n(tensor, opts) do
139 | (tensor > opts[:threshold]) |> Nx.as_type(opts[:type])
140 | end
141 |
142 | @doc """
143 | It is a shortcut for `Scholar.Preprocessing.OrdinalEncoder.fit_transform/1`.
144 |
145 | See `Scholar.Preprocessing.OrdinalEncoder` for more information.
146 |
147 | ## Examples
148 |
149 | iex> Scholar.Preprocessing.ordinal_encode(Nx.tensor([3, 2, 4, 56, 2, 4, 2]))
150 | #Nx.Tensor<
151 | u64[7]
152 | [1, 0, 2, 3, 0, 2, 0]
153 | >
154 | """
155 | defn ordinal_encode(tensor) do
156 | Scholar.Preprocessing.OrdinalEncoder.fit_transform(tensor)
157 | end
158 |
159 | @doc """
160 | It is a shortcut for `Scholar.Preprocessing.OneHotEncoder.fit_transform/2`.
161 | See `Scholar.Preprocessing.OneHotEncoder` for more information.
162 |
163 | ## Examples
164 |
165 | iex> Scholar.Preprocessing.one_hot_encode(Nx.tensor([2, 0, 3, 2, 1, 1, 0]), num_categories: 4)
166 | #Nx.Tensor<
167 | u8[7][4]
168 | [
169 | [0, 0, 1, 0],
170 | [1, 0, 0, 0],
171 | [0, 0, 0, 1],
172 | [0, 0, 1, 0],
173 | [0, 1, 0, 0],
174 | [0, 1, 0, 0],
175 | [1, 0, 0, 0]
176 | ]
177 | >
178 | """
179 | defn one_hot_encode(tensor, opts) do
180 | Scholar.Preprocessing.OneHotEncoder.fit_transform(tensor, opts)
181 | end
182 |
183 | @doc """
184 | Normalize samples individually to unit norm.
185 |
186 | Zero-tensors cannot be normalized, so they are left
187 | unchanged by the normalization.
188 |
189 | It is a shortcut for `Scholar.Preprocessing.Normalizer.fit_transform/2`.
190 | See `Scholar.Preprocessing.Normalizer` for more information.
191 |
192 | ## Examples
193 |
194 | iex> Scholar.Preprocessing.normalize(Nx.tensor([[0, 0, 0], [3, 4, 5], [-2, 4, 3]]), axes: [1])
195 | #Nx.Tensor<
196 | f32[3][3]
197 | [
198 | [0.0, 0.0, 0.0],
199 | [0.4242640733718872, 0.5656854510307312, 0.7071067690849304],
200 | [-0.3713906705379486, 0.7427813410758972, 0.5570860505104065]
201 | ]
202 | >
203 |
204 | iex> Scholar.Preprocessing.normalize(Nx.tensor([[0, 0, 0], [3, 4, 5], [-2, 4, 3]]))
205 | #Nx.Tensor<
206 | f32[3][3]
207 | [
208 | [0.0, 0.0, 0.0],
209 | [0.3375263810157776, 0.4500351846218109, 0.5625439882278442],
210 | [-0.22501759231090546, 0.4500351846218109, 0.3375263810157776]
211 | ]
212 | >
213 | """
214 | deftransform normalize(tensor, opts \\ []) do
215 | Scholar.Preprocessing.Normalizer.fit_transform(tensor, opts)
216 | end
217 | end
218 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/binarizer.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.Binarizer do
2 | @moduledoc """
3 | Binarize data according to a threshold.
4 | """
5 | import Nx.Defn
6 |
7 | binarize_schema = [
8 | threshold: [
9 | type: :float,
10 | default: 0.0,
11 | doc: """
12 | Feature values below or equal to this are replaced by 0, above it by 1.
13 | Threshold may not be less than 0 for operations on sparse matrices.
14 | """
15 | ]
16 | ]
17 |
18 | @binarize_schema NimbleOptions.new!(binarize_schema)
19 |
20 | @doc """
21 | Values greater than the threshold map to 1, while values less than
22 | or equal to the threshold map to 0. With the default threshold of 0,
23 | only positive values map to 1.
24 | ## Options
25 | #{NimbleOptions.docs(@binarize_schema)}
26 | ## Examples
27 | iex> t = Nx.tensor([[0, 0, 0], [3, 4, 5], [-2, 4, 3]])
28 | iex> Scholar.Preprocessing.Binarizer.fit_transform(t, threshold: 3.0)
29 | #Nx.Tensor<
30 | u8[3][3]
31 | [
32 | [0, 0, 0],
33 | [0, 1, 1],
34 | [0, 1, 0]
35 | ]
36 | >
37 | iex> t = Nx.tensor([[0, 0, 0], [3, 4, 5], [-2, 4, 3]])
38 | iex> Scholar.Preprocessing.Binarizer.fit_transform(t, threshold: 0.4)
39 | #Nx.Tensor<
40 | u8[3][3]
41 | [
42 | [0, 0, 0],
43 | [1, 1, 1],
44 | [0, 1, 1]
45 | ]
46 | >
47 | """
48 | deftransform fit_transform(tensor, opts \\ []) do
49 | binarize_n(tensor, NimbleOptions.validate!(opts, @binarize_schema))
50 | end
51 |
52 | defnp binarize_n(tensor, opts) do
53 | threshold = opts[:threshold]
54 | tensor > threshold
55 | end
56 | end
57 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/max_abs_scaler.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.MaxAbsScaler do
2 | @moduledoc """
3 | Scales a tensor by dividing each sample in the batch by the maximum absolute value in the batch.
4 |
5 | Centering and scaling happen independently on each feature by computing the relevant
6 | statistics on the samples in the training set. The maximum absolute value is then
7 | stored to be used on new samples.
8 | """
9 |
10 | import Nx.Defn
11 |
12 | @derive {Nx.Container, containers: [:max_abs]}
13 | defstruct [:max_abs]
14 |
15 | opts_schema = [
16 | axes: [
17 | type: {:custom, Scholar.Options, :axes, []},
18 | doc: """
19 | Axes to calculate the max absolute value over. By default the maximum
20 | absolute value is calculated over the whole tensor.
21 | """
22 | ]
23 | ]
24 |
25 | @opts_schema NimbleOptions.new!(opts_schema)
26 |
27 | @doc """
28 | Compute the maximum absolute value of samples to be used for later scaling.
29 |
30 | ## Options
31 |
32 | #{NimbleOptions.docs(@opts_schema)}
33 |
34 | ## Return values
35 |
36 | Returns a struct with the following parameters:
37 |
38 | * `max_abs`: the calculated maximum absolute value of samples.
39 |
40 | ## Examples
41 |
42 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
43 | iex> Scholar.Preprocessing.MaxAbsScaler.fit(t)
44 | %Scholar.Preprocessing.MaxAbsScaler{
45 | max_abs: Nx.tensor(
46 | [
47 | [2]
48 | ]
49 | )
50 | }
51 | """
52 | deftransform fit(tensor, opts \\ []) do
53 | fit_n(tensor, NimbleOptions.validate!(opts, @opts_schema))
54 | end
55 |
56 | defnp fit_n(tensor, opts) do
57 | max_abs =
58 | Nx.abs(tensor)
59 | |> Nx.reduce_max(axes: opts[:axes], keep_axes: true)
60 |
61 | max_abs = Nx.select(max_abs == 0, 1, max_abs)
62 |
63 | %__MODULE__{max_abs: max_abs}
64 | end
65 |
66 | @doc """
67 | Scales the tensor by the maximum absolute value using a fitted scaler.
68 |
69 | ## Examples
70 |
71 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
72 | iex> scaler = Scholar.Preprocessing.MaxAbsScaler.fit(t)
73 | iex> Scholar.Preprocessing.MaxAbsScaler.transform(scaler, t)
74 | #Nx.Tensor<
75 | f32[3][3]
76 | [
77 | [0.5, -0.5, 1.0],
78 | [1.0, 0.0, 0.0],
79 | [0.0, 0.5, -0.5]
80 | ]
81 | >
82 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
83 | iex> scaler = Scholar.Preprocessing.MaxAbsScaler.fit(t)
84 | iex> new_tensor = Nx.tensor([[0.5, 1, -1], [0.3, 0.8, -1.6]])
85 | iex> Scholar.Preprocessing.MaxAbsScaler.transform(scaler, new_tensor)
86 | #Nx.Tensor<
87 | f32[2][3]
88 | [
89 | [0.25, 0.5, -0.5],
90 | [0.15000000596046448, 0.4000000059604645, -0.800000011920929]
91 | ]
92 | >
93 | """
94 | defn transform(%__MODULE__{max_abs: max_abs}, tensor) do
95 | tensor / max_abs
96 | end
97 |
98 | @doc """
99 | Computes the maximum absolute value and scales the tensor in a single step.
100 |
101 | ## Examples
102 |
103 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
104 | iex> Scholar.Preprocessing.MaxAbsScaler.fit_transform(t)
105 | #Nx.Tensor<
106 | f32[3][3]
107 | [
108 | [0.5, -0.5, 1.0],
109 | [1.0, 0.0, 0.0],
110 | [0.0, 0.5, -0.5]
111 | ]
112 | >
113 | """
114 | defn fit_transform(tensor, opts \\ []) do
115 | tensor
116 | |> fit(opts)
117 | |> transform(tensor)
118 | end
119 | end
120 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/min_max_scaler.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.MinMaxScaler do
2 | @moduledoc """
3 | Scales a tensor to a given range, by default [0, 1], based on the minimum and maximum values of the data.
4 |
5 | Centering and scaling happen independently on each feature by computing the relevant
6 | statistics on the samples in the training set. The minimum and maximum values are then
7 | stored to be used on new samples.
8 | """
9 |
10 | import Nx.Defn
11 |
12 | @derive {Nx.Container, containers: [:min_data, :max_data, :min_bound, :max_bound]}
13 | defstruct [:min_data, :max_data, :min_bound, :max_bound]
14 |
15 | opts_schema = [
16 | axes: [
17 | type: {:custom, Scholar.Options, :axes, []},
18 | doc: """
19 | Axes to calculate the minimum and maximum values over. By default they
20 | are calculated over the whole tensor.
21 | """
22 | ],
23 | min_bound: [
24 | type: {:or, [:integer, :float]},
25 | default: 0,
26 | doc: """
27 | The lower boundary of the desired range of transformed data.
28 | """
29 | ],
30 | max_bound: [
31 | type: {:or, [:integer, :float]},
32 | default: 1,
33 | doc: """
34 | The upper boundary of the desired range of transformed data.
35 | """
36 | ]
37 | ]
38 |
39 | @opts_schema NimbleOptions.new!(opts_schema)
40 |
41 | @doc """
42 | Compute the minimum and maximum values of samples to be used for later scaling.
43 |
44 | ## Options
45 |
46 | #{NimbleOptions.docs(@opts_schema)}
47 |
48 | ## Return values
49 |
50 | Returns a struct with the following parameters:
51 |
52 | * `min_data`: the calculated minimum value of samples.
53 |
54 | * `max_data`: the calculated maximum value of samples.
55 |
56 | * `min_bound`: The lower boundary of the desired range of transformed data.
57 |
58 | * `max_bound`: The upper boundary of the desired range of transformed data.
59 |
60 | ## Examples
61 |
62 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
63 | iex> Scholar.Preprocessing.MinMaxScaler.fit(t)
64 | %Scholar.Preprocessing.MinMaxScaler{
65 | min_data: Nx.tensor(
66 | [
67 | [-1]
68 | ]
69 | ),
70 | max_data: Nx.tensor(
71 | [
72 | [2]
73 | ]
74 | ),
75 | min_bound: Nx.tensor(
76 | 0
77 | ),
78 | max_bound: Nx.tensor(
79 | 1
80 | )
81 | }
82 | """
83 | deftransform fit(tensor, opts \\ []) do
84 | fit_n(tensor, NimbleOptions.validate!(opts, @opts_schema))
85 | end
86 |
87 | defnp fit_n(tensor, opts) do
88 | if opts[:max_bound] <= opts[:min_bound] do
89 | raise ArgumentError,
90 | "expected :max to be greater than :min"
91 | else
92 | reduced_max = Nx.reduce_max(tensor, axes: opts[:axes], keep_axes: true)
93 | reduced_min = Nx.reduce_min(tensor, axes: opts[:axes], keep_axes: true)
94 |
95 | %__MODULE__{
96 | min_data: reduced_min,
97 | max_data: reduced_max,
98 | min_bound: opts[:min_bound],
99 | max_bound: opts[:max_bound]
100 | }
101 | end
102 | end
103 |
104 | @doc """
105 | Performs min-max scaling of the tensor using a fitted scaler.
106 |
107 | ## Examples
108 |
109 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
110 | iex> scaler = Scholar.Preprocessing.MinMaxScaler.fit(t)
111 | iex> Scholar.Preprocessing.MinMaxScaler.transform(scaler, t)
112 | #Nx.Tensor<
113 | f32[3][3]
114 | [
115 | [0.6666666865348816, 0.0, 1.0],
116 | [1.0, 0.3333333432674408, 0.3333333432674408],
117 | [0.3333333432674408, 0.6666666865348816, 0.0]
118 | ]
119 | >
120 |
121 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
122 | iex> scaler = Scholar.Preprocessing.MinMaxScaler.fit(t)
123 | iex> new_tensor = Nx.tensor([[0.5, 1, -1], [0.3, 0.8, -1.6]])
124 | iex> Scholar.Preprocessing.MinMaxScaler.transform(scaler, new_tensor)
125 | #Nx.Tensor<
126 | f32[2][3]
127 | [
128 | [0.5, 0.6666666865348816, 0.0],
129 | [0.43333330750465393, 0.5999999642372131, -0.20000000298023224]
130 | ]
131 | >
132 | """
133 | defn transform(
134 | %__MODULE__{
135 | min_data: min_data,
136 | max_data: max_data,
137 | min_bound: min_bound,
138 | max_bound: max_bound
139 | },
140 | tensor
141 | ) do
142 | denominator = max_data - min_data
143 | denominator = Nx.select(denominator == 0, 1, denominator)
144 | x_std = (tensor - min_data) / denominator
145 | x_std * (max_bound - min_bound) + min_bound
146 | end
147 |
148 | @doc """
149 | Computes the scaling parameters and applies them to transform the tensor.
150 |
151 | ## Examples
152 |
153 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
154 | iex> Scholar.Preprocessing.MinMaxScaler.fit_transform(t)
155 | #Nx.Tensor<
156 | f32[3][3]
157 | [
158 | [0.6666666865348816, 0.0, 1.0],
159 | [1.0, 0.3333333432674408, 0.3333333432674408],
160 | [0.3333333432674408, 0.6666666865348816, 0.0]
161 | ]
162 | >
163 | """
164 | defn fit_transform(tensor, opts \\ []) do
165 | tensor
166 | |> fit(opts)
167 | |> transform(tensor)
168 | end
169 | end
170 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/normalizer.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.Normalizer do
2 | @moduledoc """
3 | Implements functionality for rescaling a tensor to unit norm. It supports normalization along any combination of axes.
4 | """
5 | import Nx.Defn
6 | import Scholar.Shared
7 |
8 | normalize_schema = [
9 | axes: [
10 | type: {:custom, Scholar.Options, :axes, []},
11 | doc: """
12 | Axes to calculate the norm over. By default the norm
13 | is calculated over the whole tensor.
14 | """
15 | ],
16 | norm: [
17 | type: {:in, [:euclidean, :chebyshev, :manhattan]},
18 | default: :euclidean,
19 | doc: """
20 | The norm to use to normalize each non zero sample.
21 | Possible options are `:euclidean`, `:manhattan`, and `:chebyshev`
22 | """
23 | ]
24 | ]
25 |
26 | @normalize_schema NimbleOptions.new!(normalize_schema)
27 |
28 | @doc """
29 | Normalize samples individually to unit norm.
30 |
31 | Zero-tensors cannot be normalized, so they are left
32 | unchanged by the normalization.
33 |
34 | ## Options
35 |
36 | #{NimbleOptions.docs(@normalize_schema)}
37 |
38 | ## Examples
39 |
40 | iex> t = Nx.tensor([[0, 0, 0], [3, 4, 5], [-2, 4, 3]])
41 | iex> Scholar.Preprocessing.Normalizer.fit_transform(t, axes: [1])
42 | #Nx.Tensor<
43 | f32[3][3]
44 | [
45 | [0.0, 0.0, 0.0],
46 | [0.4242640733718872, 0.5656854510307312, 0.7071067690849304],
47 | [-0.3713906705379486, 0.7427813410758972, 0.5570860505104065]
48 | ]
49 | >
50 |
51 | iex> t = Nx.tensor([[0, 0, 0], [3, 4, 5], [-2, 4, 3]])
52 | iex> Scholar.Preprocessing.Normalizer.fit_transform(t)
53 | #Nx.Tensor<
54 | f32[3][3]
55 | [
56 | [0.0, 0.0, 0.0],
57 | [0.3375263810157776, 0.4500351846218109, 0.5625439882278442],
58 | [-0.22501759231090546, 0.4500351846218109, 0.3375263810157776]
59 | ]
60 | >
61 | """
62 | deftransform fit_transform(tensor, opts \\ []) do
63 | normalize_n(tensor, NimbleOptions.validate!(opts, @normalize_schema))
64 | end
65 |
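# The norm of each slice is computed as its distance to a zero tensor of the
# same shape, which lets all three supported norms reuse
# Scholar.Metrics.Distance. Zero norms are later replaced by 1, so all-zero
# slices pass through unchanged.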
66 | defnp normalize_n(tensor, opts) do
67 | shape = Nx.shape(tensor)
68 | type = to_float_type(tensor)
69 | zeros = Nx.broadcast(Nx.tensor(0.0, type: type), shape)
70 |
71 | norm =
72 | case opts[:norm] do
73 | :euclidean ->
74 | Scholar.Metrics.Distance.euclidean(tensor, zeros, axes: opts[:axes])
75 |
76 | :manhattan ->
77 | Scholar.Metrics.Distance.manhattan(tensor, zeros, axes: opts[:axes])
78 |
79 | :chebyshev ->
80 | Scholar.Metrics.Distance.chebyshev(tensor, zeros, axes: opts[:axes])
81 |
82 | other ->
83 | raise ArgumentError,
84 | "expected :norm to be one of: :euclidean, :manhattan, and :chebyshev, got: #{inspect(other)}"
85 | end
86 |
87 | shape_to_broadcast = unsqueezed_reduced_shape(shape, opts[:axes])
88 |
89 | norm =
90 | Nx.select(norm == 0.0, Nx.tensor(1.0, type: type), norm) |> Nx.reshape(shape_to_broadcast)
91 |
92 | tensor / norm
93 | end
94 |
95 | deftransformp unsqueezed_reduced_shape(shape, axes) do
96 | if axes != nil do
97 | Enum.reduce(axes, shape, &put_elem(&2, &1, 1))
98 | else
99 | Tuple.duplicate(1, Nx.rank(shape))
100 | end
101 | end
102 | end
103 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/one_hot_encoder.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.OneHotEncoder do
2 | @moduledoc """
3 | Implements an encoder that converts integer values (a substitute for categorical data in tensors) into one-hot vectors.
4 | The position of the 1 in each vector is assigned in sorted order, so for values x < y we have one_index(x) < one_index(y).
5 |
6 | Currently the module supports only 1D tensors.
7 | """
8 | import Nx.Defn
9 |
10 | @derive {Nx.Container, containers: [:ordinal_encoder]}
11 | defstruct [:ordinal_encoder]
12 |
13 | encode_schema = [
14 | num_categories: [
15 | required: true,
16 | type: :pos_integer,
17 | doc: """
18 | The number of categories to be encoded.
19 | """
20 | ]
21 | ]
22 |
23 | @encode_schema NimbleOptions.new!(encode_schema)
24 |
25 | @doc """
26 | Creates mapping from values into one-hot vectors.
27 |
28 | ## Options
29 |
30 | #{NimbleOptions.docs(@encode_schema)}
31 |
32 | ## Examples
33 |
34 | iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
35 | iex> Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
36 | %Scholar.Preprocessing.OneHotEncoder{
37 | ordinal_encoder: %Scholar.Preprocessing.OrdinalEncoder{
38 | categories: Nx.tensor([2, 3, 4, 56]
39 | )
40 | }
41 | }
42 | """
43 | deftransform fit(tensor, opts) do
44 | if Nx.rank(tensor) != 1 do
45 | raise ArgumentError,
46 | """
47 | expected input tensor to have shape {num_samples}, \
48 | got tensor with shape: #{inspect(Nx.shape(tensor))}
49 | """
50 | end
51 |
52 | opts = NimbleOptions.validate!(opts, @encode_schema)
53 |
54 | fit_n(tensor, opts)
55 | end
56 |
57 | defnp fit_n(tensor, opts) do
58 | ordinal_encoder = Scholar.Preprocessing.OrdinalEncoder.fit(tensor, opts)
59 | %__MODULE__{ordinal_encoder: ordinal_encoder}
60 | end
61 |
62 | @doc """
63 | Encode labels as a one-hot numeric tensor. All values provided to `transform/2` must have been
64 | seen during `fit/2`, otherwise an error occurs.
65 |
66 | ## Examples
67 |
68 | iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
69 | iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
70 | iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, tensor)
71 | #Nx.Tensor<
72 | u8[7][4]
73 | [
74 | [0, 1, 0, 0],
75 | [1, 0, 0, 0],
76 | [0, 0, 1, 0],
77 | [0, 0, 0, 1],
78 | [1, 0, 0, 0],
79 | [0, 0, 1, 0],
80 | [1, 0, 0, 0]
81 | ]
82 | >
83 |
84 | iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
85 | iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
86 | iex> new_tensor = Nx.tensor([2, 3, 4, 3, 4, 56, 2])
87 | iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, new_tensor)
88 | #Nx.Tensor<
89 | u8[7][4]
90 | [
91 | [1, 0, 0, 0],
92 | [0, 1, 0, 0],
93 | [0, 0, 1, 0],
94 | [0, 1, 0, 0],
95 | [0, 0, 1, 0],
96 | [0, 0, 0, 1],
97 | [1, 0, 0, 0]
98 | ]
99 | >
100 | """
101 | defn transform(%__MODULE__{ordinal_encoder: ordinal_encoder}, tensor) do
102 | num_categories = Nx.size(ordinal_encoder.categories)
103 | num_samples = Nx.size(tensor)
104 |
105 | encoded =
106 | ordinal_encoder
107 | |> Scholar.Preprocessing.OrdinalEncoder.transform(tensor)
108 | |> Nx.new_axis(1)
109 | |> Nx.broadcast({num_samples, num_categories})
110 |
111 | encoded == Nx.iota({num_samples, num_categories}, axis: 1)
112 | end
113 |
114 | @doc """
115 | Applies one-hot encoding on the provided tensor directly.
116 | It's equivalent to `fit/2` followed by `transform/2` on the same data.
117 |
118 | ## Examples
119 |
120 | iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
121 | iex> Scholar.Preprocessing.OneHotEncoder.fit_transform(tensor, num_categories: 4)
122 | #Nx.Tensor<
123 | u8[7][4]
124 | [
125 | [0, 1, 0, 0],
126 | [1, 0, 0, 0],
127 | [0, 0, 1, 0],
128 | [0, 0, 0, 1],
129 | [1, 0, 0, 0],
130 | [0, 0, 1, 0],
131 | [1, 0, 0, 0]
132 | ]
133 | >
134 | """
135 | deftransform fit_transform(tensor, opts) do
136 | if Nx.rank(tensor) != 1 do
137 | raise ArgumentError,
138 | """
139 | expected input tensor to have shape {num_samples}, \
140 | got tensor with shape: #{inspect(Nx.shape(tensor))}
141 | """
142 | end
143 |
144 | opts = NimbleOptions.validate!(opts, @encode_schema)
145 | fit_transform_n(tensor, opts)
146 | end
147 |
148 | defnp fit_transform_n(tensor, opts) do
149 | num_samples = Nx.size(tensor)
150 | num_categories = opts[:num_categories]
151 |
152 | encoded =
153 | tensor
154 | |> Scholar.Preprocessing.OrdinalEncoder.fit_transform()
155 | |> Nx.new_axis(1)
156 | |> Nx.broadcast({num_samples, num_categories})
157 |
158 | encoded == Nx.iota({num_samples, num_categories}, axis: 1)
159 | end
160 | end
161 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/robust_scaler.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.RobustScaler do
2 | @moduledoc ~S"""
3 | Scale features using statistics that are robust to outliers.
4 |
5 | This Scaler removes the median and scales the data according to
6 | the quantile range (defaults to IQR: Interquartile Range).
7 | The IQR is the range between the 1st quartile (25th quantile)
8 | and the 3rd quartile (75th quantile).
9 | """
10 |
11 | import Nx.Defn
12 |
13 | @derive {Nx.Container, containers: [:medians, :iqr]}
14 | defstruct [:medians, :iqr]
15 |
16 | opts_schema = [
17 | quantile_range: [
18 | type: {:custom, Scholar.Options, :quantile_range, []},
19 | default: {25.0, 75.0},
20 | doc: """
21 | Quantile range as a tuple {q_min, q_max} defining the range of quantiles
22 | to include. Must satisfy 0.0 < q_min < q_max < 100.0.
23 | """
24 | ]
25 | ]
26 |
27 | @opts_schema NimbleOptions.new!(opts_schema)
28 |
29 | @doc """
30 | Compute the median and quantiles to be used for scaling.
31 |
32 | ## Options
33 |
34 | #{NimbleOptions.docs(@opts_schema)}
35 |
36 | ## Return values
37 |
38 | Returns a struct with the following parameters:
39 |
40 | * `:iqr` - the calculated interquartile range.
41 |
42 | * `:medians` - the calculated medians of each feature across samples.
43 |
44 | ## Examples
45 |
46 | iex> Scholar.Preprocessing.RobustScaler.fit(Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]]))
47 | %Scholar.Preprocessing.RobustScaler{
48 | medians: Nx.tensor([1, 0, 0]),
49 | iqr: Nx.tensor([1.0, 1.0, 1.5])
50 | }
51 | """
52 | deftransform fit(tensor, opts \\ []) do
53 | fit_n(tensor, NimbleOptions.validate!(opts, @opts_schema))
54 | end
55 |
56 | defnp fit_n(tensor, opts) do
57 | check_for_rank(tensor)
58 |
59 | {q_min, q_max} = opts[:quantile_range]
60 |
61 | medians = Nx.median(tensor, axis: 0)
62 |
63 | sorted_tensor = Nx.sort(tensor, axis: 0)
64 |
65 | q_min = percentile(sorted_tensor, q_min)
66 | q_max = percentile(sorted_tensor, q_max)
67 |
68 | iqr = q_max - q_min
69 |
70 | %__MODULE__{medians: medians, iqr: iqr}
71 | end
72 |
73 | @doc """
74 | Performs centering and scaling of the tensor using a fitted scaler.
75 |
76 | ## Examples
77 |
78 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
79 | iex> scaler = Scholar.Preprocessing.RobustScaler.fit(t)
80 | %Scholar.Preprocessing.RobustScaler{
81 | medians: Nx.tensor([1, 0, 0]),
82 | iqr: Nx.tensor([1.0, 1.0, 1.5])
83 | }
84 | iex> Scholar.Preprocessing.RobustScaler.transform(scaler, t)
85 | #Nx.Tensor<
86 | f32[3][3]
87 | [
88 | [0.0, -1.0, 1.3333333730697632],
89 | [1.0, 0.0, 0.0],
90 | [-1.0, 1.0, -0.6666666865348816]
91 | ]
92 | >
93 | """
94 | defn transform(%__MODULE__{medians: medians, iqr: iqr}, tensor) do
95 | check_for_rank(tensor)
96 | scale(tensor, medians, iqr)
97 | end
98 |
99 | @doc """
100 | Computes the scaling parameters and applies them to transform the tensor.
101 |
102 | ## Examples
103 |
104 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
105 | iex> Scholar.Preprocessing.RobustScaler.fit_transform(t)
106 | #Nx.Tensor<
107 | f32[3][3]
108 | [
109 | [0.0, -1.0, 1.3333333730697632],
110 | [1.0, 0.0, 0.0],
111 | [-1.0, 1.0, -0.6666666865348816]
112 | ]
113 | >
114 | """
115 | defn fit_transform(tensor, opts \\ []) do
116 | tensor
117 | |> fit(opts)
118 | |> transform(tensor)
119 | end
120 |
121 | defnp scale(tensor, medians, iqr) do
122 | (tensor - medians) / Nx.select(iqr == 0, 1.0, iqr)
123 | end
124 |
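# Linearly interpolated percentile along axis 0 of an already sorted tensor:
# the target rank p / 100 * (n - 1) generally falls between two rows, so the
# lower and upper rows are blended by the fractional part of the rank (the
# same linear interpolation NumPy uses by default).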
125 | defnp percentile(sorted_tensor, p) do
126 | num_rows = Nx.axis_size(sorted_tensor, 0)
127 | idx = p / 100 * (num_rows - 1)
128 |
129 | lower_idx = Nx.floor(idx) |> Nx.as_type(:s64)
130 | upper_idx = Nx.ceil(idx) |> Nx.as_type(:s64)
131 |
132 | lower_values = Nx.take(sorted_tensor, lower_idx, axis: 0)
133 | upper_values = Nx.take(sorted_tensor, upper_idx, axis: 0)
134 |
135 | weight_upper = idx - Nx.floor(idx)
136 | weight_lower = 1.0 - weight_upper
137 | lower_values * weight_lower + upper_values * weight_upper
138 | end
139 |
140 | defnp check_for_rank(tensor) do
141 | if Nx.rank(tensor) != 2 do
142 | raise ArgumentError,
143 | """
144 | expected tensor to have shape {num_samples, num_features}, \
145 | got tensor with shape: #{inspect(Nx.shape(tensor))}\
146 | """
147 | end
148 | end
149 | end
150 |
--------------------------------------------------------------------------------
/lib/scholar/preprocessing/standard_scaler.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.StandardScaler do
2 | @moduledoc ~S"""
3 | Standardizes the tensor by removing the mean and scaling to unit variance.
4 |
5 | Formula for input tensor $x$:
6 |
7 | $$
8 | z = \frac{x - \mu}{\sigma}
9 | $$
10 |
11 | Where $\mu$ is the mean of the samples, and $\sigma$ is the standard deviation.
12 | Standardization can be helpful in cases where the data follows
13 | a Gaussian distribution (or Normal distribution) without outliers.
14 |
15 | Centering and scaling happen independently on each feature by computing the relevant
16 | statistics on the samples in the training set. Mean and standard deviation are then
17 | stored to be used on new samples.
18 | """
19 |
20 | import Nx.Defn
21 |
22 | @derive {Nx.Container, containers: [:standard_deviation, :mean]}
23 | defstruct [:standard_deviation, :mean]
24 |
25 | opts_schema = [
26 | axes: [
27 | type: {:custom, Scholar.Options, :axes, []},
28 | doc: """
29 | Axes to calculate the distance over. By default the distance
30 | is calculated between the whole tensors.
31 | """
32 | ]
33 | ]
34 |
35 | @opts_schema NimbleOptions.new!(opts_schema)
36 |
37 | @doc """
38 | Compute the standard deviation and mean of samples to be used for later scaling.
39 |
40 | ## Options
41 |
42 | #{NimbleOptions.docs(@opts_schema)}
43 |
44 | ## Return values
45 |
46 | Returns a struct with the following parameters:
47 |
48 | * `standard_deviation`: the calculated standard deviation of samples.
49 |
50 | * `mean`: the calculated mean of samples.
51 |
52 | ## Examples
53 |
54 | iex> Scholar.Preprocessing.StandardScaler.fit(Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]]))
55 | %Scholar.Preprocessing.StandardScaler{
56 | standard_deviation: Nx.tensor(
57 | [
58 | [1.0657403469085693]
59 | ]
60 | ),
61 | mean: Nx.tensor(
62 | [
63 | [0.4444444477558136]
64 | ]
65 | )
66 | }
67 | """
68 | deftransform fit(tensor, opts \\ []) do
69 | opts = NimbleOptions.validate!(opts, @opts_schema)
70 | fit_n(tensor, opts)
71 | end
72 |
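# Where the standard deviation is zero, the mean is reset to 0 so that
# transform/2 leaves constant features unchanged (scale/3 also swaps a zero
# standard deviation for 1 before dividing).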
73 | defnp fit_n(tensor, opts) do
74 | std = Nx.standard_deviation(tensor, axes: opts[:axes], keep_axes: true)
75 | mean_reduced = Nx.mean(tensor, axes: opts[:axes], keep_axes: true)
76 | mean_reduced = Nx.select(std == 0, 0.0, mean_reduced)
77 | %__MODULE__{standard_deviation: std, mean: mean_reduced}
78 | end
79 |
80 | @doc """
81 | Performs the standardization of the tensor using a fitted scaler.
82 |
83 | ## Examples
84 |
85 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
86 | iex> scaler = Scholar.Preprocessing.StandardScaler.fit(t)
87 | %Scholar.Preprocessing.StandardScaler{
88 | standard_deviation: Nx.tensor(
89 | [
90 | [1.0657403469085693]
91 | ]
92 | ),
93 | mean: Nx.tensor(
94 | [
95 | [0.4444444477558136]
96 | ]
97 | )
98 | }
99 | iex> Scholar.Preprocessing.StandardScaler.transform(scaler, t)
100 | #Nx.Tensor<
101 | f32[3][3]
102 | [
103 | [0.5212860703468323, -1.3553436994552612, 1.4596009254455566],
104 | [1.4596009254455566, -0.4170288145542145, -0.4170288145542145],
105 | [-0.4170288145542145, 0.5212860703468323, -1.3553436994552612]
106 | ]
107 | >
108 | """
109 | defn transform(%__MODULE__{standard_deviation: std, mean: mean}, tensor) do
110 | scale(tensor, std, mean)
111 | end
112 |
113 | @doc """
114 | Standardizes the tensor by removing the mean and scaling to unit variance.
115 |
116 | ## Examples
117 |
118 | iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
119 | iex> Scholar.Preprocessing.StandardScaler.fit_transform(t)
120 | #Nx.Tensor<
121 | f32[3][3]
122 | [
123 | [0.5212860703468323, -1.3553436994552612, 1.4596009254455566],
124 | [1.4596009254455566, -0.4170288145542145, -0.4170288145542145],
125 | [-0.4170288145542145, 0.5212860703468323, -1.3553436994552612]
126 | ]
127 | >
128 | """
129 | defn fit_transform(tensor, opts \\ []) do
130 | tensor
131 | |> fit(opts)
132 | |> transform(tensor)
133 | end
134 |
135 | defnp scale(tensor, std, mean) do
136 | (tensor - mean) / Nx.select(std == 0, 1.0, std)
137 | end
138 | end
139 |
--------------------------------------------------------------------------------
/lib/scholar/shared.ex:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Shared do
2 | @moduledoc false
3 |
4 | # Collection of private helper functions and
5 | # macros for enforcing shape/type constraints,
6 | # and doing shape calculations.
7 |
8 | import Nx.Defn
9 | require Nx
10 |
11 | @doc """
12 | Asserts `left` has same shape as `right`.
13 | """
14 | deftransform assert_same_shape!(left, right) do
15 | left_shape = Nx.shape(left)
16 | right_shape = Nx.shape(right)
17 |
18 | unless left_shape == right_shape do
19 | raise ArgumentError,
20 | "expected tensor to have shape #{inspect(left_shape)}, got tensor with shape #{inspect(right_shape)}"
21 | end
22 | end
23 |
24 | @doc """
25 | Asserts `tensor` has rank `target_rank`.
26 | """
27 | deftransform assert_rank!(tensor, target_rank) do
28 | rank = Nx.rank(tensor)
29 |
30 | unless rank == target_rank do
31 | raise ArgumentError,
32 | "expected tensor to have rank #{target_rank}, got tensor with rank #{rank}"
33 | end
34 | end
35 |
36 | @doc """
37 | Returns the floating type of `tensor`.
38 | """
39 | deftransform to_float_type(tensor) do
40 | tensor |> Nx.type() |> Nx.Type.to_floating()
41 | end
42 |
43 | @doc """
44 | Converts `tensor` to the floating type.
45 | """
46 | defn to_float(tensor) do
47 | type = to_float_type(tensor)
48 | Nx.as_type(tensor, type)
49 | end
50 |
51 | deftransform validate_weights(weights, num_samples, opts \\ []) do
52 | type = opts[:type]
53 |
54 | cond do
55 | is_nil(weights) ->
56 | Nx.tensor(1.0, type: type)
57 |
58 | Nx.is_tensor(weights) and Nx.shape(weights) in [{}, {num_samples}] ->
59 | weights |> Nx.broadcast({num_samples}) |> Nx.as_type(type)
60 |
61 | is_list(weights) and length(weights) == num_samples ->
62 | Nx.tensor(weights, type: type)
63 |
64 | true ->
65 | raise ArgumentError,
66 | "invalid value for :weights option: expected list or tensor of positive numbers of size #{num_samples}, got: #{inspect(weights)}"
67 | end
68 | end
69 |
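# Checks that two shapes of equal rank are broadcast compatible by walking
# the dimensions recursively: each pair of dimensions must be equal, or one
# of them must be 1.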
70 | deftransform valid_broadcast!(n_dims, shape1, shape2) do
71 | if tuple_size(shape1) != tuple_size(shape2) do
72 | raise ArgumentError,
73 | "expected shapes to have same rank, got #{inspect(tuple_size(shape1))} and #{inspect(tuple_size(shape2))}"
74 | end
75 |
76 | valid_broadcast(n_dims, n_dims, shape1, shape2)
77 | end
78 |
79 | deftransformp valid_broadcast(0, _n_dims, _shape1, _shape2), do: true
80 |
81 | deftransformp valid_broadcast(to_parse, n_dims, shape1, shape2) do
82 | dim1 = elem(shape1, n_dims - to_parse)
83 | dim2 = elem(shape2, n_dims - to_parse)
84 |
85 | if not (dim1 == 1 or dim2 == 1 or dim2 == dim1) do
86 | raise ArgumentError,
87 | "tensors must be broadcast compatible, got tensors with shapes #{inspect(shape1)} and #{inspect(shape2)}"
88 | end
89 |
90 | valid_broadcast(to_parse - 1, n_dims, shape1, shape2)
91 | end
92 | end
93 |
--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.MixProject do
2 | use Mix.Project
3 |
4 | @source_url "https://github.com/elixir-nx/scholar"
5 | @version "0.4.0"
6 |
7 | def project do
8 | [
9 | app: :scholar,
10 | name: "Scholar",
11 | version: @version,
12 | elixir: "~> 1.14",
13 | elixirc_paths: elixirc_paths(Mix.env()),
14 | deps: deps(),
15 | docs: &docs/0,
16 | package: package()
17 | ]
18 | end
19 |
20 | defp elixirc_paths(:test), do: ["lib", "test/support"]
21 | defp elixirc_paths(_), do: ["lib"]
22 |
23 | def application do
24 | [
25 | extra_applications: [:logger]
26 | ]
27 | end
28 |
29 | defp deps do
30 | [
31 | {:ex_doc, "~> 0.34", only: :docs},
32 | {:nx, "~> 0.9"},
33 | {:nimble_options, "~> 0.5.2 or ~> 1.0"},
34 | {:exla, ">= 0.0.0", only: :test},
35 | {:polaris, "~> 0.1"},
36 | {:benchee, "~> 1.0", only: :dev},
37 | {:scidata, "~> 0.1.11", only: :test}
38 | ]
39 | end
40 |
41 | defp package do
42 | [
43 | maintainers: ["Mateusz Słuszniak", "Krsto Proroković"],
44 | description: "Traditional machine learning on top of Nx",
45 | licenses: ["Apache-2.0"],
46 | links: %{"GitHub" => @source_url}
47 | ]
48 | end
49 |
50 | defp docs do
51 | [
52 | main: "readme",
53 | source_url: @source_url,
54 | assets: %{"notebooks/files" => "files"},
55 | logo: "images/scholar_simplified.png",
56 | extra_section: "Guides",
57 | extras: [
58 | "README.md",
59 | "notebooks/cv_gradient_boosting_tree.livemd",
60 | # "notebooks/hierarchical_clustering.livemd",
61 | "notebooks/k_means.livemd",
62 | "notebooks/k_nearest_neighbors.livemd",
63 | "notebooks/linear_regression.livemd",
64 | "notebooks/manifold_learning.livemd",
65 | "notebooks/mds.livemd",
66 | "notebooks/nearest_neighbors.livemd"
67 | ],
68 | groups_for_modules: [
69 | Models: [
70 | Scholar.Cluster.AffinityPropagation,
71 | Scholar.Cluster.DBSCAN,
72 | Scholar.Cluster.GaussianMixture,
73 | Scholar.Cluster.Hierarchical,
74 | Scholar.Cluster.KMeans,
75 | Scholar.Decomposition.PCA,
76 | Scholar.Integrate,
77 | Scholar.Interpolation.BezierSpline,
78 | Scholar.Interpolation.CubicSpline,
79 | Scholar.Interpolation.Linear,
80 | Scholar.Linear.BayesianRidgeRegression,
81 | Scholar.Linear.IsotonicRegression,
82 | Scholar.Linear.LinearRegression,
83 | Scholar.Linear.LogisticRegression,
84 | Scholar.Linear.PolynomialRegression,
85 | Scholar.Linear.RidgeRegression,
86 | Scholar.Linear.SVM,
87 | Scholar.Manifold.MDS,
88 | Scholar.Manifold.Trimap,
89 | Scholar.Manifold.TSNE,
90 | Scholar.NaiveBayes.Complement,
91 | Scholar.NaiveBayes.Gaussian,
92 | Scholar.NaiveBayes.Multinomial,
93 | Scholar.Neighbors.BruteKNN,
94 | Scholar.Neighbors.KDTree,
95 | Scholar.Neighbors.KNNClassifier,
96 | Scholar.Neighbors.KNNRegressor,
97 | Scholar.Neighbors.LargeVis,
98 | Scholar.Neighbors.NNDescent,
99 | Scholar.Neighbors.RadiusNNClassifier,
100 | Scholar.Neighbors.RadiusNNRegressor,
101 | Scholar.Neighbors.RandomProjectionForest
102 | ],
103 | Utilities: [
104 | Scholar.Impute.SimpleImputer,
105 | Scholar.Metrics.Classification,
106 | Scholar.Metrics.Clustering,
107 | Scholar.Metrics.Distance,
108 | Scholar.Metrics.Neighbors,
109 | Scholar.Metrics.Ranking,
110 | Scholar.Metrics.Regression,
111 | Scholar.Metrics.Similarity,
112 | Scholar.ModelSelection,
113 | Scholar.Preprocessing,
114 | Scholar.Preprocessing.MaxAbsScaler,
115 | Scholar.Preprocessing.MinMaxScaler,
116 | Scholar.Preprocessing.Normalizer,
117 | Scholar.Preprocessing.OneHotEncoder,
118 | Scholar.Preprocessing.OrdinalEncoder,
119 | Scholar.Preprocessing.StandardScaler,
120 | Scholar.Stats
121 | ]
122 | ],
123 | before_closing_body_tag: &before_closing_body_tag/1
124 | ]
125 | end
126 |
127 | defp before_closing_body_tag(:html) do
128 | """
129 | <!-- (script/style tags injected here for rendering math and charts in the
130 | generated docs were stripped from this dump) -->
161 | """
162 | end
163 |
164 | defp before_closing_body_tag(_), do: ""
165 | end
166 |
--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
1 | %{
2 | "benchee": {:hex, :benchee, "1.3.1", "c786e6a76321121a44229dde3988fc772bca73ea75170a73fd5f4ddf1af95ccf", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}, {:statistex, "~> 1.0", [hex: :statistex, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "76224c58ea1d0391c8309a8ecbfe27d71062878f59bd41a390266bf4ac1cc56d"},
3 | "castore": {:hex, :castore, "0.1.22", "4127549e411bedd012ca3a308dede574f43819fe9394254ca55ab4895abfa1a2", [:mix], [], "hexpm", "c17576df47eb5aa1ee40cc4134316a99f5cad3e215d5c77b8dd3cfef12a22cac"},
4 | "complex": {:hex, :complex, "0.6.0", "b0130086a7a8c33574d293b2e0e250f4685580418eac52a5658a4bd148f3ccf1", [:mix], [], "hexpm", "0a5fa95580dcaf30fcd60fe1aaf24327c0fe401e98c24d892e172e79498269f9"},
5 | "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
6 | "earmark_parser": {:hex, :earmark_parser, "1.4.42", "f23d856f41919f17cd06a493923a722d87a2d684f143a1e663c04a2b93100682", [:mix], [], "hexpm", "6915b6ca369b5f7346636a2f41c6a6d78b5af419d61a611079189233358b8b8b"},
7 | "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
8 | "ex_doc": {:hex, :ex_doc, "0.36.1", "4197d034f93e0b89ec79fac56e226107824adcce8d2dd0a26f5ed3a95efc36b1", [:mix], [{:earmark_parser, "~> 1.4.42", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "d7d26a7cf965dacadcd48f9fa7b5953d7d0cfa3b44fa7a65514427da44eafd89"},
9 | "exla": {:hex, :exla, "0.9.2", "2b5cb7334f79fedc301502a793ffd10bc1ec8de2c61eebabcabf213fc98ae7e6", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.9.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.8.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "e51085e196b466d235e93d9f5ea2cbf7d90315d216aa02e996f99bcaaa19c593"},
10 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
11 | "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
12 | "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
13 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"},
14 | "nimble_csv": {:hex, :nimble_csv, "1.2.0", "4e26385d260c61eba9d4412c71cea34421f296d5353f914afe3f2e71cce97722", [:mix], [], "hexpm", "d0628117fcc2148178b034044c55359b26966c6eaa8e2ce15777be3bbc91b12a"},
15 | "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
16 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
17 | "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
18 | "nx": {:hex, :nx, "0.9.2", "17563029c01bf749aad3c31234326d7665abd0acc33ee2acbe531a4759f29a8a", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "914d74741617d8103de8ab1f8c880353e555263e1c397b8a1109f79a3716557f"},
19 | "polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"},
20 | "scidata": {:hex, :scidata, "0.1.11", "fe3358bac7d740374b4f2a7eff6a1cb02e5ee7f87f7cdb1e8648ad93c533165f", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.1", [hex: :nimble_csv, repo: "hexpm", optional: false]}, {:stb_image, "~> 0.4", [hex: :stb_image, repo: "hexpm", optional: true]}], "hexpm", "90873337a9d5fe880d640517efa93d3c07e46c8ba436de44117f581800549f93"},
21 | "statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"},
22 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"},
23 | "xla": {:hex, :xla, "0.8.0", "fef314d085dd3ee16a0816c095239938f80769150e15db16dfaa435553d7cb16", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "739c61c8d93b97e12ba0369d10e76130224c208f1a76ad293e3581f056833e57"},
24 | }
25 |
--------------------------------------------------------------------------------
/notebooks/files/knn_gyms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elixir-nx/scholar/48aa36e95dd14fa51e3ad5e23fc72c3a3eed3877/notebooks/files/knn_gyms.png
--------------------------------------------------------------------------------
/notebooks/files/mammoth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elixir-nx/scholar/48aa36e95dd14fa51e3ad5e23fc72c3a3eed3877/notebooks/files/mammoth.png
--------------------------------------------------------------------------------
/notebooks/files/rbf_and_kdtree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elixir-nx/scholar/48aa36e95dd14fa51e3ad5e23fc72c3a3eed3877/notebooks/files/rbf_and_kdtree.png
--------------------------------------------------------------------------------
/notebooks/files/rpKNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/elixir-nx/scholar/48aa36e95dd14fa51e3ad5e23fc72c3a3eed3877/notebooks/files/rpKNN.png
--------------------------------------------------------------------------------
/test/scholar/cluster/affinity_propagation_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Cluster.AffinityPropagationTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Cluster.AffinityPropagation
4 | doctest AffinityPropagation
5 |
6 | defp key do
7 | Nx.Random.key(42)
8 | end
9 |
10 | defp x do
11 | Nx.tensor([
12 | [16, 2, 17],
13 | [17, 3, 9],
14 | [9, 16, 15],
15 | [13, 8, 8],
16 | [3, 5, 15],
17 | [19, 11, 9],
18 | [8, 15, 2],
19 | [16, 2, 2],
20 | [10, 10, 0],
21 | [8, 7, 5],
22 | [4, 11, 8],
23 | [11, 17, 7],
24 | [16, 4, 2],
25 | [13, 9, 7],
26 | [18, 16, 12],
27 | [6, 8, 6],
28 | [18, 13, 1],
29 | [2, 2, 2],
30 | [0, 1, 18],
31 | [12, 16, 18],
32 | [3, 14, 5],
33 | [2, 16, 13],
34 | [6, 6, 13],
35 | [16, 3, 5],
36 | [0, 16, 5],
37 | [4, 18, 5],
38 | [5, 8, 0],
39 | [1, 5, 15],
40 | [10, 0, 14],
41 | [13, 8, 14],
42 | [19, 2, 9],
43 | [17, 17, 0],
44 | [19, 14, 19],
45 | [9, 19, 10],
46 | [11, 4, 12],
47 | [3, 16, 19],
48 | [17, 3, 6],
49 | [9, 16, 10],
50 | [5, 17, 3],
51 | [3, 15, 17]
52 | ])
53 | end
54 |
55 | defp x_test do
56 | Nx.tensor([
57 | [12, 11, 15],
58 | [6, 3, 3],
59 | [8, 16, 16],
60 | [12, 2, 17],
61 | [11, 3, 17],
62 | [15, 1, 14],
63 | [0, 6, 7],
64 | [7, 9, 3],
65 | [13, 3, 16],
66 | [11, 2, 2]
67 | ])
68 | end
69 |
70 | test "fit and compute_values" do
71 | model = AffinityPropagation.fit(x(), key: key(), preference: :median)
72 |
73 | model = AffinityPropagation.prune(model)
74 |
75 | assert model.labels ==
76 | Nx.tensor(
77 | [5, 6, 0, 2, 1, 3, 7, 6, 2, 2, 2, 7, 6, 2, 3, 2, 4, 2, 1] ++
78 | [0, 7, 0, 1, 6, 7, 7, 2, 1, 5, 5, 6, 4, 3, 0, 5, 0, 6, 0, 7, 0]
79 | )
80 |
81 | assert model.cluster_centers ==
82 | Nx.tensor([
83 | [9.0, 16.0, 15.0],
84 | [3.0, 5.0, 15.0],
85 | [8.0, 7.0, 5.0],
86 | [18.0, 16.0, 12.0],
87 | [18.0, 13.0, 1.0],
88 | [11.0, 4.0, 12.0],
89 | [17.0, 3.0, 6.0],
90 | [5.0, 17.0, 3.0]
91 | ])
92 |
93 | assert model.cluster_centers_indices == Nx.tensor([2, 4, 9, 14, 16, 34, 36, 38])
94 | end
95 |
96 | test "predict with pruning" do
97 | model = AffinityPropagation.fit(x(), key: key(), preference: :median)
98 | model = AffinityPropagation.prune(model)
99 | preds = AffinityPropagation.predict(model, x_test())
100 | assert preds == Nx.tensor([0, 2, 0, 5, 5, 5, 2, 2, 5, 2])
101 | end
102 |
103 | test "predict without pruning" do
104 | model = AffinityPropagation.fit(x(), key: key(), preference: :median)
105 | preds = AffinityPropagation.predict(model, x_test())
106 | assert preds == Nx.tensor([2, 9, 2, 34, 34, 34, 9, 9, 34, 9])
107 | end
108 | end
109 |
--------------------------------------------------------------------------------
/test/scholar/cluster/gmm_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Cluster.GaussianMixtureTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Cluster.GaussianMixture
4 | doctest GaussianMixture
5 |
6 | describe "invalid arguments" do
7 | test "when :num_gaussians is not provided" do
8 | x = Nx.tensor([[1, 2], [3, 4], [5, 6]])
9 |
10 | assert_raise NimbleOptions.ValidationError,
11 | "required :num_gaussians option not found, received options: []",
12 | fn ->
13 | GaussianMixture.fit(x)
14 | end
15 | end
16 |
17 | test "when :num_gaussians is invalid" do
18 | x = Nx.tensor([[1, 2], [3, 4], [5, 6]])
19 |
20 | assert_raise ArgumentError,
21 | "invalid value for :num_gaussians option: expected positive integer between 1 and 3, got: 4",
22 | fn ->
23 | GaussianMixture.fit(x, num_gaussians: 4)
24 | end
25 |
26 | assert_raise NimbleOptions.ValidationError,
27 | "invalid value for :num_gaussians option: expected positive integer, got: 2.0",
28 | fn ->
29 | GaussianMixture.fit(x, num_gaussians: 2.0)
30 | end
31 |
32 | assert_raise NimbleOptions.ValidationError,
33 | "invalid value for :num_gaussians option: expected positive integer, got: -1",
34 | fn ->
35 | GaussianMixture.fit(x, num_gaussians: -1)
36 | end
37 | end
38 |
39 | test "when training vector size is invalid" do
40 | x = Nx.tensor([5, 6])
41 |
42 | assert_raise ArgumentError,
43 | "expected input tensor to have shape {n_samples, n_features}, got tensor with shape: {2}",
44 | fn -> GaussianMixture.fit(x, num_gaussians: 2) end
45 | end
46 |
47 | test "when :num_runs is not a positive integer" do
48 | x = Nx.tensor([[1, 2], [3, 4]])
49 |
50 | assert_raise NimbleOptions.ValidationError,
51 | "invalid value for :num_runs option: expected positive integer, got: 0",
52 | fn ->
53 | GaussianMixture.fit(x, num_gaussians: 2, num_runs: 0)
54 | end
55 |
56 | assert_raise NimbleOptions.ValidationError,
57 | "invalid value for :num_runs option: expected positive integer, got: 10.0",
58 | fn ->
59 | GaussianMixture.fit(x, num_gaussians: 2, num_runs: 10.0)
60 | end
61 | end
62 |
63 | test "when :max_iter is not a positive integer" do
64 | x = Nx.tensor([[1, 2], [3, 4]])
65 |
66 | assert_raise NimbleOptions.ValidationError,
67 | "invalid value for :max_iter option: expected positive integer, got: 0",
68 | fn ->
69 | GaussianMixture.fit(x, num_gaussians: 2, max_iter: 0)
70 | end
71 |
72 | assert_raise NimbleOptions.ValidationError,
73 | "invalid value for :max_iter option: expected positive integer, got: 200.0",
74 | fn ->
75 | GaussianMixture.fit(x, num_gaussians: 2, max_iter: 200.0)
76 | end
77 | end
78 |
79 | test "when :tol is not a non-negative number" do
80 | x = Nx.tensor([[1, 2], [3, 4]])
81 |
82 | assert_raise NimbleOptions.ValidationError,
83 | "invalid value for :tol option: expected a non-negative number, got: -0.1",
84 | fn ->
85 | GaussianMixture.fit(x, num_gaussians: 2, tol: -0.1)
86 | end
87 | end
88 | end
89 | end
90 |
--------------------------------------------------------------------------------
/test/scholar/covariance/ledoit_wolf_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Covariance.LedoitWolfTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Covariance.LedoitWolf
4 | doctest LedoitWolf
5 |
6 | defp key do
7 | Nx.Random.key(1)
8 | end
9 |
10 | test "fit test - all default options" do
11 | key = key()
12 |
13 | {x, _new_key} =
14 | Nx.Random.multivariate_normal(
15 | key,
16 | Nx.tensor([0.0, 0.0, 0.0]),
17 | Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
18 | shape: {10},
19 | type: :f32
20 | )
21 |
22 | model = LedoitWolf.fit(x)
23 |
24 | assert_all_close(
25 | model.covariance,
26 | Nx.tensor([
27 | [1.439786434173584, -0.0, 0.0],
28 | [-0.0, 1.439786434173584, 0.0],
29 | [0.0, 0.0, 1.439786434173584]
30 | ]),
31 | atol: 1.0e-3
32 | )
33 |
34 | assert_all_close(model.shrinkage, Nx.tensor(1.0), atol: 1.0e-3)
35 |
36 | assert_all_close(
37 | model.location,
38 | Nx.tensor([-1.015519142150879, -0.4495307505130768, 0.06475571542978287]),
39 | atol: 1.0e-3
40 | )
41 | end
42 |
43 | test "fit test - :assume_centered? is true" do
44 | key = key()
45 |
46 | {x, _new_key} =
47 | Nx.Random.multivariate_normal(
48 | key,
49 | Nx.tensor([0.0, 0.0, 0.0]),
50 | Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
51 | shape: {10},
52 | type: :f32
53 | )
54 |
55 | model = LedoitWolf.fit(x, assume_centered?: true)
56 |
57 | assert_all_close(
58 | model.covariance,
59 | Nx.tensor([
60 | [1.852303147315979, 0.0, 0.0],
61 | [0.0, 1.852303147315979, 0.0],
62 | [0.0, 0.0, 1.852303147315979]
63 | ]),
64 | atol: 1.0e-3
65 | )
66 |
67 | assert_all_close(model.shrinkage, Nx.tensor(1.0), atol: 1.0e-3)
68 |
69 | assert_all_close(model.location, Nx.tensor([0, 0, 0]), atol: 1.0e-3)
70 | end
71 |
72 | test "fit test 2" do
73 | key = key()
74 |
75 | {x, _new_key} =
76 | Nx.Random.multivariate_normal(
77 | key,
78 | Nx.tensor([0.0, 0.0]),
79 | Nx.tensor([[2.2, 1.5], [0.7, 1.1]]),
80 | shape: {50},
81 | type: :f32
82 | )
83 |
84 | model = LedoitWolf.fit(x)
85 |
86 | assert_all_close(
87 | model.covariance,
88 | Nx.tensor([
89 | [1.8378269672393799, 0.27215731143951416],
90 | [0.27215731143951416, 1.2268550395965576]
91 | ]),
92 | atol: 1.0e-3
93 | )
94 |
95 | assert_all_close(model.shrinkage, Nx.tensor(0.38731059432029724), atol: 1.0e-3)
96 |
97 | assert_all_close(model.location, Nx.tensor([0.06882287561893463, 0.13750331103801727]),
98 | atol: 1.0e-3
99 | )
100 | end
101 |
102 | test "fit test - 1 dim x" do
103 | key = key()
104 |
105 | {x, _new_key} =
106 | Nx.Random.multivariate_normal(key, Nx.tensor([0.0]), Nx.tensor([[0.4]]),
107 | shape: {15},
108 | type: :f32
109 | )
110 |
111 | x = Nx.flatten(x)
112 |
113 | model = LedoitWolf.fit(x)
114 |
115 | assert_all_close(
116 | model.covariance,
117 | Nx.tensor([
118 | [0.5322133302688599]
119 | ]),
120 | atol: 1.0e-3
121 | )
122 |
123 | assert_all_close(model.shrinkage, Nx.tensor(0.0), atol: 1.0e-3)
124 |
125 | assert_all_close(model.location, Nx.tensor([0.060818854719400406]), atol: 1.0e-3)
126 | end
127 | end
128 |
--------------------------------------------------------------------------------
/test/scholar/covariance/shrunk_covariance_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Covariance.ShrunkCovarianceTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Covariance.ShrunkCovariance
4 | doctest ShrunkCovariance
5 |
6 | defp key do
7 | Nx.Random.key(1)
8 | end
9 |
10 | test "fit test - all default options" do
11 | key = key()
12 |
13 | {x, _new_key} =
14 | Nx.Random.multivariate_normal(
15 | key,
16 | Nx.tensor([0.0, 0.0, 0.0]),
17 | Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
18 | shape: {10},
19 | type: :f32
20 | )
21 |
22 | model = ShrunkCovariance.fit(x)
23 |
24 | assert_all_close(
25 | model.covariance,
26 | Nx.tensor([
27 | [2.0949244499206543, -0.13400490581989288, 0.5413897037506104],
28 | [-0.13400490581989288, 1.2940725088119507, 0.0621684193611145],
29 | [0.5413897037506104, 0.0621684193611145, 0.9303621053695679]
30 | ]),
31 | atol: 1.0e-3
32 | )
33 |
34 | assert_all_close(
35 | model.location,
36 | Nx.tensor([-1.015519142150879, -0.4495307505130768, 0.06475571542978287]),
37 | atol: 1.0e-3
38 | )
39 | end
40 |
41 | test "fit test - :assume_centered? is true" do
42 | key = key()
43 |
44 | {x, _new_key} =
45 | Nx.Random.multivariate_normal(
46 | key,
47 | Nx.tensor([0.0, 0.0, 0.0]),
48 | Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
49 | shape: {10},
50 | type: :f32
51 | )
52 |
53 | model = ShrunkCovariance.fit(x, assume_centered?: true)
54 |
55 | assert_all_close(
56 | model.covariance,
57 | Nx.tensor([
58 | [3.0643274784088135, 0.27685147523880005, 0.4822050631046295],
59 | [0.27685147523880005, 1.5171942710876465, 0.03596973791718483],
60 | [0.4822050631046295, 0.03596973791718483, 0.975387692451477]
61 | ]),
62 | atol: 1.0e-3
63 | )
64 |
65 | assert_all_close(model.location, Nx.tensor(0), atol: 1.0e-3)
66 | end
67 |
68 | test "fit test - :shrinkage" do
69 | key = key()
70 |
71 | {x, _new_key} =
72 | Nx.Random.multivariate_normal(
73 | key,
74 | Nx.tensor([0.0, 0.0, 0.0]),
75 | Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]),
76 | shape: {10},
77 | type: :f32
78 | )
79 |
80 | model = ShrunkCovariance.fit(x, shrinkage: 0.8)
81 |
82 | assert_all_close(
83 | model.covariance,
84 | Nx.tensor([
85 | [1.5853726863861084, -0.029778867959976196, 0.12030883133411407],
86 | [-0.029778867959976196, 1.4074056148529053, 0.013815204612910748],
87 | [0.12030883133411407, 0.013815204612910748, 1.3265810012817383]
88 | ]),
89 | atol: 1.0e-3
90 | )
91 |
92 | assert_all_close(
93 | model.location,
94 | Nx.tensor([-1.015519142150879, -0.4495307505130768, 0.06475571542978287]),
95 | atol: 1.0e-3
96 | )
97 | end
98 |
99 | test "fit test 2" do
100 | key = key()
101 |
102 | {x, _new_key} =
103 | Nx.Random.multivariate_normal(
104 | key,
105 | Nx.tensor([0.0, 0.0]),
106 | Nx.tensor([[2.2, 1.5], [0.7, 1.1]]),
107 | shape: {50},
108 | type: :f32
109 | )
110 |
111 | model = ShrunkCovariance.fit(x)
112 |
113 | assert_all_close(
114 | model.covariance,
115 | Nx.tensor([
116 | [1.9810796976089478, 0.3997809886932373],
117 | [0.3997809886932373, 1.0836023092269897]
118 | ]),
119 | atol: 1.0e-3
120 | )
121 |
122 | assert_all_close(model.location, Nx.tensor([0.06882287561893463, 0.13750331103801727]),
123 | atol: 1.0e-3
124 | )
125 | end
126 |
127 | test "fit test - 1 dim x" do
128 | key = key()
129 |
130 | {x, _new_key} =
131 | Nx.Random.multivariate_normal(key, Nx.tensor([0.0]), Nx.tensor([[0.4]]),
132 | shape: {15},
133 | type: :f32
134 | )
135 |
136 | x = Nx.flatten(x)
137 |
138 | model = ShrunkCovariance.fit(x)
139 |
140 | assert_all_close(
141 | model.covariance,
142 | Nx.tensor([
143 | [0.5322133302688599]
144 | ]),
145 | atol: 1.0e-3
146 | )
147 |
148 | assert_all_close(model.location, Nx.tensor([0.060818854719400406]), atol: 1.0e-3)
149 | end
150 | end
151 |
--------------------------------------------------------------------------------
/test/scholar/feature_extraction/count_vectorizer_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.FeatureExtraction.CountVectorizerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.FeatureExtraction.CountVectorizer
4 | doctest CountVectorizer
5 |
6 | describe "fit_transform" do
7 | test "without padding" do
8 |       tensor = Nx.tensor([[2, 3, 0], [1, 4, 4]])
9 |
10 | counts =
11 |         CountVectorizer.fit_transform(tensor,
12 |           max_token_id: CountVectorizer.max_token_id(tensor)
13 | )
14 |
15 | expected_counts = Nx.tensor([[1, 0, 1, 1, 0], [0, 1, 0, 0, 2]])
16 |
17 | assert counts == expected_counts
18 | end
19 |
20 | test "with padding" do
21 | tensor = Nx.tensor([[2, 3, 0], [1, 4, -1]])
22 |
23 | counts =
24 | CountVectorizer.fit_transform(tensor, max_token_id: CountVectorizer.max_token_id(tensor))
25 |
26 | expected_counts = Nx.tensor([[1, 0, 1, 1, 0], [0, 1, 0, 0, 1]])
27 |
28 | assert counts == expected_counts
29 | end
30 | end
31 |
32 | describe "max_token_id" do
33 | test "without padding" do
34 | tensor = Nx.tensor([[2, 3, 0], [1, 4, 4]])
35 | assert CountVectorizer.max_token_id(tensor) == 4
36 | end
37 |
38 | test "with padding" do
39 | tensor = Nx.tensor([[2, 3, 0], [1, 4, -1]])
40 | assert CountVectorizer.max_token_id(tensor) == 4
41 | end
42 | end
43 |
44 | describe "errors" do
45 | test "wrong input rank" do
46 | assert_raise ArgumentError,
47 | "expected tensor to have shape {num_documents, num_tokens}, got tensor with shape: {3}",
48 | fn ->
49 | CountVectorizer.fit_transform(Nx.tensor([1, 2, 3]), max_token_id: 3)
50 | end
51 | end
52 | end
53 | end
54 |
--------------------------------------------------------------------------------
/test/scholar/impute/knn_imputter_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Impute.KNNImputterTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Impute.KNNImputter
4 | doctest KNNImputter
5 |
6 | describe "general cases" do
7 | def generate_data() do
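  | # {5, 4} iota matrix: values 10..14 (where div(x, 5) == 2) become NaN,
  | # then the entry at [4, 2] (originally 18) is overwritten with 6.0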
8 | x = Nx.iota({5, 4})
9 | x = Nx.select(Nx.equal(Nx.quotient(x, 5), 2), Nx.Constants.nan(), x)
10 | Nx.indexed_put(x, Nx.tensor([[4, 2]]), Nx.tensor([6.0]))
11 | end
12 |
13 | test "general KNN imputer" do
14 | x = generate_data()
15 | jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
16 | jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
17 |
18 | knn_imputer =
19 | %KNNImputter{statistics: statistics, missing_values: missing_values} =
20 | jit_fit.(x, missing_values: :nan, num_neighbors: 2)
21 |
22 | assert missing_values == :nan
23 |
24 | assert statistics ==
25 | Nx.tensor([
26 | [:nan, :nan, :nan, :nan],
27 | [:nan, :nan, :nan, :nan],
28 | [:nan, :nan, 4.0, 5.0],
29 | [2.0, 3.0, 4.0, :nan],
30 | [:nan, :nan, :nan, :nan]
31 | ])
32 |
33 | assert jit_transform.(knn_imputer, x) ==
34 | Nx.tensor([
35 | [0.0, 1.0, 2.0, 3.0],
36 | [4.0, 5.0, 6.0, 7.0],
37 | [8.0, 9.0, 4.0, 5.0],
38 | [2.0, 3.0, 4.0, 15.0],
39 | [16.0, 17.0, 6.0, 19.0]
40 | ])
41 | end
42 |
43 | test "general KNN imputer with different number of neighbors" do
44 | x = generate_data()
45 | jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
46 | jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
47 |
48 | knn_imputter =
49 | %KNNImputter{statistics: statistics, missing_values: missing_values} =
50 | jit_fit.(x, missing_values: :nan, num_neighbors: 1)
51 |
52 | assert missing_values == :nan
53 |
54 | assert statistics ==
55 | Nx.tensor([
56 | [:nan, :nan, :nan, :nan],
57 | [:nan, :nan, :nan, :nan],
58 | [:nan, :nan, 2.0, 3.0],
59 | [0.0, 1.0, 2.0, :nan],
60 | [:nan, :nan, :nan, :nan]
61 | ])
62 |
63 | assert jit_transform.(knn_imputter, x) ==
64 | Nx.tensor([
65 | [0.0, 1.0, 2.0, 3.0],
66 | [4.0, 5.0, 6.0, 7.0],
67 | [8.0, 9.0, 2.0, 3.0],
68 | [0.0, 1.0, 2.0, 15.0],
69 | [16.0, 17.0, 6.0, 19.0]
70 | ])
71 | end
72 |
73 | test "missing values different than :nan" do
74 | x = generate_data()
75 | x = Nx.select(Nx.is_nan(x), 19.0, x)
76 |       # 19.0 now acts as the missing-value sentinel in place of NaN
77 | jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
78 | jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
79 |
80 | knn_imputter =
81 | %KNNImputter{statistics: statistics, missing_values: missing_values} =
82 | jit_fit.(x, missing_values: 19.0, num_neighbors: 2)
83 |
84 | assert missing_values == 19.0
85 |
86 | assert statistics ==
87 | Nx.tensor([
88 | [:nan, :nan, :nan, :nan],
89 | [:nan, :nan, :nan, :nan],
90 | [:nan, :nan, 4.0, 5.0],
91 | [2.0, 3.0, 4.0, :nan],
92 | [:nan, :nan, :nan, 5.0]
93 | ])
94 |
95 | assert jit_transform.(knn_imputter, x) ==
96 | Nx.tensor([
97 | [0.0, 1.0, 2.0, 3.0],
98 | [4.0, 5.0, 6.0, 7.0],
99 | [8.0, 9.0, 4.0, 5.0],
100 | [2.0, 3.0, 4.0, 15.0],
101 | [16.0, 17.0, 6.0, 5.0]
102 | ])
103 | end
104 | end
105 |
106 | describe "errors" do
107 | test "invalid impute rank" do
108 | x = Nx.tensor([1, 2, 2, 3])
109 |
110 | assert_raise ArgumentError,
111 | "wrong input rank. Expected: 2, got: 1",
112 | fn ->
113 | KNNImputter.fit(x, missing_values: 1, num_neighbors: 2)
114 | end
115 | end
116 |
117 | test "invalid n_neighbors value" do
118 | x = generate_data()
119 |
120 | jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
121 |
122 | assert_raise NimbleOptions.ValidationError,
123 | "invalid value for :num_neighbors option: expected positive integer, got: -1",
124 | fn ->
125 | jit_fit.(x, missing_values: 1.0, num_neighbors: -1)
126 | end
127 | end
128 | end
129 | end
130 |
--------------------------------------------------------------------------------
/test/scholar/impute/simple_imputer_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Impute.SimpleImputerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Impute.SimpleImputer
4 | doctest SimpleImputer
5 |
6 | describe "general cases" do
7 | def generate_data() do
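  | # same fixture as in the KNN imputer tests: values 10..14 become NaN
  | # and the entry at [4, 2] is set to 6.0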
8 | x = Nx.iota({5, 4})
9 | x = Nx.select(Nx.equal(Nx.quotient(x, 5), 2), Nx.Constants.nan(), x)
10 | Nx.indexed_put(x, Nx.tensor([[4, 2]]), Nx.tensor([6.0]))
11 | end
12 |
13 | test "general test mode" do
14 | x = generate_data()
15 |
16 | simple_imputer_mode =
17 | %SimpleImputer{statistics: statistics, missing_values: missing_values} =
18 | SimpleImputer.fit(x, missing_values: :nan, strategy: :mode)
19 |
20 | assert statistics == Nx.tensor([0.0, 1.0, 6.0, 3.0])
21 | assert missing_values == :nan
22 |
23 | assert SimpleImputer.transform(simple_imputer_mode, x) ==
24 | Nx.tensor([
25 | [0.0, 1.0, 2.0, 3.0],
26 | [4.0, 5.0, 6.0, 7.0],
27 | [8.0, 9.0, 6.0, 3.0],
28 | [0.0, 1.0, 6.0, 15.0],
29 | [16.0, 17.0, 6.0, 19.0]
30 | ])
31 | end
32 |
33 | test "general test median" do
34 | x = generate_data()
35 |
36 | simple_imputer_median =
37 | %SimpleImputer{statistics: statistics, missing_values: missing_values} =
38 | SimpleImputer.fit(x, missing_values: :nan, strategy: :median)
39 |
40 | assert statistics == Nx.tensor([6.0, 7.0, 6.0, 11.0])
41 | assert missing_values == :nan
42 |
43 | assert SimpleImputer.transform(simple_imputer_median, x) ==
44 | Nx.tensor([
45 | [0.0, 1.0, 2.0, 3.0],
46 | [4.0, 5.0, 6.0, 7.0],
47 | [8.0, 9.0, 6.0, 11.0],
48 | [6.0, 7.0, 6.0, 15.0],
49 | [16.0, 17.0, 6.0, 19.0]
50 | ])
51 | end
52 |
53 | test "general test mean" do
54 | x = generate_data()
55 |
56 | simple_imputer_mean =
57 | %SimpleImputer{statistics: statistics, missing_values: missing_values} =
58 | SimpleImputer.fit(x, missing_values: :nan, strategy: :mean)
59 |
60 | assert_all_close(statistics, Nx.tensor([7.0, 8.0, 4.666666507720947, 11.0]))
61 | assert missing_values == :nan
62 |
63 | assert_all_close(
64 | SimpleImputer.transform(simple_imputer_mean, x),
65 | Nx.tensor([
66 | [0.0, 1.0, 2.0, 3.0],
67 | [4.0, 5.0, 6.0, 7.0],
68 | [8.0, 9.0, 4.666666507720947, 11.0],
69 | [7.0, 8.0, 4.666666507720947, 15.0],
70 | [16.0, 17.0, 6.0, 19.0]
71 | ])
72 | )
73 | end
74 |
75 | test "general test constant value" do
76 | x = generate_data()
77 |
78 | simple_imputer_constant_with_zeros =
79 | %SimpleImputer{statistics: statistics, missing_values: missing_values} =
80 | SimpleImputer.fit(x, missing_values: :nan, strategy: :constant)
81 |
82 | assert statistics == Nx.tensor([0.0, 0.0, 0.0, 0.0])
83 | assert missing_values == :nan
84 |
85 | %SimpleImputer{statistics: statistics, missing_values: missing_values} =
86 | SimpleImputer.fit(x,
87 | missing_values: :nan,
88 | strategy: :constant,
89 | fill_value: 1.37
90 | )
91 |
92 | assert statistics == Nx.tensor([1.37, 1.37, 1.37, 1.37])
93 | assert missing_values == :nan
94 |
95 | assert SimpleImputer.transform(simple_imputer_constant_with_zeros, x) ==
96 | Nx.tensor([
97 | [0.0, 1.0, 2.0, 3.0],
98 | [4.0, 5.0, 6.0, 7.0],
99 | [8.0, 9.0, 0.0, 0.0],
100 | [0.0, 0.0, 0.0, 15.0],
101 | [16.0, 17.0, 6.0, 19.0]
102 | ])
103 | end
104 | end
105 |
106 | test "mode with integer type" do
107 | x = Nx.tile(Nx.tensor([1, 2, 1, 2, 1, 2]), [5, 1]) |> Nx.reshape({6, 5})
108 |
109 | simple_imputer_constant_with_zeros =
110 | %SimpleImputer{statistics: statistics, missing_values: missing_values} =
111 | SimpleImputer.fit(x, missing_values: 1, strategy: :mode)
112 |
113 | assert statistics == Nx.tensor([2, 2, 2, 2, 2])
114 | assert missing_values == 1
115 |
116 | assert SimpleImputer.transform(simple_imputer_constant_with_zeros, x) ==
117 | Nx.broadcast(2, {6, 5})
118 | end
119 |
120 | describe "errors" do
121 | test "Wrong impute rank" do
122 | x = Nx.tensor([1, 2, 2, 3])
123 |
124 | assert_raise ArgumentError,
125 | "wrong input rank. Expected: 2, got: 1",
126 | fn ->
127 | SimpleImputer.fit(x, missing_values: 1, strategy: :mode)
128 | end
129 | end
130 |
131 | test "Wrong :fill_value type" do
132 | x = Nx.tensor([[1.0, 2.0, 2.0, 3.0]])
133 |
134 | assert_raise ArgumentError,
135 | "wrong type of `:fill_value` for the given data. Expected: :f or :bf, got: :s",
136 | fn ->
137 | SimpleImputer.fit(x,
138 | missing_values: 1.0,
139 | strategy: :constant,
140 | fill_value: 2
141 | )
142 | end
143 | end
144 | end
145 | end
146 |
--------------------------------------------------------------------------------
/test/scholar/interpolation/bezier_spline_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Interpolation.BezierSplineTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Interpolation.BezierSpline
4 | doctest BezierSpline
5 |
6 | describe "bezier spline" do
7 | test "fit/2" do
8 | x = Nx.iota({4})
9 | y = Nx.tensor([0, 1, 8, 1])
10 |
11 | model = BezierSpline.fit(x, y)
12 |
13 | assert_all_close(
14 | model.coefficients,
15 | Nx.tensor([
16 | [
17 | [0.0, 0.0],
18 | [0.3333335816860199, -0.5111109614372253],
19 | [0.6666669845581055, -1.0222218036651611],
20 | [1.0, 1.0]
21 | ],
22 | [
23 | [1.0, 1.0],
24 | [1.3333330154418945, 3.022221803665161],
25 | [1.6666665077209473, 7.577777862548828],
26 | [2.0, 8.0]
27 | ],
28 | [
29 | [2.0, 8.0],
30 | [2.3333334922790527, 8.422222137451172],
31 | [2.6666667461395264, 4.711111068725586],
32 | [3.0, 1.0]
33 | ]
34 | ])
35 | )
36 |
37 | assert model.k == Nx.stack([x, y], axis: 1)
38 | end
39 |
40 | test "input validation error cases" do
41 | assert_raise ArgumentError,
42 | "expected x to be a tensor with shape {n}, where n > 2, got: {1, 1, 1}",
43 | fn ->
44 | BezierSpline.fit(Nx.iota({1, 1, 1}), Nx.iota({1, 1, 1}))
45 | end
46 |
47 | assert_raise ArgumentError,
48 | "expected x to be a tensor with shape {n}, where n > 2, got: {}",
49 | fn ->
50 | BezierSpline.fit(Nx.iota({}), Nx.iota({}))
51 | end
52 |
53 | assert_raise ArgumentError,
54 | "expected x to be a tensor with shape {n}, where n > 2, got: {1}",
55 | fn ->
56 | BezierSpline.fit(Nx.iota({1}), Nx.iota({1}))
57 | end
58 |
59 | assert_raise ArgumentError, "expected y to have shape {4}, got: {3}", fn ->
60 | BezierSpline.fit(Nx.iota({4}), Nx.iota({3}))
61 | end
62 | end
63 |
64 | test "predict/2" do
65 | x = Nx.iota({4})
66 | y = Nx.tensor([0, 1, 8, 1])
67 |
68 | model = BezierSpline.fit(x, y)
69 |
70 | assert_all_close(
71 | BezierSpline.predict(model, Nx.tensor([0, 1, 2, 3, -0.5, 0.5, 1.5, 2.5, 3.5]),
72 | max_iter: 20,
73 | eps: 1.0e-3
74 | ),
75 | Nx.tensor([
76 | 0.0,
77 | 0.9881857633590698,
78 | 7.997480392456055,
79 | 1.0217341184616089,
80 | 7.31151374111505e-7,
81 | -0.4500003159046173,
82 | 5.083068370819092,
83 | 6.065662860870361,
84 | -4.0382304191589355
85 | ])
86 | )
87 | end
88 | end
89 | end
90 |
--------------------------------------------------------------------------------
/test/scholar/interpolation/linear_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Interpolation.LinearTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Interpolation.Linear
4 | doctest Linear
5 |
6 | describe "linear" do
7 | test "fit/2" do
8 | x = Nx.iota({4})
9 | y = Nx.tensor([0, 0, 1, 5])
10 |
11 | model = Linear.fit(x, y)
12 |
13 | assert model.coefficients ==
14 | Nx.tensor([
15 | [0.0, 0.0],
16 | [1.0, -1.0],
17 | [4.0, -7.0]
18 | ])
19 | end
20 |
21 | test "input validation error cases" do
22 | assert_raise ArgumentError,
23 | "expected x to be a tensor with shape {n}, where n > 1, got: {1, 1, 1}",
24 | fn ->
25 | Linear.fit(Nx.iota({1, 1, 1}), Nx.iota({1, 1, 1}))
26 | end
27 |
28 | assert_raise ArgumentError,
29 | "expected x to be a tensor with shape {n}, where n > 1, got: {}",
30 | fn ->
31 | Linear.fit(Nx.iota({}), Nx.iota({}))
32 | end
33 |
34 | assert_raise ArgumentError,
35 | "expected x to be a tensor with shape {n}, where n > 1, got: {1}",
36 | fn ->
37 | Linear.fit(Nx.iota({1}), Nx.iota({1}))
38 | end
39 |
40 | assert_raise ArgumentError, "expected y to have shape {4}, got: {3}", fn ->
41 | Linear.fit(Nx.iota({4}), Nx.iota({3}))
42 | end
43 | end
44 |
45 | test "predict/2" do
46 | x = Nx.iota({4})
47 | y = Nx.tensor([0, 0, 1, 5])
48 |
49 | model = Linear.fit(x, y)
50 |
51 | assert Linear.predict(model, Nx.tensor([[[-0.5], [0.5], [1.5], [2.5], [3.5]]])) ==
52 | Nx.tensor([[[0.0], [0.0], [0.5], [3], [7]]])
53 | end
54 |
55 | test "with different types" do
56 | x_s = Nx.tensor([1, 2, 3], type: :u64)
57 | y_s = Nx.tensor([1.0, 2.0, 3.0], type: :f64)
58 | target = Nx.tensor([1, 2], type: :u64)
59 |
60 | assert x_s
61 | |> Scholar.Interpolation.Linear.fit(y_s)
62 | |> Scholar.Interpolation.Linear.predict(target) ==
63 | Nx.tensor([1.0, 2.0], type: :f64)
64 | end
65 | end
66 | end
67 |
--------------------------------------------------------------------------------
/test/scholar/linear/bayesian_ridge_regression_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Linear.BayesianRidgeRegressionTest do
2 | import Nx.Defn
3 | use Scholar.Case, async: true
4 | alias Scholar.Linear.BayesianRidgeRegression
5 | alias Scholar.Linear.RidgeRegression
6 | doctest BayesianRidgeRegression
7 |
8 | test "toy bayesian ridge" do
9 | x = Nx.tensor([[1], [2], [6], [8], [10]])
10 | y = Nx.tensor([1, 2, 6, 8, 10])
11 | clf = BayesianRidgeRegression.fit(x, y)
12 | test = Nx.tensor([[1], [3], [4]])
13 | expected = Nx.tensor([1, 3, 4])
14 | predicted = BayesianRidgeRegression.predict(clf, test)
15 | assert_all_close(expected, predicted, atol: 1.0e-1)
16 | end
17 |
18 | test "toy bayesian ridge with column target" do
19 | x = Nx.tensor([[1], [2], [6], [8], [10]])
20 | y = Nx.tensor([1, 2, 6, 8, 10])
21 | model = BayesianRidgeRegression.fit(x, y)
22 | pred = BayesianRidgeRegression.predict(model, x)
23 | col_model = BayesianRidgeRegression.fit(x, y |> Nx.new_axis(-1))
24 | col_pred = BayesianRidgeRegression.predict(col_model, x)
25 | assert model == col_model
26 | assert pred == col_pred
27 | end
28 |
29 | test "2 column target raises" do
30 | x = Nx.tensor([[1], [2], [6], [8], [10]])
31 | y = Nx.tensor([1, 2, 6, 8, 10])
32 | y = Nx.new_axis(y, -1)
33 | y = Nx.concatenate([y, y], axis: 1)
34 |
35 | message =
36 | "Scholar.Linear.BayesianRidgeRegression expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y))}"
37 |
38 | assert_raise ArgumentError,
39 | message,
40 | fn ->
41 | BayesianRidgeRegression.fit(x, y)
42 | end
43 | end
44 |
45 | test "ridge vs bayesian ridge: parameters" do
46 | x = Nx.tensor([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
47 | y = Nx.tensor([1, 2, 3, 2, 0, 4, 5])
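   | # with fixed hyperparameters the Bayesian posterior mean solves
   | # (lambda / alpha * I + X'X) w = X'y, which is ridge regression with
   | # regularization strength lambda / alpha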
48 | brr = BayesianRidgeRegression.fit(x, y)
49 | rr = RidgeRegression.fit(x, y, alpha: Nx.to_number(brr.lambda) / Nx.to_number(brr.alpha))
50 | assert_all_close(brr.coefficients, rr.coefficients, atol: 1.0e-2)
51 | assert_all_close(brr.intercept, rr.intercept, atol: 1.0e-2)
52 | end
53 |
54 | test "ridge vs bayesian ridge: weights" do
55 | x = Nx.tensor([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
56 | y = Nx.tensor([1, 2, 3, 2, 0, 4, 5])
57 | w = Nx.tensor([4, 3, 3, 1, 1, 2, 3])
58 | brr = BayesianRidgeRegression.fit(x, y, sample_weights: w)
59 |
60 | rr =
61 | RidgeRegression.fit(x, y,
62 | alpha: Nx.to_number(brr.lambda) / Nx.to_number(brr.alpha),
63 | sample_weights: w
64 | )
65 |
66 | assert_all_close(brr.coefficients, rr.coefficients, atol: 1.0e-2)
67 | assert_all_close(brr.intercept, rr.intercept, atol: 1.0e-2)
68 | end
69 |
70 | test "compute scores" do
71 | {x, y} = diabetes_data()
72 | eps = Nx.Constants.smallest_positive_normal(:f64)
73 | alpha = Nx.divide(1, Nx.add(Nx.variance(x), eps))
74 | lambda = 1.0
75 | alpha_1 = 0.1
76 | alpha_2 = 0.1
77 | lambda_1 = 0.1
78 | lambda_2 = 0.1
79 | # compute score
80 | score = compute_score(x, y, alpha, lambda, alpha_1, alpha_2, lambda_1, lambda_2)
81 |
82 | brr =
83 | BayesianRidgeRegression.fit(x, Nx.flatten(y),
84 | alpha_1: alpha_1,
85 | alpha_2: alpha_2,
86 | lambda_1: lambda_1,
87 | lambda_2: lambda_2,
88 | fit_intercept?: true,
89 | compute_scores?: true,
90 | iterations: 1
91 | )
92 |
93 | first_score = brr.scores[0]
94 | assert_all_close(score, first_score, rtol: 0.05)
95 | end
96 |
97 | defnp compute_score(x, y, alpha, lambda, alpha_1, alpha_2, lambda_1, lambda_2) do
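   | # log marginal likelihood up to additive constants: Gamma log-prior terms
   | # for lambda and alpha plus the Gaussian evidence of y, whose covariance is
   | # M = I / alpha + X X' / lambda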
98 | {n_samples, _} = Nx.shape(x)
99 | lambda_score = lambda_1 * Nx.log(lambda) - lambda_2 * lambda
100 | alpha_score = alpha_1 * Nx.log(alpha) - alpha_2 * alpha
101 | m = 1.0 / alpha * Nx.eye(n_samples) + 1.0 / lambda * Nx.dot(x, [-1], x, [-1])
102 | m_inv_dot_y = Nx.LinAlg.solve(m, y)
103 | logdet = m |> Nx.LinAlg.determinant() |> Nx.log()
104 |
105 | y_score =
106 | -0.5 *
107 | (logdet + Nx.dot(y, [0], m_inv_dot_y, [0]) + n_samples * Nx.log(2 * Nx.Constants.pi()))
108 |
109 | alpha_score + lambda_score + y_score
110 | end
111 |
112 | test "constant inputs: prediction. n_features > n_samples" do
113 | key = Nx.Random.key(42)
114 | n_samples = 4
115 | n_features = 5
116 | {constant_value, new_key} = Nx.Random.uniform(key)
117 | {x, _} = Nx.Random.uniform(new_key, shape: {n_samples, n_features}, type: :f64)
118 | y = Nx.broadcast(constant_value, {n_samples})
119 | expected = Nx.broadcast(constant_value, {n_samples})
120 | brr = BayesianRidgeRegression.fit(x, y)
121 | predicted = BayesianRidgeRegression.predict(brr, x)
122 | assert_all_close(expected, predicted, atol: 0.01)
123 | end
124 |
125 | test "constant inputs: variance is constant" do
126 | key = Nx.Random.key(42)
127 | n_samples = 15
128 | n_features = 10
129 | {constant_value, new_key} = Nx.Random.uniform(key)
130 | {x, _} = Nx.Random.uniform(new_key, shape: {n_samples, n_features}, type: :f64)
131 | y = Nx.broadcast(constant_value, {n_samples})
132 | brr = BayesianRidgeRegression.fit(x, y)
133 | check = Nx.less_equal(brr.sigma, 0.01)
134 | assert Nx.all(check) == Nx.u8(1)
135 | end
136 | end
137 |
--------------------------------------------------------------------------------
/test/scholar/linear/logistic_regression_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Linear.LogisticRegressionTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Linear.LogisticRegression
4 | doctest LogisticRegression
5 |
6 | test "Iris Data Set - multinomial logistic regression test" do
7 | {x_train, x_test, y_train, y_test} = iris_data()
8 |
9 | model = LogisticRegression.fit(x_train, y_train, num_classes: 3)
10 | res = LogisticRegression.predict(model, x_test)
11 | accuracy = Scholar.Metrics.Classification.accuracy(res, y_test)
12 |
13 | assert Nx.greater_equal(accuracy, 0.96) == Nx.u8(1)
14 | end
15 |
16 | describe "errors" do
17 | test "when :num_classes is invalid" do
18 | x = Nx.tensor([[1, 2], [3, 4]])
19 | y = Nx.tensor([1, 2])
20 |
21 | assert_raise NimbleOptions.ValidationError,
22 | "invalid value for :num_classes option: expected positive integer, got: -3",
23 | fn ->
24 | LogisticRegression.fit(x, y, num_classes: -3)
25 | end
26 |
27 | assert_raise NimbleOptions.ValidationError,
28 | "invalid value for :num_classes option: expected positive integer, got: 2.0",
29 | fn ->
30 | LogisticRegression.fit(x, y, num_classes: 2.0)
31 | end
32 | end
33 |
34 | test "when missing :num_classes option" do
35 | x = Nx.tensor([[1, 2], [3, 4]])
36 | y = Nx.tensor([0, 1])
37 |
38 | assert_raise NimbleOptions.ValidationError,
39 | "required :num_classes option not found, received options: []",
40 | fn -> LogisticRegression.fit(x, y) end
41 | end
42 |
43 | test "when :optimizer is invalid" do
44 | x = Nx.tensor([[1, 2], [3, 4]])
45 | y = Nx.tensor([1, 2])
46 |
47 | assert_raise NimbleOptions.ValidationError,
48 | "invalid value for :optimizer option: expected :optimizer to be either a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple",
49 | fn ->
50 | LogisticRegression.fit(x, y,
51 | num_classes: 2,
52 | optimizer: :invalid_optimizer
53 | )
54 | end
55 | end
56 |
57 | test "when :iterations is not a positive integer" do
58 | x = Nx.tensor([[1, 2], [3, 4]])
59 | y = Nx.tensor([1, 2])
60 |
61 | assert_raise NimbleOptions.ValidationError,
62 | "invalid value for :iterations option: expected positive integer, got: 0",
63 | fn ->
64 | LogisticRegression.fit(x, y, num_classes: 2, iterations: 0)
65 | end
66 | end
67 |
68 | test "when training vector size is invalid" do
69 | x = Nx.tensor([5, 6])
70 | y = Nx.tensor([1, 2])
71 |
72 | assert_raise ArgumentError,
73 | "expected x to have shape {n_samples, n_features}, got tensor with shape: {2}",
74 | fn -> LogisticRegression.fit(x, y, num_classes: 2) end
75 | end
76 |
77 | test "when target vector size is invalid" do
78 | x = Nx.tensor([[1, 2], [3, 4]])
79 | y = Nx.tensor([[0, 1], [1, 0]])
80 |
81 | assert_raise ArgumentError,
82 | "Scholar.Linear.LogisticRegression expected y to have shape {n_samples}, got tensor with shape: {2, 2}",
83 | fn -> LogisticRegression.fit(x, y, num_classes: 2) end
84 | end
85 | end
86 |
87 | describe "column target tests" do
88 | @tag :wip
89 | test "column target" do
90 | {x_train, _, y_train, _} = iris_data()
91 |
92 | model = LogisticRegression.fit(x_train, y_train, num_classes: 3)
93 | pred = LogisticRegression.predict(model, x_train)
94 | col_model = LogisticRegression.fit(x_train, y_train |> Nx.new_axis(-1), num_classes: 3)
95 | col_pred = LogisticRegression.predict(col_model, x_train)
96 | assert model == col_model
97 | assert pred == col_pred
98 | end
99 | end
100 | end
101 |
--------------------------------------------------------------------------------
/test/scholar/linear/svm_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Linear.SVMTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Linear.SVM
4 | doctest SVM
5 |
6 | test "Iris Data Set - multinomial classification svm test" do
7 | {x_train, x_test, y_train, y_test} = iris_data()
8 |
9 | loss_fn = fn y_pred, y_true ->
10 | Scholar.Linear.SVM.hinge_loss(y_pred, y_true, c: 1.0, margin: 150)
11 | end
12 |
13 | model = SVM.fit(x_train, y_train, num_classes: 3, loss_fn: loss_fn)
14 | res = SVM.predict(model, x_test)
15 |
16 | accuracy = Scholar.Metrics.Classification.accuracy(res, y_test)
17 |
18 | assert Nx.greater_equal(accuracy, 0.96) == Nx.u8(1)
19 | end
20 |
21 | test "test column target" do
22 | {x_train, x_test, y_train, y_test} = iris_data()
23 |
24 | loss_fn = fn y_pred, y_true ->
25 | Scholar.Linear.SVM.hinge_loss(y_pred, y_true, c: 1.0, margin: 150)
26 | end
27 |
28 | col_model = SVM.fit(x_train, y_train |> Nx.new_axis(-1), num_classes: 3, loss_fn: loss_fn)
29 | res = SVM.predict(col_model, x_test)
30 |
31 | model = SVM.fit(x_train, y_train, num_classes: 3, loss_fn: loss_fn)
32 |
33 | accuracy = Scholar.Metrics.Classification.accuracy(res, y_test)
34 |
35 | assert Nx.greater_equal(accuracy, 0.96) == Nx.u8(1)
36 | assert model == col_model
37 | end
38 |
39 | test "test fit 2 columned y data" do
40 | {x_train, _, y_train, _} = iris_data()
41 |
42 | loss_fn = fn y_pred, y_true ->
43 | Scholar.Linear.SVM.hinge_loss(y_pred, y_true, c: 1.0, margin: 150)
44 | end
45 |
46 | y_train = Nx.new_axis(y_train, -1)
47 |
48 | y_train =
49 | Nx.concatenate([y_train, y_train], axis: 1)
50 |
51 | message =
52 | "Scholar.Linear.SVM expected y to have shape {n_samples}, got tensor with shape: #{inspect(Nx.shape(y_train))}"
53 |
54 | assert_raise ArgumentError,
55 | message,
56 | fn -> SVM.fit(x_train, y_train, num_classes: 3, loss_fn: loss_fn) end
57 | end
58 | end
59 |
--------------------------------------------------------------------------------
/test/scholar/metrics/classification_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.ClassificationTest do
2 | use Scholar.Case, async: true
3 |
4 | alias Scholar.Metrics.Classification
5 | doctest Classification
6 |
7 | test "roc_curve - y_score with repeated elements" do
8 | y_score = Nx.tensor([0.1, 0.1, 0.2, 0.2, 0.3, 0.3])
9 | y_true = Nx.tensor([0, 0, 1, 1, 1, 1])
10 | distinct_value_indices = Classification.distinct_value_indices(y_score)
11 |
12 | {fpr, tpr, thresholds} = Classification.roc_curve(y_true, y_score, distinct_value_indices)
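   | # the first threshold (1.3) is max(y_score) + 1, so the curve starts at (0, 0)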
13 | assert_all_close(fpr, Nx.tensor([0.0, 0.0, 0.0, 1.0]))
14 | assert_all_close(tpr, Nx.tensor([0.0, 0.5, 1.0, 1.0]))
15 | assert_all_close(thresholds, Nx.tensor([1.3, 0.3, 0.2, 0.1]))
16 | end
17 |
18 | describe "fbeta_score" do
19 | test "equals recall when beta is infinity" do
20 | beta = Nx.tensor(:infinity)
21 | y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
22 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u32)
23 | fbeta_scores = Classification.fbeta_score(y_true, y_pred, beta, num_classes: 2)
24 |
25 | assert_all_close(fbeta_scores, Classification.recall(y_true, y_pred, num_classes: 2))
26 | end
27 |
28 | test "equals precision when beta is 0" do
29 | beta = 0
30 | y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
31 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u32)
32 | fbeta_scores = Classification.fbeta_score(y_true, y_pred, beta, num_classes: 2)
33 |
34 | assert_all_close(fbeta_scores, Classification.precision(y_true, y_pred, num_classes: 2))
35 | end
36 | end
37 |
38 | describe "mcc/2" do
39 | test "returns 1 for perfect predictions" do
40 | y_true = Nx.tensor([1, 0, 1, 0, 1])
41 | y_pred = Nx.tensor([1, 0, 1, 0, 1])
42 | assert Classification.mcc(y_true, y_pred) == Nx.tensor([1.0], type: :f32)
43 | end
44 |
45 | test "returns -1 for completely wrong predictions" do
46 | y_true = Nx.tensor([1, 0, 1, 0, 1])
47 | y_pred = Nx.tensor([0, 1, 0, 1, 0])
48 | assert Classification.mcc(y_true, y_pred) == Nx.tensor([-1.0], type: :f32)
49 | end
50 |
51 | test "returns 0 when all predictions are positive" do
52 | y_true = Nx.tensor([1, 0, 1, 0, 1])
53 | y_pred = Nx.tensor([1, 1, 1, 1, 1])
54 | assert Classification.mcc(y_true, y_pred) == Nx.tensor([0.0], type: :f32)
55 | end
56 |
57 | test "returns 0 when all predictions are negative" do
58 | y_true = Nx.tensor([1, 0, 1, 0, 1])
59 | y_pred = Nx.tensor([0, 0, 0, 0, 0])
60 | assert Classification.mcc(y_true, y_pred) == Nx.tensor([0.0], type: :f32)
61 | end
62 |
63 | test "computes MCC for generic case" do
64 | y_true = Nx.tensor([1, 0, 1, 0, 1])
65 | y_pred = Nx.tensor([1, 0, 1, 1, 1])
66 | assert Classification.mcc(y_true, y_pred) == Nx.tensor([0.6123723983764648], type: :f32)
67 | end
68 |
69 | test "returns 0 when TP, TN, FP, and FN are all 0" do
70 | y_true = Nx.tensor([0, 0, 0, 0, 0])
71 | y_pred = Nx.tensor([0, 0, 0, 0, 0])
72 | assert Classification.mcc(y_true, y_pred) == Nx.tensor([0.0], type: :f32)
73 | end
74 | end
75 | end
76 |
--------------------------------------------------------------------------------
/test/scholar/metrics/clustering_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.ClusteringTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Metrics.Clustering
4 | doctest Clustering
5 |
6 | defp x do
7 | Nx.tensor([
8 | [0.8157184486105233, -0.01635270455208079, -0.18865907479231095],
9 | [-0.23329832770253972, -1.0670578022770247, -1.4104723030890731],
10 | [-1.7011914621798259, -0.5470603319369067, 0.6322584168194634],
11 | [0.9425204058473541, -1.037556635542662, -0.21666787618406214],
12 | [-0.3690472295854992, 0.09486339114365348, 0.6961098873520952],
13 | [-1.6771195334192937, -1.2952988072907938, 0.08604298953131055],
14 | [-0.15949996798407284, 0.6000955647811537, 0.8663542127627641],
15 | [-1.7895457106619646, -0.1754378695049237, -0.365205321101012],
16 | [-1.394493075734099, 0.07372952442240266, 0.050092862319408904],
17 | [0.7307430281513498, -0.9238089730338562, -1.1574739042650035],
18 | [0.8915021818008122, 0.3179163863864429, -0.08149978809223538],
19 | [-3.168762672235742, -0.9546695365525117, -0.9908465332175033],
20 | [1.4631690777205948, -0.4670129058849814, 0.6461228097855395],
21 | [0.5622446765584268, -1.3052125865380872, 2.1498777623693526],
22 | [1.520536826767848, 0.13949842485542288, 0.6555116085852244],
23 | [0.07897976746203501, -0.6255090568811114, 0.5807450128292253],
24 | [0.9392107340484556, -0.3814452707719886, 0.30010654905670114],
25 | [2.2255470282884557, 0.6745085423783157, -1.6413094715372527],
26 | [-1.1522920682636968, 1.6005950153494766, -0.3385979516219887],
27 | [0.391502390567214, -0.25562956508471424, 0.12315775252511421]
28 | ])
29 | end
30 |
31 | defp y do
32 | Nx.tensor([2, 2, 2, 2, 0, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 0, 1, 2, 1, 2])
33 | end
34 |
35 | defp y_one do
36 | Nx.tensor([2, 2, 2, 2, 0, 2, 1, 3, 2, 2, 2, 2, 2, 1, 4, 0, 1, 2, 1, 2])
37 | end
38 |
39 | test "silhouette_score test" do
40 | expected = Nx.tensor(-0.17424359382143964)
41 |
42 | assert_all_close(Clustering.silhouette_score(x(), y(), num_clusters: 3), expected)
43 | end
44 |
45 | test "silhouette_samples test" do
46 | expected =
47 | Nx.tensor([
48 | -0.2700417935848236,
49 | 0.014513458125293255,
50 | -0.36026713252067566,
51 | -0.2003783881664276,
52 | 0.4819222092628479,
53 | -0.22008605301380157,
54 | -0.526187002658844,
55 | -0.2967069745063782,
56 | -0.3657914698123932,
57 | 0.019084271043539047,
58 | -0.27950319647789,
59 | -0.03889584541320801,
60 | -0.3125722408294678,
61 | -0.3157929480075836,
62 | -0.23939572274684906,
63 | 0.5096476674079895,
64 | -0.4214971363544464,
65 | 0.03587301820516586,
66 | -0.16732777655124664,
67 | -0.5314690470695496
68 | ])
69 |
70 | assert_all_close(Clustering.silhouette_samples(x(), y(), num_clusters: 3), expected)
71 | end
72 |
73 | test "silhouette_score test with one-element clusters" do
74 | expected = Nx.tensor(-0.298110549250206)
75 |
76 | assert_all_close(Clustering.silhouette_score(x(), y_one(), num_clusters: 5), expected)
77 | end
78 |
79 | test "silhouette_samples test with one-element clusters" do
80 | expected =
81 | Nx.tensor([
82 | -0.4016384184360504,
83 | -0.05811696499586105,
84 | -0.5811521410942078,
85 | -0.2139229029417038,
86 | 0.45039641857147217,
87 | -0.5208510756492615,
88 | -0.5239397287368774,
89 | 0.0,
90 | -0.7291823625564575,
91 | 0.019084271043539047,
92 | -0.5117945671081543,
93 | -0.536014974117279,
94 | -0.7470640540122986,
95 | -0.3105389475822449,
96 | 0.0,
97 | 0.4759448170661926,
98 | -0.6119896173477173,
99 | -0.2591235637664795,
100 | -0.37083834409713745,
101 | -0.5314690470695496
102 | ])
103 |
104 | assert_all_close(Clustering.silhouette_samples(x(), y_one(), num_clusters: 5), expected)
105 | end
106 | end
107 |
--------------------------------------------------------------------------------
/test/scholar/metrics/distance_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.DistanceTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Metrics.Distance
4 | doctest Distance
5 |
6 | defp x do
7 | Nx.tensor([
8 | -0.165435329079628,
9 | -1.0515050888061523,
10 | 1.8801462650299072,
11 | 0.2381746470928192,
12 | 0.6978269219398499,
13 | 0.025831177830696106,
14 | 0.11569870263338089,
15 | -0.6905220150947571,
16 | -0.9335482120513916,
17 | -0.025539811700582504
18 | ])
19 | end
20 |
21 | defp y do
22 | Nx.tensor([
23 | 0.5898482203483582,
24 | -0.5769372582435608,
25 | 1.43277108669281,
26 | -0.024414867162704468,
27 | -1.3458243608474731,
28 | 1.669877052307129,
29 | 0.6263275742530823,
30 | 0.8154261708259583,
31 | 0.06888432800769806,
32 | 0.022759810090065002
33 | ])
34 | end
35 |
36 | test "euclidean matches scipy" do
37 | assert_all_close(Distance.euclidean(x(), y()), Nx.tensor(3.388213202573845))
38 | end
39 |
40 | test "squared euclidean matches scipy" do
41 | assert_all_close(Distance.squared_euclidean(x(), y()), Nx.tensor(11.479988706095714))
42 | end
43 |
44 | test "manhattan matches scipy" do
45 | assert_all_close(Distance.manhattan(x(), y()), Nx.tensor(8.694822449237108))
46 | end
47 |
48 | test "chebyshev matches scipy" do
49 | assert_all_close(Distance.chebyshev(x(), y()), Nx.tensor(2.043651282787323))
50 | end
51 |
52 | test "minkowski matches scipy" do
53 | assert_all_close(Distance.minkowski(x(), y()), Nx.tensor(3.388213202573845))
54 | end
55 |
56 | test "minkowski with p set to :infinity matches chebyshev" do
57 | assert_all_close(Distance.minkowski(x(), y(), p: :infinity), Nx.tensor(2.043651282787323))
58 | end
59 |
60 | test "cosine matches scipy" do
61 | assert_all_close(Distance.cosine(x(), y()), Nx.tensor(0.7650632810164779))
62 | end
63 |
64 | test "hamming matches scipy" do
65 | assert Distance.hamming(Nx.tensor([1, 0, 0]), Nx.tensor([0, 1, 0])) ==
66 | Nx.tensor(0.6666666865348816)
67 |
68 | assert Distance.hamming(Nx.tensor([1, 0, 0]), Nx.tensor([1, 1, 0])) ==
69 | Nx.tensor(0.3333333432674408)
70 |
71 | assert Distance.hamming(Nx.tensor([1, 0, 0]), Nx.tensor([2, 0, 0])) ==
72 | Nx.tensor(0.3333333432674408)
73 |
74 | assert Distance.hamming(Nx.tensor([1, 0, 0]), Nx.tensor([3, 0, 0])) ==
75 | Nx.tensor(0.3333333432674408)
76 | end
77 | end
78 |
--------------------------------------------------------------------------------
/test/scholar/metrics/neighbors_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.NeighborsTest do
2 | use ExUnit.Case, async: true
3 | alias Scholar.Metrics.Neighbors
4 | doctest Neighbors
5 | end
6 |
--------------------------------------------------------------------------------
/test/scholar/metrics/ranking_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.RankingTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Metrics.Ranking
4 |
5 | describe "dcg/3" do
6 | test "computes DCG when there are no ties" do
7 | y_true = Nx.tensor([3, 2, 3, 0, 1, 2])
8 | y_score = Nx.tensor([3.0, 2.2, 3.5, 0.5, 1.0, 2.1])
9 |
10 | result = Ranking.dcg(y_true, y_score)
11 |
12 | x = Nx.tensor([7.140995025634766])
13 | assert x == Nx.broadcast(result, {1})
14 | end
15 |
16 | test "computes DCG with ties" do
17 | y_true = Nx.tensor([3, 3, 3])
18 | y_score = Nx.tensor([2.0, 2.0, 3.5])
19 |
20 | result = Ranking.dcg(y_true, y_score)
21 |
22 | x = Nx.tensor([6.3927892607143715])
23 | assert x == Nx.broadcast(result, {1})
24 | end
25 |
26 | test "raises error when shapes mismatch" do
27 | y_true = Nx.tensor([3, 2, 3])
28 | y_score = Nx.tensor([3.0, 2.2, 3.5, 0.5])
29 |
30 | assert_raise ArgumentError,
31 | "expected tensor to have shape {3}, got tensor with shape {4}",
32 | fn ->
33 | Ranking.dcg(y_true, y_score)
34 | end
35 | end
36 |
37 | test "computes DCG for top-k values" do
38 | y_true = Nx.tensor([3, 2, 3, 0, 1, 2])
39 | y_score = Nx.tensor([3.0, 2.2, 3.5, 0.5, 1.0, 2.1])
40 |
41 | result = Ranking.dcg(y_true, y_score, k: 3)
42 |
43 | x = Nx.tensor([5.892789363861084])
44 | assert x == Nx.broadcast(result, {1})
45 | end
46 | end
47 | end
48 |
--------------------------------------------------------------------------------
/test/scholar/metrics/regression_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.RegressionTest do
2 | use Scholar.Case, async: true
3 |
4 | alias Scholar.Metrics.Regression
5 | doctest Regression
6 |
7 | describe "mean_tweedie_deviance!/3" do
8 | test "raise when y_pred <= 0 and power < 0" do
9 | power = -1
10 | y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
11 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0], type: :u32)
12 |
13 | assert_raise RuntimeError, ~r/mean Tweedie deviance/, fn ->
14 | Regression.mean_tweedie_deviance!(y_true, y_pred, power)
15 | end
16 | end
17 |
18 | test "raise when y_pred <= 0 and 1 <= power < 2" do
19 | power = 1
20 | y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
21 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0], type: :u32)
22 |
23 | assert_raise RuntimeError, ~r/mean Tweedie deviance/, fn ->
24 | Regression.mean_tweedie_deviance!(y_true, y_pred, power)
25 | end
26 | end
27 |
28 | test "raise when y_pred <= 0 and power >= 2" do
29 | power = 2
30 | y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
31 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0], type: :u32)
32 |
33 | assert_raise RuntimeError, ~r/mean Tweedie deviance/, fn ->
34 | Regression.mean_tweedie_deviance!(y_true, y_pred, power)
35 | end
36 | end
37 |
38 | test "raise when y_true < 0 and 1 <= power < 2" do
39 | power = 1
40 | y_true = Nx.tensor([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :s32)
41 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type: :s32)
42 |
43 | assert_raise RuntimeError, ~r/mean Tweedie deviance/, fn ->
44 | Regression.mean_tweedie_deviance!(y_true, y_pred, power)
45 | end
46 | end
47 |
48 | test "raise when y_true <= 0 and power >= 2" do
49 | power = 2
50 | y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :s32)
51 | y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type: :s32)
52 |
53 | assert_raise RuntimeError, ~r/mean Tweedie deviance/, fn ->
54 | Regression.mean_tweedie_deviance!(y_true, y_pred, power)
55 | end
56 | end
57 | end
58 |
59 | describe "d2_tweedie_score/3" do
60 | test "equal R^2 when power is 0" do
61 | y_true = Nx.tensor([1, 1, 1, 1, 1, 2, 2, 1, 3, 1], type: :u32)
62 | y_pred = Nx.tensor([2, 2, 1, 1, 2, 2, 2, 1, 3, 1], type: :u32)
63 | d2 = Regression.d2_tweedie_score(y_true, y_pred, 0)
64 | r2 = Regression.r2_score(y_true, y_pred)
65 |
66 |       # a bare tensor is truthy even when it equals 0, so compare explicitly
67 |       assert Nx.equal(d2, r2) == Nx.u8(1)
67 | end
68 | end
69 |
70 | describe "mean_pinball_loss/3" do
71 | test "mean_pinball_loss cases from sklearn" do
72 | # Test cases copied from sklearn:
73 | # https://github.com/scikit-learn/scikit-learn/blob/128e40ed593c57e8b9e57a4109928d58fa8bf359/sklearn/metrics/tests/test_regression.py#L49
74 |
75 | y_true = Nx.linspace(1, 50, n: 50)
76 | y_pred = Nx.add(y_true, 1)
77 | y_pred_2 = Nx.add(y_true, -1)
78 |
79 | assert Regression.mean_pinball_loss(y_true, y_pred) == Nx.tensor(0.5)
80 | assert Regression.mean_pinball_loss(y_true, y_pred_2) == Nx.tensor(0.5)
81 | assert Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.4) == Nx.tensor(0.6)
82 | assert Regression.mean_pinball_loss(y_true, y_pred_2, alpha: 0.4) == Nx.tensor(0.4)
83 | end
84 |
85 | test "mean_pinball_loss with axes" do
86 | y_true = Nx.tensor([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
87 | y_pred = Nx.tensor([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
88 |
89 | sample_weight =
90 | Nx.tensor([[0.5, 0.5, 0.5, 1.5], [1.5, 0.5, 1.5, 1.5], [1.5, 1.5, 1.5, 1.5]])
91 |
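   | # 5 of the 12 entries differ by exactly 1, so with the default alpha of 0.5
   | # the unweighted mean loss is 5 * 0.5 / 12 = 5/24 = (1 + 2 / 3) / 8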
92 | expected_error = Nx.tensor((1 + 2 / 3) / 8)
93 | expected_raw_values_tensor = Nx.tensor([0.5, 0.33333333, 0.0, 0.0])
94 | expected_raw_values_weighted_tensor = Nx.tensor([0.5, 0.4, 0.0, 0.0])
95 |
96 | mpbl = Regression.mean_pinball_loss(y_true, y_pred)
97 | assert_all_close(mpbl, expected_error)
98 |       # an exact == comparison fails here due to floating-point error,
99 |       # hence assert_all_close
99 | mpbl =
100 | Regression.mean_pinball_loss(
101 | y_true,
102 | y_pred,
103 | alpha: 0.5
104 | )
105 |
106 | assert_all_close(mpbl, expected_error)
107 | mpbl = Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.5, axes: [0])
108 | assert_all_close(mpbl, expected_raw_values_tensor)
109 |
110 | mpbl =
111 | Regression.mean_pinball_loss(y_true, y_pred,
112 | alpha: 0.5,
113 | sample_weights: sample_weight,
114 | axes: [0]
115 | )
116 |
117 | assert_all_close(mpbl, expected_raw_values_weighted_tensor)
118 |
119 | mpbl =
120 | Regression.mean_pinball_loss(y_true, y_pred,
121 | alpha: 0.5,
122 | sample_weights: sample_weight
123 | )
124 |
125 | assert_all_close(mpbl, Nx.tensor(0.225))
126 | end
127 | end
128 | end
129 |
--------------------------------------------------------------------------------
/test/scholar/metrics/similarity_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Metrics.SimilarityTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Metrics.Similarity
4 | doctest Similarity
5 |
6 | describe "jaccard/2" do
7 | test "returns similarity according to sklearn jaccard_score function" do
8 | x = Nx.tensor([1, 2, 3, 5, 0])
9 | y = Nx.tensor([1, 30, 4, 8, 9])
10 |
11 | assert Similarity.jaccard(x, y) == Nx.tensor(1 / 9)
12 | end
13 |
14 | test "returns 100% of similarity" do
15 | x = Nx.tensor([1, 2, 3])
16 | y = Nx.tensor([1, 2, 3])
17 |
18 | assert Similarity.jaccard(x, y) == Nx.tensor(1.0)
19 | end
20 |
21 | test "returns 0% of similarity" do
22 | x = Nx.tensor([1, 2, 3])
23 | y = Nx.tensor([4, 5, 6])
24 |
25 | assert Similarity.jaccard(x, y) == Nx.tensor(0.0)
26 | end
27 |
28 | test "returns 20% of similarity" do
29 | x = Nx.tensor([1, 2, 3])
30 | y = Nx.tensor([3, 4, 5])
31 |
32 | assert Similarity.jaccard(x, y) == Nx.tensor(0.20)
33 | end
34 |
35 | test "returns similarity when tensors have a single element" do
36 | x = Nx.tensor([1])
37 | y = Nx.tensor([2])
38 |
39 | assert Similarity.jaccard(x, y) == Nx.tensor(0.0)
40 | end
41 |
42 | test "returns similarity when tensor has multiple dimensions" do
43 | x = Nx.tensor([[0, 1, 1], [1, 1, 0]])
44 | y = Nx.tensor([[1, 1, 1], [1, 0, 0]])
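   | # jaccard/2 compares the sets of distinct values, and both tensors reduce to
   | # the set {0, 1}, hence full similarity despite elementwise differences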
45 |
46 | assert Similarity.jaccard(x, y) == Nx.tensor(1.0)
47 | end
48 | end
49 |
50 | describe "binary_jaccard/2" do
51 | test "returns similarity according to sklearn jaccard_score function" do
52 | x = Nx.tensor([1, 0, 0, 1, 1, 1])
53 | y = Nx.tensor([0, 0, 1, 1, 1, 0])
54 |
55 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(0.4)
56 | end
57 |
58 | test "returns 100% of similarity" do
59 | x = Nx.tensor([1, 0, 1])
60 | y = Nx.tensor([1, 0, 1])
61 |
62 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(1.0)
63 | end
64 |
65 | test "returns 0% of similarity" do
66 | x = Nx.tensor([1, 1, 1])
67 | y = Nx.tensor([0, 0, 0])
68 |
69 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(0.0)
70 | end
71 |
72 | test "returns 20% of similarity" do
73 | x = Nx.tensor([1, 0, 1, 0, 1])
74 | y = Nx.tensor([0, 1, 1, 1, 0])
75 |
76 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(0.2)
77 | end
78 |
79 | test "returns similarity when tensors have a single element" do
80 | x = Nx.tensor([1])
81 | y = Nx.tensor([1])
82 |
83 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(1.0)
84 | end
85 |
86 | test "returns similarity when tensors have scalars" do
87 | x = Nx.tensor(1)
88 | y = Nx.tensor(0)
89 |
90 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(0.0)
91 | end
92 |
93 | test "returns similarity when tensor has multiple dimensions" do
94 | x = Nx.tensor([[0, 1, 1], [1, 1, 1]])
95 | y = Nx.tensor([[1, 1, 1], [1, 1, 1]])
96 |
97 | assert Similarity.binary_jaccard(x, y) == Nx.tensor(5 / 6)
98 | end
99 |
100 | test "raises exception when tensors have different shapes" do
101 | x = Nx.tensor([1, 1, 0])
102 | y = Nx.tensor([1, 0])
103 |
104 | assert_raise ArgumentError,
105 | "expected tensor to have shape {3}, got tensor with shape {2}",
106 | fn ->
107 | Similarity.binary_jaccard(x, y)
108 | end
109 | end
110 | end
111 | end
112 |
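jaccard/2 treats the tensors as sets of values, J(X, Y) = |X ∩ Y| / |X ∪ Y|, while binary_jaccard/2 compares entries positionally as binary labels. A quick check of the first jaccard/2 case using plain MapSet (an illustration of the formula, not the tensor implementation):

    # {1, 2, 3, 5, 0} vs {1, 30, 4, 8, 9}: intersection {1}, union has 9 elements.
    x = MapSet.new([1, 2, 3, 5, 0])
    y = MapSet.new([1, 30, 4, 8, 9])
    MapSet.size(MapSet.intersection(x, y)) / MapSet.size(MapSet.union(x, y))
    # => 0.1111... (1/9)

For binary_jaccard/2 the ratio is ones-in-both over ones-in-either: in its first test case two positions are 1 in both tensors and three more are 1 in exactly one, giving 2 / 5 = 0.4.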
--------------------------------------------------------------------------------
/test/scholar/model_selection_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.ModelSelectionTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.ModelSelection
4 | doctest ModelSelection
5 | end
6 |
--------------------------------------------------------------------------------
/test/scholar/neighbors/brute_knn_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.BruteKNNTest do
2 | use ExUnit.Case, async: true
3 | alias Scholar.Neighbors.BruteKNN
4 | doctest BruteKNN
5 |
6 | defp data do
7 | Nx.tensor([
8 | [10, 15],
9 | [46, 63],
10 | [68, 21],
11 | [40, 33],
12 | [25, 54],
13 | [15, 43],
14 | [44, 58],
15 | [45, 40],
16 | [62, 69],
17 | [53, 67]
18 | ])
19 | end
20 |
21 | defp query do
22 | Nx.tensor([
23 | [12, 23],
24 | [55, 30],
25 | [41, 57],
26 | [64, 72],
27 | [26, 39]
28 | ])
29 | end
30 |
31 | defp result do
32 | neighbor_indices =
33 | Nx.tensor(
34 | [
35 | [0, 5, 3],
36 | [7, 3, 2],
37 | [6, 1, 9],
38 | [8, 9, 1],
39 | [5, 4, 3]
40 | ],
41 | type: :u64
42 | )
43 |
44 | neighbor_distances =
45 | Nx.tensor([
46 | [8.246211051940918, 20.2237491607666, 29.73213768005371],
47 | [14.142135620117188, 15.29705810546875, 15.81138801574707],
48 | [3.1622776985168457, 7.8102498054504395, 15.620499610900879],
49 | [3.605551242828369, 12.083045959472656, 20.124610900878906],
50 | [11.704699516296387, 15.033296585083008, 15.231546401977539]
51 | ])
52 |
53 | {neighbor_indices, neighbor_distances}
54 | end
55 |
56 | describe "fit" do
57 | test "default" do
58 | data = data()
59 | k = 3
60 | model = BruteKNN.fit(data, num_neighbors: k)
61 | assert model.num_neighbors == 3
62 | assert model.data == data
63 | assert model.batch_size == nil
64 | end
65 |
66 | test "custom metric and batch_size" do
67 | data = data()
68 | k = 3
69 | metric = &Scholar.Metrics.Distance.minkowski/2
70 | batch_size = 2
71 | model = BruteKNN.fit(data, num_neighbors: k, metric: metric, batch_size: batch_size)
72 | assert model.num_neighbors == k
73 | assert model.metric == metric
74 | assert model.data == data
75 | assert model.batch_size == batch_size
76 | end
77 | end
78 |
79 | describe "predict" do
80 | test "batch_size = 1" do
81 | query = query()
82 | k = 3
83 | model = BruteKNN.fit(data(), num_neighbors: k, batch_size: 1)
84 | {neighbors_true, distances_true} = result()
85 | {neighbors_pred, distances_pred} = BruteKNN.predict(model, query)
86 | assert neighbors_pred == neighbors_true
87 | assert distances_pred == distances_true
88 | end
89 |
90 | test "batch_size = 2" do
91 | query = query()
92 | k = 3
93 | model = BruteKNN.fit(data(), num_neighbors: k, batch_size: 2)
94 | {neighbors_true, distances_true} = result()
95 | {neighbors_pred, distances_pred} = BruteKNN.predict(model, query)
96 | assert neighbors_pred == neighbors_true
97 | assert distances_pred == distances_true
98 | end
99 |
100 | test "batch_size = 5" do
101 | query = query()
102 | k = 3
103 | model = BruteKNN.fit(data(), num_neighbors: k, batch_size: 5)
104 | {neighbors_true, distances_true} = result()
105 | {neighbors_pred, distances_pred} = BruteKNN.predict(model, query)
106 | assert neighbors_pred == neighbors_true
107 | assert distances_pred == distances_true
108 | end
109 |
110 | test "batch_size = 10" do
111 | query = query()
112 | k = 3
113 | model = BruteKNN.fit(data(), num_neighbors: k, batch_size: 10)
114 | {neighbors_true, distances_true} = result()
115 | {neighbors_pred, distances_pred} = BruteKNN.predict(model, query)
116 |
117 | assert neighbors_pred ==
118 | neighbors_true
119 |
120 | assert distances_pred == distances_true
121 | end
122 |
123 | test "custom metric" do
124 | model = BruteKNN.fit(data(), num_neighbors: 3, batch_size: 1, metric: :cosine)
125 | assert {_, _} = BruteKNN.predict(model, query())
126 | end
127 | end
128 | end
129 |
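The fixture in result/0 is exhaustive Euclidean search: compute every query-to-data distance, then keep the k smallest per query. A minimal sketch of that idea (assuming the default Euclidean metric; the library additionally batches the queries):

    # Pairwise distances via broadcasting, then the k smallest per query row.
    data = Nx.tensor([[10, 15], [46, 63], [68, 21], [40, 33], [25, 54]])
    query = Nx.tensor([[12, 23]])
    k = 2

    diff = Nx.subtract(Nx.new_axis(query, 1), Nx.new_axis(data, 0))
    dist = diff |> Nx.pow(2) |> Nx.sum(axes: [2]) |> Nx.sqrt()
    indices = dist |> Nx.argsort(axis: 1) |> Nx.slice_along_axis(0, k, axis: 1)
    # dist[0][0] = sqrt(2^2 + 8^2) ≈ 8.246, the first entry of the fixture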
--------------------------------------------------------------------------------
/test/scholar/neighbors/kd_tree_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.KDTreeTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Neighbors.KDTree
4 | doctest KDTree
5 |
6 | defp example do
7 | Nx.tensor([
8 | [10, 15],
9 | [46, 63],
10 | [68, 21],
11 | [40, 33],
12 | [25, 54],
13 | [15, 43],
14 | [44, 58],
15 | [45, 40],
16 | [62, 69],
17 | [53, 67]
18 | ])
19 | end
20 |
21 | describe "fit" do
22 | test "iota" do
23 | tree = KDTree.fit(Nx.iota({5, 2}))
24 | assert tree.levels == 3
25 | assert tree.indices == Nx.u32([3, 1, 4, 0, 2])
26 | assert tree.num_neighbors == 3
27 | end
28 |
29 | test "float" do
30 | tree = KDTree.fit(Nx.as_type(example(), :f32))
31 | assert tree.levels == 4
32 | assert Nx.to_flat_list(tree.indices) == [1, 5, 9, 3, 6, 2, 8, 0, 7, 4]
33 | assert tree.num_neighbors == 3
34 | end
35 |
36 | test "sample" do
37 | tree = KDTree.fit(example())
38 | assert tree.levels == 4
39 | assert Nx.to_flat_list(tree.indices) == [1, 5, 9, 3, 6, 2, 8, 0, 7, 4]
40 | assert tree.num_neighbors == 3
41 | end
42 | end
43 |
44 | defp x do
45 | Nx.tensor([
46 | [3, 6, 7, 5],
47 | [9, 8, 5, 4],
48 | [4, 4, 4, 1],
49 | [9, 4, 5, 6],
50 | [6, 4, 5, 7],
51 | [4, 5, 3, 3],
52 | [4, 5, 7, 8],
53 | [9, 4, 4, 5],
54 | [8, 4, 3, 9],
55 | [2, 8, 4, 4]
56 | ])
57 | end
58 |
59 | defp x_pred do
60 | Nx.tensor([[4, 3, 8, 4], [1, 6, 1, 1], [3, 7, 9, 2], [5, 2, 1, 2]])
61 | end
62 |
63 | describe "predict knn" do
64 | test "all defaults" do
65 | kdtree = KDTree.fit(x())
66 | {indices, distances} = KDTree.predict(kdtree, x_pred())
67 |
68 | assert indices == Nx.tensor([[0, 6, 4], [5, 2, 9], [0, 9, 2], [5, 2, 7]], type: :s64)
69 |
70 | assert_all_close(
71 | distances,
72 | Nx.tensor([
73 | [3.464101552963257, 4.582575798034668, 4.795831680297852],
74 | [4.242640495300293, 4.690415859222412, 4.795831680297852],
75 | [3.7416574954986572, 5.5677642822265625, 6.0],
76 | [3.872983455657959, 3.872983455657959, 6.164413928985596]
77 | ])
78 | )
79 | end
80 |
81 | test "metric set to {:minkowski, 1.5}" do
82 | kdtree = KDTree.fit(x(), metric: {:minkowski, 1.5})
83 | {indices, distances} = KDTree.predict(kdtree, x_pred())
84 |
85 | assert indices == Nx.tensor([[0, 6, 2], [5, 2, 9], [0, 9, 2], [5, 2, 7]], type: :s64)
86 |
87 | assert_all_close(
88 | distances,
89 | Nx.tensor([
90 | [4.065119743347168, 5.191402435302734, 5.862917423248291],
91 | [5.198591709136963, 5.591182708740234, 5.869683265686035],
92 | [4.334622859954834, 6.35192346572876, 6.9637274742126465],
93 | [4.649191856384277, 4.649191856384277, 7.664907932281494]
94 | ])
95 | )
96 | end
97 |
98 | test "k set to 4" do
99 | kdtree = KDTree.fit(x(), num_neighbors: 4)
100 | {indices, distances} = KDTree.predict(kdtree, x_pred())
101 |
102 | assert indices ==
103 | Nx.tensor([[0, 6, 4, 2], [5, 2, 9, 0], [0, 9, 2, 5], [5, 2, 7, 4]], type: :s64)
104 |
105 | assert_all_close(
106 | distances,
107 | Nx.tensor([
108 | [3.464101552963257, 4.582575798034668, 4.795831680297852, 5.099019527435303],
109 | [4.242640495300293, 4.690415859222412, 4.795831680297852, 7.4833149909973145],
110 | [3.7416574954986572, 5.5677642822265625, 6.0, 6.480740547180176],
111 | [3.872983455657959, 3.872983455657959, 6.164413928985596, 6.78233003616333]
112 | ])
113 | )
114 | end
115 |
116 | test "float type data" do
117 | kdtree = KDTree.fit(x() |> Nx.as_type(:f64), num_neighbors: 4)
118 | {indices, distances} = KDTree.predict(kdtree, x_pred())
119 |
120 | assert indices ==
121 | Nx.tensor([[0, 6, 4, 2], [5, 2, 9, 0], [0, 9, 2, 5], [5, 2, 7, 4]], type: :s64)
122 |
123 | assert_all_close(
124 | distances,
125 | Nx.tensor([
126 | [3.464101552963257, 4.582575798034668, 4.795831680297852, 5.099019527435303],
127 | [4.242640495300293, 4.690415859222412, 4.795831680297852, 7.4833149909973145],
128 | [3.7416574954986572, 5.5677642822265625, 6.0, 6.480740547180176],
129 | [3.872983455657959, 3.872983455657959, 6.164413928985596, 6.78233003616333]
130 | ])
131 | )
132 | end
133 | end
134 | end
135 |
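The expected distances can be verified by hand; for instance, the first query point against its nearest neighbor x[0]:

    # [4, 3, 8, 4] vs [3, 6, 7, 5]: sqrt(1 + 9 + 1 + 1) = sqrt(12) ≈ 3.4641,
    # the first entry of the expected distance tensor.
    Nx.subtract(Nx.tensor([4, 3, 8, 4]), Nx.tensor([3, 6, 7, 5]))
    |> Nx.pow(2)
    |> Nx.sum()
    |> Nx.sqrt()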
--------------------------------------------------------------------------------
/test/scholar/neighbors/knn_regressor_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.KNNRegressorTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Neighbors.KNNRegressor
4 | doctest KNNRegressor
5 |
6 | defp x_train do
7 | Nx.tensor([
8 | [3, 6, 7, 5],
9 | [9, 8, 5, 4],
10 | [4, 4, 4, 1],
11 | [9, 4, 5, 6],
12 | [6, 4, 5, 7],
13 | [4, 5, 3, 3],
14 | [4, 5, 7, 8],
15 | [9, 4, 4, 5],
16 | [8, 4, 3, 9],
17 | [2, 8, 4, 4]
18 | ])
19 | end
20 |
21 | defp y_train do
22 | Nx.tensor([[0], [1], [1], [1], [1], [1], [1], [1], [0], [0]])
23 | end
24 |
25 | defp x do
26 | Nx.tensor([[4, 3, 8, 4], [1, 6, 1, 1], [3, 7, 9, 2], [5, 2, 1, 2]])
27 | end
28 |
29 | describe "fit" do
30 | test "fit with default parameters" do
31 | model = KNNRegressor.fit(x_train(), y_train(), num_neighbors: 3)
32 |
33 | assert model.algorithm == Scholar.Neighbors.BruteKNN.fit(x_train(), num_neighbors: 3)
34 | assert model.labels == y_train()
35 | assert model.weights == :uniform
36 | end
37 |
38 | test "fit with k-d tree" do
39 | model = KNNRegressor.fit(x_train(), y_train(), algorithm: :kd_tree, num_neighbors: 3)
40 |
41 | assert model.algorithm == Scholar.Neighbors.KDTree.fit(x_train(), num_neighbors: 3)
42 | assert model.labels == y_train()
43 | assert model.weights == :uniform
44 | end
45 |
46 | test "fit with random projection forest" do
47 | key = Nx.Random.key(12)
48 |
49 | model =
50 | KNNRegressor.fit(x_train(), y_train(),
51 | algorithm: :random_projection_forest,
52 | num_neighbors: 3,
53 | num_trees: 4,
54 | key: key
55 | )
56 |
57 | assert model.algorithm ==
58 | Scholar.Neighbors.RandomProjectionForest.fit(x_train(),
59 | num_neighbors: 3,
60 | num_trees: 4,
61 | key: key
62 | )
63 |
64 | assert model.labels == y_train()
65 | assert model.weights == :uniform
66 | end
67 | end
68 |
69 | describe "predict" do
70 | test "predict with default parameters" do
71 | model = KNNRegressor.fit(x_train(), y_train(), num_neighbors: 3)
72 | y_pred = KNNRegressor.predict(model, x())
73 | assert_all_close(y_pred, Nx.tensor([[0.66666667], [0.66666667], [0.33333333], [1.0]]))
74 | end
75 |
76 | test "predict with weights set to :distance" do
77 | model = KNNRegressor.fit(x_train(), y_train(), num_neighbors: 3, weights: :distance)
78 | y_pred = KNNRegressor.predict(model, x())
79 | assert_all_close(y_pred, Nx.tensor([[0.59648849], [0.68282796], [0.2716506], [1.0]]))
80 | end
81 |
82 | test "predict with cosine metric and weights set to :distance" do
83 | model =
84 | KNNRegressor.fit(x_train(), y_train(),
85 | num_neighbors: 3,
86 | metric: :cosine,
87 | weights: :distance
88 | )
89 |
90 | y_pred = KNNRegressor.predict(model, x())
91 | assert_all_close(y_pred, Nx.tensor([[0.5736568], [0.427104], [0.33561941], [1.0]]))
92 | end
93 |
94 | test "predict with 2D labels" do
95 | y =
96 | Nx.tensor([
97 | [1, 4],
98 | [0, 3],
99 | [2, 5],
100 | [0, 3],
101 | [0, 3],
102 | [1, 4],
103 | [2, 5],
104 | [0, 3],
105 | [1, 4],
106 | [2, 5]
107 | ])
108 |
109 | model = KNNRegressor.fit(x_train(), y, num_neighbors: 3)
110 | y_pred = KNNRegressor.predict(model, x())
111 |
112 | assert_all_close(
113 | y_pred,
114 | Nx.tensor([
115 | [1.0, 4.0],
116 | [1.6666666269302368, 4.666666507720947],
117 | [1.6666666269302368, 4.666666507720947],
118 | [1.0, 4.0]
119 | ])
120 | )
121 | end
122 |
123 | test "predict with 2D labels, cosine metric and weights set to :distance" do
124 | y =
125 | Nx.tensor([
126 | [1, 4],
127 | [0, 3],
128 | [2, 5],
129 | [0, 3],
130 | [0, 3],
131 | [1, 4],
132 | [2, 5],
133 | [0, 3],
134 | [1, 4],
135 | [2, 5]
136 | ])
137 |
138 | model =
139 | KNNRegressor.fit(x_train(), y, num_neighbors: 3, metric: :cosine, weights: :distance)
140 |
141 | y_pred = KNNRegressor.predict(model, x())
142 |
143 | assert_all_close(
144 | y_pred,
145 | Nx.tensor([
146 | [1.11344606, 4.11344606],
147 | [1.3915288, 4.3915288],
148 | [1.53710155, 4.53710155],
149 | [0.0, 3.0]
150 | ])
151 | )
152 | end
153 | end
154 | end
155 |
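With weights: :uniform the prediction is the plain mean of the k neighbor labels; with weights: :distance each label is weighted by the inverse of its distance. A worked sketch for the first query point, reusing the neighbor indices and distances from the KD-tree fixtures above (an illustration, not the library code):

    # Uniform: neighbor labels 0, 1, 1 average to 2/3 -> the 0.66666667 entries.
    labels = Nx.tensor([0.0, 1.0, 1.0])
    Nx.mean(labels)

    # Distance weighting with the first query's neighbor distances:
    d = Nx.tensor([3.4641, 4.5826, 4.7958])
    w = Nx.divide(1, d)
    Nx.divide(Nx.sum(Nx.multiply(w, labels)), Nx.sum(w))
    # => ~0.59649, matching the weights: :distance assertion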
--------------------------------------------------------------------------------
/test/scholar/neighbors/large_vis_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.LargeVisTest do
2 | use ExUnit.Case, async: true
3 | alias Scholar.Neighbors.LargeVis
4 | doctest LargeVis
5 | end
6 |
--------------------------------------------------------------------------------
/test/scholar/neighbors/nn_descent_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.NNDescentTest do
2 | use ExUnit.Case, async: true
3 | alias Scholar.Neighbors.NNDescent
4 | doctest NNDescent
5 |
6 | test "every point is its own neighbor when num_neighbors is 1" do
7 | key = Nx.Random.key(12)
8 | {tensor, key} = Nx.Random.uniform(key, shape: {10, 5})
9 | size = Nx.axis_size(tensor, 0)
10 |
11 | %NNDescent{nearest_neighbors: nearest_neighbors, distances: distances} =
12 | NNDescent.fit(tensor,
13 | num_neighbors: 1,
14 | key: key
15 | )
16 |
17 | assert Nx.flatten(nearest_neighbors) == Nx.iota({size}, type: :s64)
18 | assert Nx.flatten(distances) == Nx.broadcast(0.0, {size})
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/test/scholar/neighbors/random_projection_forest_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.RandomProjectionForestTest do
2 | use ExUnit.Case, async: true
3 | alias Scholar.Neighbors.RandomProjectionForest
4 | doctest RandomProjectionForest
5 |
6 | defp example do
7 | Nx.tensor([
8 | [10, 15],
9 | [46, 63],
10 | [68, 21],
11 | [40, 33],
12 | [25, 54],
13 | [15, 43],
14 | [44, 58],
15 | [45, 40],
16 | [62, 69],
17 | [53, 67]
18 | ])
19 | end
20 |
21 | describe "fit" do
22 | test "shape" do
23 | tensor = example()
24 |
25 | forest =
26 | RandomProjectionForest.fit(tensor, num_neighbors: 2, num_trees: 4, min_leaf_size: 3)
27 |
28 | assert forest.num_neighbors == 2
29 | assert forest.depth == 1
30 | assert forest.leaf_size == 5
31 | assert forest.num_trees == 4
32 | assert forest.indices.shape == {4, 10}
33 | assert forest.data.shape == {10, 2}
34 | assert forest.hyperplanes.shape == {4, 1, 2}
35 | assert forest.medians.shape == {4, 1}
36 | end
37 | end
38 |
39 | describe "predict" do
40 | test "shape" do
41 | tensor = example()
42 |
43 | forest =
44 | RandomProjectionForest.fit(tensor, num_neighbors: 2, num_trees: 4, min_leaf_size: 3)
45 |
46 | {neighbor_indices, neighbor_distances} =
47 | RandomProjectionForest.predict(forest, Nx.tensor([[20, 30], [30, 50]]))
48 |
49 | assert Nx.shape(neighbor_indices) == {2, 2}
50 | assert Nx.shape(neighbor_distances) == {2, 2}
51 | end
52 |
53 | test "every point is its own neighbor when num_neighbors is 1" do
54 | key = Nx.Random.key(12)
55 | {tensor, key} = Nx.Random.uniform(key, shape: {1000, 10})
56 | size = Nx.axis_size(tensor, 0)
57 |
58 | forest =
59 | RandomProjectionForest.fit(tensor,
60 | num_neighbors: 1,
61 | num_trees: 1,
62 | min_leaf_size: 1,
63 | key: key
64 | )
65 |
66 | {neighbors, distances} = RandomProjectionForest.predict(forest, tensor)
67 | assert Nx.flatten(neighbors) == Nx.iota({size}, type: :u32)
68 | assert Nx.flatten(distances) == Nx.broadcast(0.0, {size})
69 | end
70 |
71 | test "every point is its own neighbor when num_neighbors is 1 and size is power of two" do
72 | key = Nx.Random.key(12)
73 | {tensor, key} = Nx.Random.uniform(key, shape: {1024, 10})
74 | size = Nx.axis_size(tensor, 0)
75 |
76 | forest =
77 | RandomProjectionForest.fit(tensor,
78 | num_neighbors: 1,
79 | num_trees: 1,
80 | min_leaf_size: 1,
81 | key: key
82 | )
83 |
84 | {neighbors, distances} = RandomProjectionForest.predict(forest, tensor)
85 | assert Nx.flatten(neighbors) == Nx.iota({size}, type: :u32)
86 | assert Nx.flatten(distances) == Nx.broadcast(0.0, {size})
87 | end
88 | end
89 | end
90 |
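The asserted shapes follow from the splitting arithmetic: 10 points halved once give two leaves of 5 >= min_leaf_size, while a second split would leave a leaf of 2, so depth == 1 and leaf_size == 5; one split level per tree means one hyperplane and one median per tree, hence hyperplanes.shape == {4, 1, 2} and medians.shape == {4, 1}. A sketch of that arithmetic (an illustration, not the library's internals):

    # Keep splitting while the smaller half of a leaf stays >= min_leaf_size.
    min_leaf_size = 3

    depth =
      Stream.iterate({10, 0}, fn {size, d} -> {div(size, 2), d + 1} end)
      |> Enum.find(fn {size, _d} -> div(size, 2) < min_leaf_size end)
      |> elem(1)
    # => 1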
--------------------------------------------------------------------------------
/test/scholar/neighbors/rnn_regressor_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Neighbors.RadiusNNRegressorTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Neighbors.RadiusNNRegressor
4 | doctest RadiusNNRegressor
5 |
6 | defp x do
7 | Nx.tensor([
8 | [3, 6, 7, 5],
9 | [9, 8, 5, 4],
10 | [4, 4, 4, 1],
11 | [9, 4, 5, 6],
12 | [6, 4, 5, 7],
13 | [4, 5, 3, 3],
14 | [4, 5, 7, 8],
15 | [9, 4, 4, 5],
16 | [8, 4, 3, 9],
17 | [2, 8, 4, 4]
18 | ])
19 | end
20 |
21 | defp y do
22 | Nx.tensor([0, 1, 1, 1, 1, 1, 1, 1, 0, 0])
23 | end
24 |
25 | defp x_pred do
26 | Nx.tensor([[4, 3, 8, 4], [1, 6, 1, 1], [3, 7, 9, 2], [5, 2, 1, 2]])
27 | end
28 |
29 | describe "predict" do
30 | test "predict with weights set to :distance" do
31 | model =
32 | RadiusNNRegressor.fit(x(), y(),
33 | num_classes: 2,
34 | radius: 10,
35 | weights: :distance
36 | )
37 |
38 | predictions = RadiusNNRegressor.predict(model, x_pred())
39 | assert_all_close(predictions, Nx.tensor([0.69033845, 0.71773642, 0.68217609, 0.75918273]))
40 | end
41 |
42 | test "predict with weights set to :distance and with specific metric" do
43 | model =
44 | RadiusNNRegressor.fit(x(), y(),
45 | num_classes: 2,
46 | radius: 10,
47 | weights: :distance,
48 | metric: :cosine
49 | )
50 |
51 | predictions = RadiusNNRegressor.predict(model, x_pred())
52 | assert_all_close(predictions, Nx.tensor([0.683947, 0.54694187, 0.59806132, 0.86398641]))
53 | end
54 |
55 | test "predict with weights set to :distance and with specific metric and 2d labels" do
56 | y =
57 | Nx.tensor([
58 | [1, 4],
59 | [0, 3],
60 | [2, 5],
61 | [0, 3],
62 | [0, 3],
63 | [1, 4],
64 | [2, 5],
65 | [0, 3],
66 | [1, 4],
67 | [2, 5]
68 | ])
69 |
70 | model =
71 | RadiusNNRegressor.fit(x(), y,
72 | num_classes: 3,
73 | radius: 10,
74 | weights: :distance,
75 | metric: :cosine
76 | )
77 |
78 | predictions = RadiusNNRegressor.predict(model, x_pred())
79 |
80 | assert_all_close(
81 | predictions,
82 | Nx.tensor([
83 | [0.99475077, 3.99475077],
84 | [1.20828527, 4.20828527],
85 | [1.15227075, 4.15227075],
86 | [0.37743229, 3.37743229]
87 | ])
88 | )
89 | end
90 | end
91 |
92 | describe "errors" do
93 | test "wrong shape of x" do
94 | x = Nx.tensor([1, 2, 3, 4, 5])
95 | y = Nx.tensor([1, 2, 3, 4, 5])
96 |
97 | assert_raise ArgumentError,
98 | "expected input tensor to have shape {n_samples, n_features} or {num_samples, num_samples},
99 | got tensor with shape: {5}",
100 | fn ->
101 | RadiusNNRegressor.fit(x, y, num_classes: 5)
102 | end
103 | end
104 |
105 | test "wrong shape of y" do
106 | x = Nx.tensor([[1], [2], [3], [4], [5], [6]])
107 | y = Nx.tensor([[[1, 2, 3, 4, 5]]])
108 |
109 | assert_raise ArgumentError,
110 | "expected labels to have shape {num_samples} or {num_samples, num_outputs},
111 | got tensor with shape: {1, 1, 5}",
112 | fn ->
113 | RadiusNNRegressor.fit(x, y, num_classes: 5)
114 | end
115 | end
116 |
117 | test "incompatible shapes of x and y" do
118 | x = Nx.tensor([[1], [2], [3], [4], [5], [6]])
119 | y = Nx.tensor([1, 2, 3, 4, 5])
120 |
121 | assert_raise ArgumentError,
122 | "expected labels to have the same size of the first axis as data,
123 | got: 6 != 5",
124 | fn ->
125 | RadiusNNRegressor.fit(x, y, num_classes: 5)
126 | end
127 | end
128 | end
129 | end
130 |
--------------------------------------------------------------------------------
/test/scholar/preprocessing/binarizer_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.BinarizerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.Binarizer
4 | doctest Binarizer
5 |
6 | describe "binarization" do
7 | test "binarize with positive threshold" do
8 | tensor = Nx.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [-2.0, -1.0, 0.0]])
9 |
10 | jit_binarizer = Nx.Defn.jit(&Binarizer.fit_transform/2)
11 |
12 | result = jit_binarizer.(tensor, threshold: 2.0)
13 |
14 | assert Nx.to_flat_list(result) == [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0]
15 | end
16 |
17 | test "binarize values with default threshold" do
18 | tensor = Nx.tensor([[0.0, -1.0, 2.0], [3.0, 4.0, -5.0], [-2.0, 1.0, 0.0]])
19 |
20 | result = Binarizer.fit_transform(tensor)
21 |
22 | assert Nx.to_flat_list(result) == [0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
23 | end
24 |
25 | test "binarize with threshold less than 0" do
26 | tensor = Nx.tensor([[0.0, 0.5, -0.5], [-0.1, -0.2, -0.3]])
27 | jit_binarizer = Nx.Defn.jit(&Binarizer.fit_transform/2)
28 |
29 | result = jit_binarizer.(tensor, threshold: -0.2)
30 |
31 | assert Nx.to_flat_list(result) == [1.0, 1.0, 0.0, 1.0, 0.0, 0.0]
32 | end
33 | end
34 | end
35 |
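Binarization is a pure elementwise threshold: 1.0 where the value is strictly greater than the threshold, 0.0 otherwise. The first test case reduces to (a sketch, not the library code):

    tensor = Nx.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [-2.0, -1.0, 0.0]])
    # Strictly greater: 2.0 itself maps to 0.0, as the expected output shows.
    Nx.select(Nx.greater(tensor, 2.0), 1.0, 0.0)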
--------------------------------------------------------------------------------
/test/scholar/preprocessing/max_abs_scaler_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.MaxAbsScalerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.MaxAbsScaler
4 |
5 | doctest MaxAbsScaler
6 |
7 | describe "fit_transform/2" do
8 | test "set axes to [0]" do
9 | data = Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]])
10 |
11 | expected =
12 | Nx.tensor([
13 | [0.3333333432674408, -0.3333333432674408, 1.0],
14 | [1.0, 0.0, 0.0],
15 | [0.0, 0.3333333432674408, -0.5],
16 | [0.6666666865348816, 1.0, 0.5]
17 | ])
18 |
19 | assert_all_close(MaxAbsScaler.fit_transform(data, axes: [0]), expected)
20 | end
21 |
22 | test "Work in case where tensor contains only zeros" do
23 | data = Nx.broadcast(Nx.f32(0), {3, 3})
24 | expected = data
25 | assert MaxAbsScaler.fit_transform(data) == expected
26 | end
27 | end
28 | end
29 |
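MaxAbsScaler divides each column by its maximum absolute value, mapping the data into [-1, 1] without centering it; the all-zeros test passes because the library guards the zero-denominator case. The first expected tensor can be reproduced directly (a sketch without that guard):

    data = Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]])
    max_abs = data |> Nx.abs() |> Nx.reduce_max(axes: [0], keep_axes: true)
    Nx.divide(data, max_abs)
    # column maxima are [3, 3, 2], so row 0 becomes [1/3, -1/3, 1.0]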
--------------------------------------------------------------------------------
/test/scholar/preprocessing/min_max_scaler_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.MinMaxScalerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.MinMaxScaler
4 |
5 | doctest MinMaxScaler
6 |
7 | describe "fit_transform/2" do
8 | test "set axes to [0]" do
9 | data = Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]])
10 |
11 | expected =
12 | Nx.tensor([
13 | [0.3333333432674408, 0.0, 1.0],
14 | [1.0, 0.25, 0.3333333432674408],
15 | [0.0, 0.5, 0.0],
16 | [0.6666666865348816, 1.0, 0.6666666865348816]
17 | ])
18 |
19 | assert_all_close(MinMaxScaler.fit_transform(data, axes: [0]), expected)
20 | end
21 |
22 | test "set axes to [0], min_bound to 1, and max_bound to 3" do
23 | data = Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]])
24 |
25 | expected =
26 | Nx.tensor([
27 | [1.6666667461395264, 1.0, 3.0],
28 | [3.0, 1.5, 1.6666667461395264],
29 | [1.0, 2.0, 1.0],
30 | [2.3333334922790527, 3.0, 2.3333334922790527]
31 | ])
32 |
33 | assert_all_close(
34 | MinMaxScaler.fit_transform(data, axes: [0], min_bound: 1, max_bound: 3),
35 | expected
36 | )
37 | end
38 |
39 | test "Work in case where tensor contains only zeros" do
40 | data = Nx.broadcast(Nx.f32(0), {3, 3})
41 | expected = data
42 | assert MinMaxScaler.fit_transform(data) == expected
43 | end
44 | end
45 | end
46 |
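MinMaxScaler maps each column through (x - min) / (max - min) and then stretches the result into [min_bound, max_bound]. Reproducing both expected tensors (a sketch, not the library code):

    data = Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]])
    mins = Nx.reduce_min(data, axes: [0], keep_axes: true)
    maxes = Nx.reduce_max(data, axes: [0], keep_axes: true)
    scaled = Nx.divide(Nx.subtract(data, mins), Nx.subtract(maxes, mins))

    # With min_bound: 1 and max_bound: 3: scaled * (3 - 1) + 1
    Nx.add(Nx.multiply(scaled, 2), 1)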
--------------------------------------------------------------------------------
/test/scholar/preprocessing/normalizer_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.NormalizerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.Normalizer
4 |
5 | doctest Normalizer
6 | end
7 |
--------------------------------------------------------------------------------
/test/scholar/preprocessing/one_hot_encoder_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.OneHotEncoderTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.OneHotEncoder
4 |
5 | doctest OneHotEncoder
6 | end
7 |
--------------------------------------------------------------------------------
/test/scholar/preprocessing/ordinal_encoder_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.OrdinalEncoderTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.OrdinalEncoder
4 |
5 | doctest OrdinalEncoder
6 | end
7 |
--------------------------------------------------------------------------------
/test/scholar/preprocessing/robust_scaler_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.RobustScalerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.RobustScaler
4 | doctest RobustScaler
5 |
6 | describe "fit_transform" do
7 | test "applies scaling to data" do
8 | data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
9 |
10 | expected =
11 | Nx.tensor([
12 | [0.0, -1.0, 1.3333333333333333],
13 | [1.0, 0.0, 0.0],
14 | [-1.0, 1.0, -0.6666666666666666]
15 | ])
16 |
17 | assert_all_close(RobustScaler.fit_transform(data), expected)
18 | end
19 |
20 | test "applies scaling to data with custom quantile range" do
21 | data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
22 |
23 | expected =
24 | Nx.tensor([
25 | [0.0, -0.7142857142857142, 1.0],
26 | [0.7142857142857142, 0.0, 0.0],
27 | [-0.7142857142857142, 0.7142857142857142, -0.5]
28 | ])
29 |
30 | assert_all_close(
31 | RobustScaler.fit_transform(data, quantile_range: {10, 80}),
32 | expected
33 | )
34 | end
35 |
36 | test "handles constant data (all values the same)" do
37 | data = Nx.tensor([[5, 5, 5], [5, 5, 5], [5, 5, 5]])
38 | expected = Nx.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
39 |
40 | assert_all_close(RobustScaler.fit_transform(data), expected)
41 | end
42 |
43 | test "handles already scaled data" do
44 | data = Nx.tensor([[0, -1, 1], [1, 0, 0], [-1, 1, -1]])
45 | expected = data
46 |
47 | assert_all_close(RobustScaler.fit_transform(data), expected)
48 | end
49 |
50 | test "handles single-row tensor" do
51 | data = Nx.tensor([[1, 2, 3]])
52 | expected = Nx.tensor([[0.0, 0.0, 0.0]])
53 |
54 | assert_all_close(RobustScaler.fit_transform(data), expected)
55 | end
56 |
57 | test "handles single-column tensor" do
58 | data = Nx.tensor([[1], [2], [3]])
59 | expected = Nx.tensor([[-1.0], [0.0], [1.0]])
60 |
61 | assert_all_close(RobustScaler.fit_transform(data), expected)
62 | end
63 |
64 | test "handles data with negative values only" do
65 | data = Nx.tensor([[-5, -10, -15], [-15, -5, -20], [-10, -15, -5]])
66 |
67 | expected =
68 | Nx.tensor([
69 | [1.0, 0.0, 0.0],
70 | [-1.0, 1.0, -0.6666666666666666],
71 | [0.0, -1.0, 1.3333333333333333]
72 | ])
73 |
74 | assert_all_close(RobustScaler.fit_transform(data), expected)
75 | end
76 |
77 | test "handles data with extreme outliers" do
78 | data = Nx.tensor([[1, 2, 3], [1000, 2000, 3000], [-1000, -2000, -3000]])
79 |
80 | expected =
81 | Nx.tensor([[0.0, 0.0, 0.0], [0.999, 0.999, 0.999], [-1.001, -1.001, -1.001]])
82 |
83 | assert_all_close(
84 | RobustScaler.fit_transform(data),
85 | expected
86 | )
87 | end
88 | end
89 |
90 | describe "errors" do
91 | test "wrong input rank for fit" do
92 | assert_raise ArgumentError,
93 | "expected tensor to have shape {num_samples, num_features}, got tensor with shape: {1, 1, 1}",
94 | fn ->
95 | RobustScaler.fit(Nx.tensor([[[1]]]))
96 | end
97 | end
98 |
99 | test "wrong input rank for transform" do
100 | assert_raise ArgumentError,
101 | "expected tensor to have shape {num_samples, num_features}, got tensor with shape: {1, 1, 1}",
102 | fn ->
103 | RobustScaler.fit(Nx.tensor([[1]]))
104 | |> RobustScaler.transform(Nx.tensor([[[1]]]))
105 | end
106 | end
107 |
108 | test "wrong quantile range" do
109 | assert_raise NimbleOptions.ValidationError,
110 | "invalid value for :quantile_range option: expected :quantile_range to be a tuple {q_min, q_max} such that 0.0 < q_min < q_max < 100.0, got: {10, 800}",
111 | fn ->
112 | RobustScaler.fit(Nx.tensor([[[1]]]), quantile_range: {10, 800})
113 | end
114 | end
115 | end
116 | end
117 |
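RobustScaler centers on the median and scales by the interquartile range, (x - median) / (q75 - q25), which is why the extreme-outlier test barely moves the non-outlier row. Working the first column of the basic test by hand:

    # Column [1, 2, 0] sorted is [0, 1, 2]: median 1; with linear
    # interpolation q25 = 0.5 and q75 = 1.5, so the IQR is 1.0.
    col = Nx.tensor([1.0, 2.0, 0.0])
    Nx.divide(Nx.subtract(col, 1.0), 1.0)
    # => [0.0, 1.0, -1.0], the first column of the expected tensor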
--------------------------------------------------------------------------------
/test/scholar/preprocessing/standard_scaler_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.Preprocessing.StandardScalerTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing.StandardScaler
4 |
5 | doctest StandardScaler
6 |
7 | describe "fit_transform/2" do
8 | test "applies standard scaling to data" do
9 | data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
10 |
11 | expected =
12 | Nx.tensor([
13 | [0.5212860703468323, -1.3553436994552612, 1.4596009254455566],
14 | [1.4596009254455566, -0.4170288145542145, -0.4170288145542145],
15 | [-0.4170288145542145, 0.5212860703468323, -1.3553436994552612]
16 | ])
17 |
18 | assert_all_close(StandardScaler.fit_transform(data), expected)
19 | end
20 |
21 | test "leaves data as it is when variance is zero" do
22 | data = 42.0
23 | expected = Nx.tensor(data)
24 | assert StandardScaler.fit_transform(data) == expected
25 | end
26 | end
27 | end
28 |
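StandardScaler applies (x - mean) / std, here computed over the whole tensor. The expected values can be reproduced with plain Nx (a sketch, not the library code):

    data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
    mean = Nx.mean(data)
    std = Nx.standard_deviation(data)
    Nx.divide(Nx.subtract(data, mean), std)
    # mean = 4/9 and std ≈ 1.0657, so (1 - 4/9) / 1.0657 ≈ 0.5213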
--------------------------------------------------------------------------------
/test/scholar/preprocessing_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.PreprocessingTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Preprocessing
4 | doctest Preprocessing
5 |
6 | describe "standard_scaler/1" do
7 | test "applies standard scaling to data" do
8 | data = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
9 |
10 | expected =
11 | Nx.tensor([
12 | [0.5212860703468323, -1.3553436994552612, 1.4596009254455566],
13 | [1.4596009254455566, -0.4170288145542145, -0.4170288145542145],
14 | [-0.4170288145542145, 0.5212860703468323, -1.3553436994552612]
15 | ])
16 |
17 | assert_all_close(Preprocessing.standard_scale(data), expected)
18 | end
19 |
20 | test "leaves data as it is when variance is zero" do
21 | data = 42.0
22 | expected = Nx.tensor(data)
23 | assert Preprocessing.standard_scale(data) == expected
24 | end
25 | end
26 | end
27 |
--------------------------------------------------------------------------------
/test/scholar/stats_test.exs:
--------------------------------------------------------------------------------
1 | defmodule Scholar.StatsTest do
2 | use Scholar.Case, async: true
3 | alias Scholar.Stats
4 | doctest Stats
5 |
6 | defp x do
7 | Nx.tensor([[3, 5, 3], [2, 6, 1], [9, 3, 2], [1, 6, 8]])
8 | end
9 |
10 | describe "moment" do
11 | test "moment set to 3" do
12 | expected_moment = Nx.tensor([29.53125, -1.5, 18.0])
13 | assert_all_close(Stats.moment(x(), 3), expected_moment)
14 | end
15 |
16 | test "moment set to 3 and axis set to 1" do
17 | expected_moment = Nx.tensor([0.5925924181938171, 6.0, 19.25926399230957, -12.0])
18 | assert_all_close(Stats.moment(x(), 3, axes: [1]), expected_moment)
19 | end
20 |
21 | test "moment set to 3 and axis set to 1 and keep_axes set to true" do
22 | expected_moment = Nx.tensor([[0.5925924181938171], [6.0], [19.25926399230957], [-12.0]])
23 | assert_all_close(Stats.moment(x(), 3, axes: [1], keep_axes: true), expected_moment)
24 | end
25 |
26 | test "moment set to 3 and axis set to nil" do
27 | expected_moment = Nx.tensor(9.438657407407414)
28 | assert_all_close(Stats.moment(x(), 3, axes: nil), expected_moment)
29 | end
30 |
31 | test "moment set to 3 and axis set to nil and keep_axes set to true" do
32 | expected_moment = Nx.tensor([[9.438657407407414]])
33 | assert_all_close(Stats.moment(x(), 3, axes: nil, keep_axes: true), expected_moment)
34 | end
35 |
36 | test "moment set to 3 and axis set to [0, 1] and keep_axes set to true" do
37 | expected_moment = Nx.tensor([[9.438657407407414]])
38 | assert_all_close(Stats.moment(x(), 3, axes: [0, 1], keep_axes: true), expected_moment)
39 | end
40 | end
41 |
42 | describe "skew" do
43 | test "all defaults" do
44 | expected_skew = Nx.tensor([0.97940938, -0.81649658, 0.9220734])
45 | assert_all_close(Stats.skew(x()), expected_skew)
46 | end
47 |
48 | test "axis set to 1" do
49 | expected_skew = Nx.tensor([0.70710678, 0.59517006, 0.65201212, -0.47033046])
50 | assert_all_close(Stats.skew(x(), axes: [1]), expected_skew)
51 | end
52 |
53 | test "axis set to 1 and keep_axes set to true" do
54 | expected_skew = Nx.tensor([[0.70710678], [0.59517006], [0.65201212], [-0.47033046]])
55 | assert_all_close(Stats.skew(x(), axes: [1], keep_axes: true), expected_skew)
56 | end
57 |
58 | test "axis set to nil" do
59 | expected_skew = Nx.tensor(0.5596660882003394)
60 | assert_all_close(Stats.skew(x(), axes: nil), expected_skew)
61 | end
62 |
63 | test "axis set to nil and keep_axes set to true" do
64 | expected_skew = Nx.tensor([[0.5596660882003394]])
65 | assert_all_close(Stats.skew(x(), axes: nil, keep_axes: true), expected_skew)
66 | end
67 |
68 | test "axis set to [0, 1] and keep_axes set to true" do
69 | expected_skew = Nx.tensor([[0.5596660882003394]])
70 | assert_all_close(Stats.skew(x(), axes: [0, 1], keep_axes: true), expected_skew)
71 | end
72 |
73 | test "axis set to [1] and bias set to false" do
74 | expected_skew = Nx.tensor([1.73205081, 1.45786297, 1.59709699, -1.15206964])
75 | assert_all_close(Stats.skew(x(), axes: [1], bias: false), expected_skew)
76 | end
77 | end
78 |
79 | describe "kurtosis" do
80 | test "all defaults" do
81 | expected_kurtosis = Nx.tensor([-0.79808533, -1.0, -0.83947681])
82 | assert_all_close(Stats.kurtosis(x()), expected_kurtosis)
83 | end
84 |
85 | test "axis set to 1" do
86 | expected_kurtosis = Nx.tensor([-1.5, -1.5, -1.5, -1.5])
87 | assert_all_close(Stats.kurtosis(x(), axes: [1]), expected_kurtosis)
88 | end
89 |
90 | test "axis set to 1 and keep_axes set to true" do
91 | expected_kurtosis = Nx.tensor([[-1.5], [-1.5], [-1.5], [-1.5]])
92 | assert_all_close(Stats.kurtosis(x(), axes: [1], keep_axes: true), expected_kurtosis)
93 | end
94 |
95 | test "axis set to nil" do
96 | expected_kurtosis = Nx.tensor(-0.9383737228328437)
97 | assert_all_close(Stats.kurtosis(x(), axes: nil), expected_kurtosis)
98 | end
99 |
100 | test "axis set to nil and keep_axes set to true" do
101 | expected_kurtosis = Nx.tensor([[-0.9383737228328437]])
102 | assert_all_close(Stats.kurtosis(x(), axes: nil, keep_axes: true), expected_kurtosis)
103 | end
104 |
105 | test "axis set to [0, 1] and keep_axes set to true" do
106 | expected_kurtosis = Nx.tensor([[-0.9383737228328437]])
107 | assert_all_close(Stats.kurtosis(x(), axes: [0, 1], keep_axes: true), expected_kurtosis)
108 | end
109 |
110 | test "axis set to nil and bias set to false" do
111 | expected_kurtosis = Nx.tensor(-0.757638248501074)
112 | assert_all_close(Stats.kurtosis(x(), axes: nil, bias: false), expected_kurtosis)
113 | end
114 |
115 | test "axis set to nil and bias set to false and variant set to pearson" do
116 | expected_kurtosis = Nx.tensor(2.242361751498926)
117 |
118 | assert_all_close(
119 | Stats.kurtosis(x(), axes: nil, bias: false, variant: :pearson),
120 | expected_kurtosis
121 | )
122 | end
123 | end
124 | end
125 |
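The k-th central moment is mean((x - mean(x))^k), and the biased skew is m3 / m2^(3/2). Working the first column of x/0 by hand confirms both fixtures (a sketch, not the library code):

    # Column [3, 2, 9, 1]: mean 3.75, so the cubed deviations average to
    # 29.53125 -- the first entry of the moment-3 fixture.
    col = Nx.tensor([3.0, 2.0, 9.0, 1.0])
    m = Nx.mean(col)
    m2 = col |> Nx.subtract(m) |> Nx.pow(2) |> Nx.mean()
    m3 = col |> Nx.subtract(m) |> Nx.pow(3) |> Nx.mean()
    Nx.divide(m3, Nx.pow(m2, 1.5))
    # => ~0.9794, the first entry of the skew fixture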
--------------------------------------------------------------------------------
/test/support/diabetes_data_raw.csv:
--------------------------------------------------------------------------------
1 | 59 2 32.1 101.0 157 93.2 38.0 4.0 4.8598 87
2 | 48 1 21.6 87.0 183 103.2 70.0 3.0 3.8918 69
3 | 72 2 30.5 93.0 156 93.6 41.0 4.0 4.6728 85
4 | 24 1 25.3 84.0 198 131.4 40.0 5.0 4.8903 89
5 | 50 1 23.0 101.0 192 125.4 52.0 4.0 4.2905 80
6 | 23 1 22.6 89.0 139 64.8 61.0 2.0 4.1897 68
7 | 36 2 22.0 90.0 160 99.6 50.0 3.0 3.9512 82
8 | 66 2 26.2 114.0 255 185.0 56.0 4.55 4.2485 92
9 | 60 2 32.1 83.0 179 119.4 42.0 4.0 4.4773 94
10 | 29 1 30.0 85.0 180 93.4 43.0 4.0 5.3845 88
11 | 22 1 18.6 97.0 114 57.6 46.0 2.0 3.9512 83
12 | 56 2 28.0 85.0 184 144.8 32.0 6.0 3.5835 77
13 | 53 1 23.7 92.0 186 109.2 62.0 3.0 4.3041 81
14 | 50 2 26.2 97.0 186 105.4 49.0 4.0 5.0626 88
15 | 61 1 24.0 91.0 202 115.4 72.0 3.0 4.2905 73
16 | 34 2 24.7 118.0 254 184.2 39.0 7.0 5.037 81
17 | 47 1 30.3 109.0 207 100.2 70.0 3.0 5.2149 98
18 | 68 2 27.5 111.0 214 147.0 39.0 5.0 4.9416 91
19 | 38 1 25.4 84.0 162 103.0 42.0 4.0 4.4427 87
20 | 41 1 24.7 83.0 187 108.2 60.0 3.0 4.5433 78
21 | 35 1 21.1 82.0 156 87.8 50.0 3.0 4.5109 95
22 | 25 2 24.3 95.0 162 98.6 54.0 3.0 3.8501 87
23 | 25 1 26.0 92.0 187 120.4 56.0 3.0 3.9703 88
24 | 61 2 32.0 103.67 210 85.2 35.0 6.0 6.107 124
25 | 31 1 29.7 88.0 167 103.4 48.0 4.0 4.3567 78
26 | 30 2 25.2 83.0 178 118.4 34.0 5.0 4.852 83
27 | 19 1 19.2 87.0 124 54.0 57.0 2.0 4.1744 90
28 | 42 1 31.9 83.0 158 87.6 53.0 3.0 4.4659 101
29 | 63 1 24.4 73.0 160 91.4 48.0 3.0 4.6347 78
30 | 67 2 25.8 113.0 158 54.2 64.0 2.0 5.2933 104
31 | 32 1 30.5 89.0 182 110.6 56.0 3.0 4.3438 89
32 | 42 1 20.3 71.0 161 81.2 66.0 2.0 4.2341 81
33 | 58 2 38.0 103.0 150 107.2 22.0 7.0 4.6444 98
34 | 57 1 21.7 94.0 157 58.0 82.0 2.0 4.4427 92
35 | 53 1 20.5 78.0 147 84.2 52.0 3.0 3.989 75
36 | 62 2 23.5 80.33 225 112.8 86.0 2.62 4.8752 96
37 | 52 1 28.5 110.0 195 97.2 60.0 3.0 5.2417 85
38 | 46 1 27.4 78.0 171 88.0 58.0 3.0 4.8283 90
39 | 48 2 33.0 123.0 253 163.6 44.0 6.0 5.425 97
40 | 48 2 27.7 73.0 191 119.4 46.0 4.0 4.852 92
41 | 50 2 25.6 101.0 229 162.2 43.0 5.0 4.7791 114
42 | 21 1 20.1 63.0 135 69.0 54.0 3.0 4.0943 89
43 | 32 2 25.4 90.33 153 100.4 34.0 4.5 4.5326 83
44 | 54 1 24.2 74.0 204 109.0 82.0 2.0 4.1744 109
45 | 61 2 32.7 97.0 177 118.4 29.0 6.0 4.9972 87
46 | 56 2 23.1 104.0 181 116.4 47.0 4.0 4.4773 79
47 | 33 1 25.3 85.0 155 85.0 51.0 3.0 4.5539 70
48 | 27 1 19.6 78.0 128 68.0 43.0 3.0 4.4427 71
49 | 67 2 22.5 98.0 191 119.2 61.0 3.0 3.989 86
50 | 37 2 27.7 93.0 180 119.4 30.0 6.0 5.0304 88
--------------------------------------------------------------------------------
/test/support/diabetes_target.csv:
--------------------------------------------------------------------------------
1 | 1.510000000000000000e+02
2 | 7.500000000000000000e+01
3 | 1.410000000000000000e+02
4 | 2.060000000000000000e+02
5 | 1.350000000000000000e+02
6 | 9.700000000000000000e+01
7 | 1.380000000000000000e+02
8 | 6.300000000000000000e+01
9 | 1.100000000000000000e+02
10 | 3.100000000000000000e+02
11 | 1.010000000000000000e+02
12 | 6.900000000000000000e+01
13 | 1.790000000000000000e+02
14 | 1.850000000000000000e+02
15 | 1.180000000000000000e+02
16 | 1.710000000000000000e+02
17 | 1.660000000000000000e+02
18 | 1.440000000000000000e+02
19 | 9.700000000000000000e+01
20 | 1.680000000000000000e+02
21 | 6.800000000000000000e+01
22 | 4.900000000000000000e+01
23 | 6.800000000000000000e+01
24 | 2.450000000000000000e+02
25 | 1.840000000000000000e+02
26 | 2.020000000000000000e+02
27 | 1.370000000000000000e+02
28 | 8.500000000000000000e+01
29 | 1.310000000000000000e+02
30 | 2.830000000000000000e+02
31 | 1.290000000000000000e+02
32 | 5.900000000000000000e+01
33 | 3.410000000000000000e+02
34 | 8.700000000000000000e+01
35 | 6.500000000000000000e+01
36 | 1.020000000000000000e+02
37 | 2.650000000000000000e+02
38 | 2.760000000000000000e+02
39 | 2.520000000000000000e+02
40 | 9.000000000000000000e+01
41 | 1.000000000000000000e+02
42 | 5.500000000000000000e+01
43 | 6.100000000000000000e+01
44 | 9.200000000000000000e+01
45 | 2.590000000000000000e+02
46 | 5.300000000000000000e+01
47 | 1.900000000000000000e+02
48 | 1.420000000000000000e+02
49 | 7.500000000000000000e+01
50 | 1.420000000000000000e+02
--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | Application.ensure_all_started(:exla)
2 | ExUnit.start()
3 |
--------------------------------------------------------------------------------