├── test
│   ├── test_helper.exs
│   ├── learn_kit_test.exs
│   └── learn_kit
│       ├── math_test.exs
│       ├── regression
│       │   ├── linear_test.exs
│       │   └── polynomial_test.exs
│       ├── preprocessing_test.exs
│       ├── naive_bayes
│       │   └── gaussian_test.exs
│       └── knn_test.exs
├── .DS_Store
├── lib
│   ├── learn_kit.ex
│   └── learn_kit
│       ├── naive_bayes
│       │   ├── gaussian
│       │   │   ├── fit.ex
│       │   │   ├── score.ex
│       │   │   ├── normalize.ex
│       │   │   └── classify.ex
│       │   └── gaussian.ex
│       ├── preprocessing
│       │   └── normalize.ex
│       ├── regression
│       │   ├── score.ex
│       │   ├── polynomial
│       │   │   └── calculations.ex
│       │   ├── linear
│       │   │   └── calculations.ex
│       │   ├── linear.ex
│       │   └── polynomial.ex
│       ├── knn.ex
│       ├── preprocessing.ex
│       ├── knn
│       │   └── classify.ex
│       └── math.ex
├── .formatter.exs
├── .gitignore
├── mix.exs
├── mix.lock
├── CHANGELOG.md
├── config
│   └── config.exs
└── README.md
--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 |
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kortirso/elixir_learn_kit/HEAD/.DS_Store
--------------------------------------------------------------------------------
/test/learn_kit_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKitTest do
2 |   use ExUnit.Case
3 | end
4 |
--------------------------------------------------------------------------------
/lib/learn_kit.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit do
2 |   @moduledoc """
3 |   Documentation for LearnKit.
4 |   """
5 | end
6 |
--------------------------------------------------------------------------------
/.formatter.exs:
--------------------------------------------------------------------------------
1 | # Used by "mix format"
2 | [
3 |   inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
4 | ]
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # The directory Mix will write compiled artifacts to.
2 | /_build/
3 |
4 | # If you run "mix test --cover", coverage assets end up here.
5 | /cover/
6 |
7 | # The directory Mix downloads your dependencies sources to.
8 | /deps/
9 |
10 | # Where 3rd-party dependencies like ExDoc output generated docs.
11 | /doc/
12 |
13 | # Ignore .fetch files in case you like to edit your project deps locally.
14 | /.fetch
15 |
16 | # If the VM crashes, it generates a dump, let's ignore it too.
17 | erl_crash.dump
18 |
19 | # Also ignore archive artifacts (built via "mix archive.build").
20 | *.ez
21 |
22 | # Ignore package tarball (built via "mix hex.build").
23 | learn_kit-*.tar
24 |
25 |
--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.MixProject do
2 |   use Mix.Project
3 |
4 |   @description """
5 |   Elixir package for machine learning
6 |   """
7 |
8 |   def project do
9 |     [
10 |       app: :learn_kit,
11 |       version: "0.1.6",
12 |       elixir: "~> 1.7",
13 |       name: "LearnKit",
14 |       description: @description,
15 |       source_url: "https://github.com/kortirso/elixir_learn_kit",
16 |       start_permanent: Mix.env() == :prod,
17 |       deps: deps(),
18 |       package: package()
19 |     ]
20 |   end
21 |
22 |   def application do
23 |     [
24 |       extra_applications: [:logger]
25 |     ]
26 |   end
27 |
28 |   defp deps do
29 |     [
30 |       {:ex_doc, "~> 0.19", only: :dev},
31 |       {:matrix, "~> 0.3.2"}
32 |     ]
33 |   end
34 |
35 |   defp package do
36 |     [
37 |       maintainers: ["Anton Bogdanov"],
38 |       licenses: ["MIT"],
39 |       links: %{"GitHub" => "https://github.com/kortirso/elixir_learn_kit"}
40 |     ]
41 |   end
42 | end
43 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/fit.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Fit do
2 |   @moduledoc """
3 |   Module for fit functions
4 |   """
5 |
6 |   alias LearnKit.Math
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp fit_data(data_set) do
11 |         Enum.map(data_set, fn {key, value} ->
12 |           {key, calc_features(value)}
13 |         end)
14 |       end
15 |
16 |       defp calc_features(features) do
17 |         features
18 |         |> Math.transpose()
19 |         |> calc_combination()
20 |       end
21 |
22 |       defp calc_combination(combinations) do
23 |         Enum.map(combinations, fn combination ->
24 |           mean = Math.mean(combination)
25 |           variance = Math.variance(combination, mean)
26 |           standard_deviation = Math.standard_deviation_from_variance(variance)
27 |           %{mean: mean, variance: variance, standard_deviation: standard_deviation}
28 |         end)
29 |       end
30 |     end
31 |   end
32 | end
33 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/score.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Score do
2 |   @moduledoc """
3 |   Module for calculating accuracy of prediction
4 |   """
5 |
6 |   alias LearnKit.NaiveBayes.Gaussian
7 |   alias LearnKit.Math
8 |
9 |   defmacro __using__(_opts) do
10 |     quote do
11 |       defp calc_score(fit_data, data_set) do
12 |         data_set
13 |         |> Enum.map(fn {label, features} ->
14 |           check_features(features, fit_data, label)
15 |         end)
16 |         |> List.flatten()
17 |         |> Math.mean()
18 |         |> Float.ceil(6)
19 |       end
20 |
21 |       defp check_features(features, fit_data, label) do
22 |         Enum.map(features, fn feature ->
23 |           check_feature(feature, fit_data, label)
24 |         end)
25 |       end
26 |
27 |       defp check_feature(feature, fit_data, label) do
28 |         {:ok, {predicted_label, _}} = Gaussian.predict(%Gaussian{fit_data: fit_data}, feature)
29 |         if predicted_label == label, do: 1, else: 0
30 |       end
31 |     end
32 |   end
33 | end
34 |
--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
1 | %{
2 |   "earmark": {:hex, :earmark, "1.2.6", "b6da42b3831458d3ecc57314dff3051b080b9b2be88c2e5aa41cd642a5b044ed", [:mix], []},
3 |   "ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, optional: false]}]},
"519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, optional: false]}]}, 4 | "exprintf": {:hex, :exprintf, "0.2.1", "b7e895dfb00520cfb7fc1671303b63b37dc3897c59be7cbf1ae62f766a8a0314", [:mix], []}, 5 | "makeup": {:hex, :makeup, "0.5.5", "9e08dfc45280c5684d771ad58159f718a7b5788596099bdfb0284597d368a882", [:mix], [{:nimble_parsec, "~> 0.4", [hex: :nimble_parsec, optional: false]}]}, 6 | "makeup_elixir": {:hex, :makeup_elixir, "0.10.0", "0f09c2ddf352887a956d84f8f7e702111122ca32fbbc84c2f0569b8b65cbf7fa", [:mix], [{:makeup, "~> 0.5.5", [hex: :makeup, optional: false]}]}, 7 | "matrix": {:hex, :matrix, "0.3.2", "9c826bc3a1117bf5e1c5cdcf3a3d95456c93bc2e127a04e363e9fc90b724f784", [:mix], [{:exprintf, "~> 0.1", [hex: :exprintf, optional: false]}]}, 8 | "nimble_parsec": {:hex, :nimble_parsec, "0.4.0", "ee261bb53214943679422be70f1658fff573c5d0b0a1ecd0f18738944f818efe", [:mix], []}, 9 | } 10 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 6 | 7 | ## [0.1.6] - 2019-01-08 8 | ### Added 9 | - Polynomial Regression predictor 10 | 11 | ### Modified 12 | - code refactoring 13 | 14 | ## [0.1.5] - 2018-12-18 15 | ### Added 16 | - preprocessing normalization for data set 17 | - normalize train data for Gaussian NB 18 | 19 | ### Modified 20 | - normalization can be selected for KNN 21 | 22 | ## [0.1.4] - 2018-12-17 23 | ### Modified 24 | - errors conditions and tests for KNN 25 | - distance calculation between points for KNN 26 | 27 | ## [0.1.3] - 2018-11-22 28 | ### Modified 29 | - Linear Regression, fit with gradient descent 30 | 31 | ## [0.1.2] - 2018-11-22 32 | ### Added 33 | - CHANGELOG.md file 34 | - Add simple Linear Regression predictor 35 | 36 | ### Modified 37 | - Readme 38 | - Tests 39 | 40 | ## [0.1.1] - 2018-11-19 41 | ### Added 42 | - Gaussian Naive Bayes algorithm 43 | - Math module with useful operations 44 | 45 | ## [0.1.0] - 2018-11-15 46 | ### Added 47 | - K-Nearest Neighbours algorithm 48 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for 9 | # 3rd-party users, it should be done in your "mix.exs" file. 
10 |
11 | # You can configure your application as:
12 | #
13 | #     config :learn_kit, key: :value
14 | #
15 | # and access this configuration in your application as:
16 | #
17 | #     Application.get_env(:learn_kit, :key)
18 | #
19 | # You can also configure a 3rd-party app:
20 | #
21 | #     config :logger, level: :info
22 | #
23 |
24 | # It is also possible to import configuration files, relative to this
25 | # directory. For example, you can emulate configuration per environment
26 | # by uncommenting the line below and defining dev.exs, test.exs and such.
27 | # Configuration from the imported file will override the ones defined
28 | # here (which is why it is important to import them last).
29 | #
30 | #     import_config "#{Mix.env()}.exs"
31 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/normalize.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Normalize do
2 |   @moduledoc """
3 |   Module for normalization of train data
4 |   """
5 |
6 |   alias LearnKit.Preprocessing
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp normalize_data(data_set, type) do
11 |         case type do
12 |           t when t in ["minimax", "z_normalization"] -> normalize(data_set, type)
13 |           _ -> data_set
14 |         end
15 |       end
16 |
17 |       # normalize each feature
18 |       defp normalize(data_set, type) do
19 |         coefficients = find_coefficients_for_normalization(data_set, type)
20 |         Enum.map(data_set, fn {key, features} ->
21 |           {
22 |             key,
23 |             Enum.map(features, fn feature -> Preprocessing.normalize_feature(feature, coefficients, type) end)
24 |           }
25 |         end)
26 |       end
27 |
28 |       # find coefficients for normalization
29 |       defp find_coefficients_for_normalization(data_set, type) do
30 |         Enum.reduce(data_set, [], fn {_, features}, acc ->
31 |           Enum.reduce(features, acc, fn feature, acc -> [feature | acc] end)
32 |         end)
33 |         |> Preprocessing.coefficients(type)
34 |       end
35 |     end
36 |   end
37 | end
38 |
--------------------------------------------------------------------------------
/test/learn_kit/math_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.MathTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Math
4 |
5 |   test "calculate sum" do
6 |     assert 3 = Math.summ(1, 2)
7 |   end
8 |
9 |   test "calculate mean" do
10 |     assert 2.0 = Math.mean([1, 2, 3])
11 |   end
12 |
13 |   test "calculate variance" do
14 |     assert 1.25 = Math.variance([1, 2, 3, 4])
15 |   end
16 |
17 |   test "calculate variance, with calculated mean" do
18 |     assert 1.25 = Math.variance([1, 2, 3, 4], 2.5)
19 |   end
20 |
21 |   test "calculate standard deviation" do
22 |     assert 0.5 = Math.standard_deviation([1, 2])
23 |   end
24 |
25 |   test "calculate standard deviation from variance" do
26 |     assert 1.118033988749895 = Math.standard_deviation_from_variance(1.25)
27 |   end
28 |
29 |   test "calculate division" do
30 |     assert 5.0 = Math.division(10, 2)
31 |   end
32 |
33 |   test "calculate covariance" do
34 |     assert 5.5 = Math.covariance([1, 2, 3], [14, 17, 25])
35 |   end
36 |
37 |   test "calculate correlation" do
38 |     assert 0.9672471299049061 = Math.correlation([1, 2, 3], [14, 17, 25])
39 |   end
40 |
41 |   test "transposing a matrix" do
42 |     assert [[1, 3, 5], [2, 4, 6]] = Math.transpose([[1, 2], [3, 4], [5, 6]])
43 |   end
44 |
45 |   test "scalar multiplication with matrix" do
46 |     assert [50, 60] = Math.scalar_multiply(10, [5, 6])
47 |   end
48 |
49 |   test "vector subtraction" do
50 |     assert [5, 45, 20] = Math.vector_subtraction([40, 50, 60], [35, 5, 40])
51 |   end
52 | end
53 |
--------------------------------------------------------------------------------
/lib/learn_kit/preprocessing/normalize.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Preprocessing.Normalize do
2 |   @moduledoc """
3 |   Module for data normalization
4 |   """
5 |
6 |   alias LearnKit.Math
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp normalization(features, type) do
11 |         list_of_params = coefficients(features, type)
12 |         features
13 |         |> Math.transpose()
14 |         |> Enum.with_index()
15 |         |> Enum.map(fn {feature, index} -> transform_feature(feature, Enum.at(list_of_params, index), type) end)
16 |         |> Math.transpose()
17 |       end
18 |
19 |       defp return_params(list, "minimax") do
20 |         {
21 |           Enum.min(list),
22 |           Enum.max(list)
23 |         }
24 |       end
25 |
26 |       defp return_params(list, "z_normalization") do
27 |         {
28 |           Math.mean(list),
29 |           Math.standard_deviation(list)
30 |         }
31 |       end
32 |
33 |       defp transform_feature(feature, params_for_point, type) do
34 |         divider = define_divider(params_for_point, type)
35 |         case divider do
36 |           0 -> feature
37 |           _ -> Enum.map(feature, fn point -> (point - elem(params_for_point, 0)) / divider end)
38 |         end
39 |       end
40 |
41 |       defp define_divider(params_for_point, "minimax") do
42 |         elem(params_for_point, 1) - elem(params_for_point, 0)
43 |       end
44 |
45 |       defp define_divider(params_for_point, "z_normalization") do
46 |         elem(params_for_point, 1)
47 |       end
48 |     end
49 |   end
50 | end
51 |
--------------------------------------------------------------------------------
/test/learn_kit/regression/linear_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.LinearTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Regression.Linear
4 |
5 |   setup_all do
6 |     {:ok, predictor: Linear.new([1, 2, 3, 4], [3, 6, 10, 15])}
7 |   end
8 |
9 |   test "create new linear predictor with empty data set" do
10 |     assert %Linear{factors: factors, results: results, coefficients: coefficients} = Linear.new
11 |
12 |     assert factors == []
13 |     assert results == []
14 |     assert coefficients == []
15 |   end
16 |
17 |   test "create new linear predictor with data", state do
18 |     assert %Linear{factors: factors, results: results, coefficients: coefficients} = state[:predictor]
19 |
20 |     assert factors == [1, 2, 3, 4]
21 |     assert results == [3, 6, 10, 15]
22 |     assert coefficients == []
23 |   end
24 |
25 |   test "fit data set", state do
26 |     %Linear{coefficients: coefficients} = state[:predictor] |> Linear.fit
27 |
28 |     assert coefficients == [-1.5, 4.0]
29 |   end
30 |
31 |   test "fit data set with gradient descent", state do
32 |     %Linear{coefficients: coefficients} = state[:predictor] |> Linear.fit([method: "gradient descent"])
33 |
34 |     assert [-1.5, 4.0] = coefficients |> Enum.map(fn x -> Float.round(x, 2) end)
35 |   end
36 |
37 |   test "return prediction using the linear model", state do
38 |     predictor = state[:predictor] |> Linear.fit
39 |
40 |     assert {:ok, result} = predictor |> Linear.predict([4, 8, 13])
41 |     assert result == [14.5, 30.5, 50.5]
42 |   end
43 |
44 |   test "returns coefficient of determination R^2 of the prediction", state do
45 |     predictor = state[:predictor] |> Linear.fit
46 |
47 |     assert {:ok, result} = predictor |> Linear.score
48 |     assert result == 0.9876543209876543
49 |   end
50 | end
51 |
--------------------------------------------------------------------------------
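The `[-1.5, 4.0]` coefficients asserted in the linear regression tests above follow from the closed-form least-squares fit used by `Linear.fit/1` (see `lib/learn_kit/regression/linear/calculations.ex`): `beta = correlation(x, y) * sd(y) / sd(x)` and `alpha = mean(y) - beta * mean(x)`. A minimal sketch, reproducing the asserted values with the `LearnKit.Math` helpers defined in this repository:

```elixir
alias LearnKit.Math

factors = [1, 2, 3, 4]
results = [3, 6, 10, 15]

# slope: correlation rescaled by the ratio of standard deviations
beta =
  Math.correlation(factors, results) *
    Math.standard_deviation(results) / Math.standard_deviation(factors)

# intercept: forces the fitted line through the mean point
alpha = Math.mean(results) - beta * Math.mean(factors)

[alpha, beta]
# => [-1.5, 4.0]
```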
/test/learn_kit/preprocessing_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.PreprocessingTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Preprocessing
4 |
5 |   describe "for invalid data" do
6 |     test "use preprocessor with invalid data" do
7 |       assert_raise FunctionClauseError, fn ->
8 |         Preprocessing.normalize("")
9 |       end
10 |     end
11 |
12 |     test "use preprocessor with invalid options" do
13 |       assert_raise FunctionClauseError, fn ->
14 |         Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], "")
15 |       end
16 |     end
17 |   end
18 |
19 |   describe "for valid data" do
20 |     test "prepare coefficients for normalization, minimax" do
21 |       result = Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "minimax")
22 |
23 |       assert result == [{1, 5}, {2, 6}]
24 |     end
25 |
26 |     test "prepare coefficients for normalization, z_normalization" do
27 |       result = Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "z_normalization")
28 |
29 |       assert result == [{3.0, 1.632993161855452}, {4.0, 1.632993161855452}]
30 |     end
31 |
32 |     test "normalize 1 feature with predefined coefficients" do
33 |       result = Preprocessing.normalize_feature([1, 2], [{1, 5}, {2, 6}], "minimax")
34 |
35 |       assert result == [0.0, 0.0]
36 |     end
37 |
38 |     test "normalize data set with minimax normalization" do
39 |       result = Preprocessing.normalize([[1, 2], [3, 4], [5, 6]])
40 |
41 |       assert result == [[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]
42 |     end
43 |
44 |     test "normalize data set with z normalization" do
45 |       result = Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], [type: "z_normalization"])
46 |
47 |       assert result == [[-1.224744871391589, -1.224744871391589], [0.0, 0.0], [1.224744871391589, 1.224744871391589]]
48 |     end
49 |   end
50 | end
51 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/classify.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Classify do
2 |   @moduledoc """
3 |   Module for prediction functions
4 |   """
5 |
6 |   defmacro __using__(_opts) do
7 |     quote do
8 |       # classify data
9 |       # returns data like [label1: 0.03592747361085857, label2: 0.00399309643713954]
10 |       defp classify_data(fit_data, feature) do
11 |         labels_count = fit_data |> Keyword.keys() |> length()
12 |         fit_data
13 |         |> Enum.map(fn {label, fit_results} ->
14 |           {label, class_probability(labels_count, feature, fit_results)}
15 |         end)
16 |       end
17 |
18 |       # compute the final naive Bayesian probability for a given set of features being a part of a given label
19 |       defp class_probability(labels_count, feature, fit_results) do
20 |         class_fraction = 1.0 / labels_count
21 |         feature_bayes = feature_mult(feature, fit_results, 1.0, 0)
22 |         (feature_bayes * class_fraction)
23 |         |> Float.round(10)
24 |       end
25 |
26 |       # multiply together the feature probabilities for all of the features in a label for given values
27 |       defp feature_mult([], _, acc, _), do: acc
28 |
29 |       defp feature_mult([head | tail], fit_results, acc, index) do
30 |         acc = acc * feature_probability(index, head, fit_results)
31 |         feature_mult(tail, fit_results, acc, index + 1)
32 |       end
33 |
34 |       defp feature_probability(index, value, fit_results) do
35 |         # select result from training
36 |         fit_result = Enum.at(fit_results, index)
37 |         # deal with the edge case of a 0 standard deviation
38 |         if fit_result.standard_deviation == 0 do
39 |           if fit_result.mean == value, do: 1.0, else: 0.0
40 |         else
41 |           # calculate the Gaussian probability
42 |           exp = - :math.pow(value - fit_result.mean, 2) / (2 * fit_result.variance)
43 |           :math.exp(exp) / :math.sqrt(2 * :math.pi * fit_result.variance)
44 |         end
45 |       end
46 |     end
47 |   end
48 | end
49 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/score.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Score do
2 |   @moduledoc """
3 |   Module for scoring regression models
4 |   """
5 |
6 |   alias LearnKit.Math
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       @doc """
11 |       Returns the coefficient of determination R^2 of the prediction
12 |
13 |       ## Parameters
14 |
15 |         - predictor: %LearnKit.Regression.Linear{} or %LearnKit.Regression.Polynomial{}
16 |
17 |       ## Examples
18 |
19 |           iex> predictor |> LearnKit.Regression.Linear.score
20 |           {:ok, 0.9876543209876543}
21 |
22 |       """
23 |       @spec score(struct) :: {:ok, number}
24 |
25 |       def score(regression = %_{factors: _, results: _, coefficients: _}) do
26 |         {
27 |           :ok,
28 |           calculate_score(regression)
29 |         }
30 |       end
31 |
32 |       defp calculate_score(%_{coefficients: []}), do: raise("There was no fit for model")
33 |
34 |       defp calculate_score(regression = %_{coefficients: _, factors: _, results: results}) do
35 |         1.0 - sum_of_squared_errors(regression) / total_sum_of_squares(results)
36 |       end
37 |
38 |       defp prediction_error(regression, x, y) do
39 |         {:ok, prediction} = predict(regression, x)
40 |         y - prediction
41 |       end
42 |
43 |       defp sum_of_squared_errors(regression = %_{coefficients: _, factors: factors, results: results}) do
44 |         factors
45 |         |> Enum.zip(results)
46 |         |> Enum.reduce(0, fn {xi, yi}, acc ->
47 |           acc + squared_prediction_error(regression, xi, yi)
48 |         end)
49 |       end
50 |
51 |       defp total_sum_of_squares(list) do
52 |         mean_list = Math.mean(list)
53 |         Enum.reduce(list, 0, fn x, acc -> acc + :math.pow(x - mean_list, 2) end)
54 |       end
55 |
56 |       defp squared_prediction_error(regression, x, y) do
57 |         regression
58 |         |> prediction_error(x, y)
59 |         |> :math.pow(2)
60 |       end
61 |     end
62 |   end
63 | end
64 |
--------------------------------------------------------------------------------
/test/learn_kit/regression/polynomial_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.PolynomialTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Regression.Polynomial
4 |
5 |   setup_all do
6 |     factors = [-3, -2, -1, -0.2, 1, 3]
7 |     results = [0.9, 0.8, 0.4, 0.2, 0.1, 0]
8 |     {:ok, predictor: Polynomial.new(factors, results), factors: factors, results: results}
9 |   end
10 |
11 |   test "create new polynomial predictor with empty data set" do
12 |     assert Polynomial.new() == %Polynomial{}
13 |   end
14 |
15 |   test "create new polynomial predictor with data" do
16 |     assert Polynomial.new([1, 2], [3, 4]) == %Polynomial{coefficients: [], degree: 2, factors: [1, 2], results: [3, 4]}
17 |   end
18 |
19 |   test "fit data set", state do
20 |     %Polynomial{coefficients: coefficients, degree: 2, factors: factors, results: results} = state.predictor |> Polynomial.fit(degree: 2)
21 |
22 |     assert coefficients == [0.2290655593570844, -0.16280041315555793, 0.027763965678671648]
23 |     assert factors == state.factors
24 |     assert results == state.results
25 |   end
26 |
27 |   test "fit data set with degree of 4", state do
28 |     %Polynomial{coefficients: coefficients, degree: 4, factors: factors, results: results} = state.predictor |> Polynomial.fit(degree: 4)
29 |
30 |     assert coefficients == [0.14805723970909512, -0.15811217698985996, 0.12329778502873823, 8.627221168971827e-4, -0.009963024223179073]
31 |     assert factors == state.factors
32 |     assert results == state.results
33 |   end
34 |
35 |   test "predict using the polynomial model of simple sample", state do
36 |     {:ok, result} = state.predictor |> Polynomial.fit(degree: 2) |> Polynomial.predict(3)
37 |
38 |     assert result == -0.009459989001544572
39 |   end
40 |
41 |   test "predict using the polynomial model of multiple samples", state do
42 |     {:ok, result} = state.predictor |> Polynomial.fit(degree: 2) |> Polynomial.predict([3, 5])
43 |
44 |     assert result == [-0.009459989001544572, 0.10916263554608596]
45 |   end
46 |
47 |   test "returns coefficient of determination R^2 of the prediction", state do
48 |     predictor = state.predictor |> Polynomial.fit()
49 |
50 |     assert {:ok, result} = predictor |> Polynomial.score()
51 |     assert result == 0.9614116660464942
52 |   end
53 | end
54 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/polynomial/calculations.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Polynomial.Calculations do
2 |   @moduledoc """
3 |   Module for polynomial regression calculations
4 |   """
5 |
6 |   defmacro __using__(_opts) do
7 |     quote do
8 |       defp do_predict(polynomial, samples) do
9 |         Enum.map(samples, fn sample ->
10 |           {:ok, prediction} = predict(polynomial, sample)
11 |           prediction
12 |         end)
13 |       end
14 |
15 |       defp matrix_line(1, factors, degree) do
16 |         power_ofs = Enum.to_list(1..degree)
17 |         [Enum.count(factors) | sum_of_x_i_with_k(power_ofs, factors)]
18 |       end
19 |
20 |       defp matrix_line(line, factors, degree) do
21 |         line_factor = line - 1
22 |         power_ofs = Enum.to_list(line_factor..(degree + line_factor))
23 |         sum_of_x_i_with_k(power_ofs, factors)
24 |       end
25 |
26 |       defp matrix(factors, degree) do
27 |         lines = Enum.to_list(1..(degree + 1))
28 |         Enum.map(lines, fn line ->
29 |           matrix_line(line, factors, degree)
30 |         end)
31 |       end
32 |
33 |       defp sum_of_x_i_with_k(ks, factors) do
34 |         Enum.map(ks, fn factor ->
35 |           sum_x_with_k(factors, factor, 0.0)
36 |         end)
37 |       end
38 |
39 |       defp substitute_coefficients([], _, _, sum), do: sum
40 |
41 |       defp substitute_coefficients([coefficient | tail], x, k, sum) do
42 |         sum = sum + :math.pow(x, k) * coefficient
43 |         substitute_coefficients(tail, x, k - 1, sum)
44 |       end
45 |
46 |       defp sum_x_with_k([x | tail], k, sum) do
47 |         sum = sum + :math.pow(x, k)
48 |         sum_x_with_k(tail, k, sum)
49 |       end
50 |
51 |       defp sum_x_with_k([], _, sum), do: sum
52 |
53 |       defp sum_x_y_with_k([], [], _degree, sum), do: [sum]
54 |
55 |       defp sum_x_y_with_k([x | xtail], [y | ytail], degree, sum) do
56 |         exponent = degree - 1
57 |         sum = sum + :math.pow(x, exponent) * y
58 |         sum_x_y_with_k(xtail, ytail, degree, sum)
59 |       end
60 |
61 |       defp x_y_matrix(_, _, 0, matrix), do: matrix |> Enum.reverse()
62 |
63 |       defp x_y_matrix(xs, ys, degree, matrix) do
64 |         matrix = matrix ++ [sum_x_y_with_k(xs, ys, degree, 0.0)]
65 |         x_y_matrix(xs, ys, degree - 1, matrix)
66 |       end
67 |     end
68 |   end
69 | end
70 |
--------------------------------------------------------------------------------
/test/learn_kit/naive_bayes/gaussian_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.GaussianTest do
2 |   use ExUnit.Case
3 |
4 |   alias LearnKit.NaiveBayes.Gaussian
5 |
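  # The fit/predict expectations below follow the per-feature Gaussian density
  # implemented in lib/learn_kit/naive_bayes/gaussian/classify.ex:
  #   P(x | label) = exp(-(x - mean)^2 / (2 * variance)) / sqrt(2 * PI * variance)
  # multiplied across features and scaled by the uniform class prior 1 / labels_count.
  # E.g. predict_proba([1, 2]) for :label2 (fitted mean/variance pairs {1.0, 3.5}
  # and {0.5, 2.25}) gives 0.5 * 0.21324 * 0.16132, i.e. the asserted 0.017199571.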
6 |   setup_all do
7 |     {:ok, classifier: Gaussian.new([{:label1, [[-1, -1], [-2, -1], [-3, -2]]}, {:label2, [[1, 1], [2, 1], [3, 2], [-2, -2]]}])}
8 |   end
9 |
10 |   test "create new gaussian classifier with empty data set" do
11 |     assert %Gaussian{data_set: data_set} = Gaussian.new
12 |
13 |     assert data_set == []
14 |   end
15 |
16 |   test "add train data to classifier" do
17 |     %Gaussian{data_set: data_set} =
18 |       Gaussian.new
19 |       |> Gaussian.add_train_data({:a1, [1, 2]})
20 |       |> Gaussian.add_train_data({:a1, [1, 3]})
21 |       |> Gaussian.add_train_data({:b1, [2, 3]})
22 |
23 |     assert data_set == [b1: [[2, 3]], a1: [[1, 3], [1, 2]]]
24 |   end
25 |
26 |   test "normalize data set", state do
27 |     %Gaussian{data_set: data_set} = state[:classifier] |> Gaussian.normalize_train_data("minimax")
28 |
29 |     assert data_set ==
30 |       [
31 |         label1: [[0.3333333333333333, 0.25], [0.16666666666666666, 0.25], [0.0, 0.0]],
32 |         label2: [[0.6666666666666666, 0.75], [0.8333333333333334, 0.75], [1.0, 1.0], [0.16666666666666666, 0.0]]
33 |       ]
34 |   end
35 |
36 |   test "fit data set", state do
37 |     %Gaussian{fit_data: fit_data} = state[:classifier] |> Gaussian.fit
38 |
39 |     assert fit_data ==
40 |       [
41 |         label1: [
42 |           %{mean: -2.0, standard_deviation: 0.816496580927726, variance: 0.6666666666666666},
43 |           %{mean: -1.3333333333333333, standard_deviation: 0.4714045207910317, variance: 0.2222222222222222}
44 |         ],
45 |         label2: [
46 |           %{mean: 1.0, standard_deviation: 1.8708286933869707, variance: 3.5},
47 |           %{mean: 0.5, standard_deviation: 1.5, variance: 2.25}
48 |         ]
49 |       ]
50 |   end
51 |
52 |   test "return probability estimates for the feature", state do
53 |     classifier = state[:classifier] |> Gaussian.fit
54 |
55 |     assert {:ok, result} = classifier |> Gaussian.predict_proba([1, 2])
56 |     assert result == [label1: 0.0, label2: 0.017199571]
57 |   end
58 |
59 |   test "return exact prediction for the feature", state do
60 |     classifier = state[:classifier] |> Gaussian.fit
61 |
62 |     assert {:ok, result} = classifier |> Gaussian.predict([1, 2])
63 |     assert result == {:label2, 0.017199571}
64 |   end
65 |
66 |   test "returns the mean accuracy on the given test data and labels", state do
67 |     classifier = state[:classifier] |> Gaussian.fit
68 |
69 |     assert {:ok, result} = classifier |> Gaussian.score
70 |     assert result == 0.857143
71 |   end
72 | end
73 |
--------------------------------------------------------------------------------
/test/learn_kit/knn_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.KnnTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Knn
4 |
5 |   setup_all do
6 |     {:ok, classifier: Knn.new([{:a1, [[-1, -1], [-2, -1], [-3, -2]]}, {:b1, [[1, 1], [2, 1], [3, 2], [-2, -2]]}])}
7 |   end
8 |
9 |   describe "for invalid data" do
10 |     test "create new classifier with invalid data" do
11 |       assert_raise FunctionClauseError, fn ->
12 |         Knn.new("")
13 |       end
14 |     end
15 |
16 |     test "add train data in invalid format", state do
17 |       assert_raise FunctionClauseError, fn ->
18 |         Knn.add_train_data(state[:classifier], {:something_valid, "invalid"})
19 |       end
20 |     end
21 |
22 |     test "classify without options", state do
23 |       assert_raise FunctionClauseError, fn ->
24 |         Knn.classify(state[:classifier], "")
25 |       end
26 |     end
27 |
28 |     test "classify with empty options", state do
29 |       assert {:error, "Feature option is required"} = Knn.classify(state[:classifier], [])
30 |     end
31 |
32 |     test "classify with invalid feature", state do
33 |       assert {:error, "Feature option must be presented as array"} = Knn.classify(state[:classifier], [feature: "1"])
34 |     end
35 |
36 |     test "classify with invalid k", state do
37 |       assert {:error, "K option must be positive integer"} = Knn.classify(state[:classifier], [feature: [-1, -2], k: -2])
38 |     end
39 |   end
40 |
41 |   describe "for valid data" do
42 |     test "create new knn classifier with empty data set" do
43 |       assert %Knn{data_set: data_set} = Knn.new
44 |
45 |       assert data_set == []
46 |     end
47 |
48 |     test "add train data to classifier" do
49 |       %Knn{data_set: data_set} =
50 |         Knn.new
51 |         |> Knn.add_train_data({:a1, [1, 2]})
52 |         |> Knn.add_train_data({:a1, [1, 3]})
53 |         |> Knn.add_train_data({:b1, [2, 3]})
54 |
55 |       assert data_set == [b1: [[2, 3]], a1: [[1, 3], [1, 2]]]
56 |     end
57 |
58 |     test "classify new feature", state do
59 |       assert {:ok, :a1} = Knn.classify(state[:classifier], [feature: [-1, -2], k: 3, weight: "distance"])
60 |     end
61 |
62 |     test "classify new feature, for existing point", state do
63 |       assert {:ok, :b1} = Knn.classify(state[:classifier], [feature: [-2, -2], k: 3, weight: "uniform"])
64 |     end
65 |
66 |     test "classify new feature, minimax normalization", state do
67 |       assert {:ok, :a1} = Knn.classify(state[:classifier], [feature: [-1, -2], k: 3, weight: "distance", normalization: "minimax"])
68 |     end
69 |
70 |     test "classify new feature, z normalization", state do
71 |       assert {:ok, :a1} = Knn.classify(state[:classifier], [feature: [-1, -2], k: 3, weight: "distance", normalization: "z_normalization"])
72 |     end
73 |   end
74 | end
75 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/linear/calculations.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Linear.Calculations do
2 |   @moduledoc """
3 |   Module for linear regression calculations
4 |   """
5 |
6 |   alias LearnKit.{Math, Regression.Linear}
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp do_fit("gradient descent", %Linear{factors: factors, results: results}) do
11 |         gradient_descent_iteration(
12 |           [:rand.uniform(), :rand.uniform()],
13 |           0.0001,
14 |           nil,
15 |           1_000_000,
16 |           Enum.zip(factors, results),
17 |           0
18 |         )
19 |       end
20 |
21 |       defp do_fit(_, %Linear{factors: factors, results: results}) do
22 |         beta = calc_beta(factors, results)
23 |         alpha = Math.mean(results) - beta * Math.mean(factors)
24 |         [alpha, beta]
25 |       end
26 |
27 |       defp do_predict(linear, samples) do
28 |         Enum.map(samples, fn sample ->
29 |           {:ok, prediction} = predict(linear, sample)
30 |           prediction
31 |         end)
32 |       end
33 |
34 |       defp calc_beta(factors, results) do
35 |         Math.correlation(factors, results) * Math.standard_deviation(results) / Math.standard_deviation(factors)
36 |       end
37 |
38 |       defp squared_error_gradient(linear, x, y) do
39 |         error_variable = prediction_error(linear, x, y)
40 |         [
41 |           -2 * error_variable,
42 |           -2 * error_variable * x
43 |         ]
44 |       end
45 |
46 |       defp gradient_descent_iteration(_, _, min_theta, _, _, no_improve_step) when no_improve_step >= 100, do: min_theta
47 |
48 |       defp gradient_descent_iteration(theta, alpha, min_theta, min_value, data, no_improve_step) do
49 |         [
50 |           min_theta,
51 |           min_value,
52 |           no_improve_step,
53 |           alpha
54 |         ] = check_value(data, min_value, theta, min_theta, no_improve_step, alpha)
55 |
56 |         calc_new_theta(data, theta, alpha)
57 |         |> gradient_descent_iteration(alpha, min_theta, min_value, data, no_improve_step)
58 |       end
59 |
60 |       defp calc_new_theta(data, theta, alpha) do
61 |         data
62 |         |> Enum.shuffle()
63 |         |> Enum.reduce(theta, fn {xi, yi}, acc ->
64 |           gradient_i = squared_error_gradient(%Linear{coefficients: theta}, xi, yi)
65 |           acc |> Math.vector_subtraction(alpha |> Math.scalar_multiply(gradient_i))
66 |         end)
67 |       end
68 |
69 |       defp check_value(data, min_value, theta, min_theta, no_improve_step, alpha) do
70 |         value = calc_new_value(data, theta)
71 |         cond do
72 |           value < min_value -> [theta, value, 0, 0.0001]
73 |           true -> [min_theta, min_value, no_improve_step + 1, alpha * 0.9]
74 |         end
75 |       end
76 |
77 |       defp calc_new_value(data, theta) do
78 |         Enum.reduce(data, 0, fn {xi, yi}, acc ->
79 |           acc + squared_prediction_error(%Linear{coefficients: theta}, xi, yi)
80 |         end)
81 |       end
82 |     end
83 |   end
84 | end
85 |
--------------------------------------------------------------------------------
/lib/learn_kit/knn.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Knn do
2 |   @moduledoc """
3 |   Module for k-nearest neighbours (knn) algorithm
4 |   """
5 |
6 |   defstruct data_set: []
7 |
8 |   alias LearnKit.Knn
9 |   use Knn.Classify
10 |
11 |   @type label :: atom
12 |   @type feature :: [integer]
13 |   @type point :: {label, feature}
14 |   @type features :: [feature]
15 |   @type data_set :: [{label, features}]
16 |
17 |   @doc """
18 |   Creates classifier with empty data_set
19 |
20 |   ## Examples
21 |
22 |       iex> classifier = LearnKit.Knn.new
23 |       %LearnKit.Knn{data_set: []}
24 |
25 |   """
26 |   @spec new() :: %Knn{data_set: []}
27 |
28 |   def new, do: Knn.new([])
29 |
30 |   @doc """
31 |   Creates classifier with data_set
32 |
33 |   ## Parameters
34 |
35 |     - data_set: Keyword list with labels and features in tuples
36 |
37 |   ## Examples
38 |
39 |       iex> classifier = LearnKit.Knn.new([{:a1, [[1, 2], [2, 3]]}, {:b1, [[-1, -2]]}])
40 |       %LearnKit.Knn{data_set: [a1: [[1, 2], [2, 3]], b1: [[-1, -2]]]}
41 |
42 |   """
43 |   @spec new(data_set) :: %Knn{data_set: data_set}
44 |
45 |   def new(data_set) when is_list(data_set), do: %Knn{data_set: data_set}
46 |
47 |   @doc """
48 |   Add train data to classifier
49 |
50 |   ## Parameters
51 |
52 |     - classifier: %LearnKit.Knn{}
53 |     - train data: tuple with label and feature
54 |
55 |   ## Examples
56 |
57 |       iex> classifier = classifier |> LearnKit.Knn.add_train_data({:a1, [-1, -1]})
58 |       %LearnKit.Knn{data_set: [a1: [[-1, -1]]]}
59 |
60 |   """
61 |   @spec add_train_data(%Knn{data_set: data_set}, point) :: %Knn{data_set: data_set}
62 |
63 |   def add_train_data(%Knn{data_set: data_set}, {key, value}) when is_atom(key) and is_list(value) do
64 |     features = if Keyword.has_key?(data_set, key), do: data_set[key], else: []
65 |     data_set = Keyword.put(data_set, key, [value | features])
66 |     %Knn{data_set: data_set}
67 |   end
68 |
69 |   @doc """
70 |   Classify label of the new feature
71 |
72 |   ## Parameters
73 |
74 |     - classifier: %LearnKit.Knn{}
75 |     - options: keyword list with options
76 |
77 |   ## Options
78 |
79 |     - feature: feature for classification, required, example: [1, 2, 3]
80 |     - k: number of nearest neighbours, default is 3, optional
81 |     - algorithm: brute, optional
82 |     - weight: uniform/distance, default is uniform, optional
83 |     - normalization: none/minimax/z_normalization, default is none, optional
84 |
85 |   ## Examples
86 |
87 |       iex> classifier |> LearnKit.Knn.classify([feature: [-1, -2], k: 3, weight: "distance"])
88 |       {:ok, :a1}
89 |
90 |   """
91 |   @spec classify(%Knn{data_set: data_set}, [tuple]) :: {:ok, label} | {:error, String.t()}
92 |
93 |   def classify(%Knn{data_set: data_set}, options) when is_list(options) do
94 |     cond do
95 |       !Keyword.has_key?(options, :feature) ->
96 |         {:error, "Feature option is required"}
| {:error, "Feature option is required"} 97 | 98 | !is_list(options[:feature]) -> 99 | {:error, "Feature option must be presented as array"} 100 | 101 | Keyword.has_key?(options, :k) && (!is_integer(options[:k]) || options[:k] <= 0) -> 102 | {:error, "K option must be positive integer"} 103 | 104 | true -> 105 | options = Keyword.merge([k: 3, algorithm: "brute", weight: "uniform", normalization: "none"], options) 106 | {label, _} = prediction(data_set, options) 107 | {:ok, label} 108 | end 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /lib/learn_kit/preprocessing.ex: -------------------------------------------------------------------------------- 1 | defmodule LearnKit.Preprocessing do 2 | @moduledoc """ 3 | Module for data preprocessing 4 | """ 5 | 6 | alias LearnKit.{Preprocessing, Math} 7 | use Preprocessing.Normalize 8 | 9 | @type row :: [number] 10 | @type matrix :: [row] 11 | 12 | @doc """ 13 | Normalize data set with minimax normalization 14 | 15 | ## Parameters 16 | 17 | - features: list of features for normalization 18 | 19 | ## Examples 20 | 21 | iex> LearnKit.Preprocessing.normalize([[1, 2], [3, 4], [5, 6]]) 22 | [ 23 | [0.0, 0.0], 24 | [0.5, 0.5], 25 | [1.0, 1.0] 26 | ] 27 | 28 | """ 29 | @spec normalize(matrix) :: matrix 30 | 31 | def normalize(features) when is_list(features), do: normalize(features, [type: "minimax"]) 32 | 33 | @doc """ 34 | Normalize data set 35 | 36 | ## Parameters 37 | 38 | - features: list of features for normalization 39 | - options: keyword list with options 40 | 41 | ## Options 42 | 43 | - type: minimax/z_normalization, default is minimax, optional 44 | 45 | ## Examples 46 | 47 | iex> LearnKit.Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], [type: "z_normalization"]) 48 | [ 49 | [-1.224744871391589, -1.224744871391589], 50 | [0.0, 0.0], 51 | [1.224744871391589, 1.224744871391589] 52 | ] 53 | 54 | """ 55 | @spec normalize(matrix, list) :: matrix 56 | 57 | def normalize(features, options) when is_list(features) and is_list(options) do 58 | options = Keyword.merge([type: "minimax"], options) 59 | case options[:type] do 60 | "z_normalization" -> normalization(features, "z_normalization") 61 | _ -> normalization(features, "minimax") 62 | end 63 | end 64 | 65 | @doc """ 66 | Prepare coefficients for normalization 67 | 68 | ## Parameters 69 | 70 | - features: features grouped by index 71 | - type: minimax/z_normalization 72 | 73 | ## Examples 74 | 75 | iex> LearnKit.Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "minimax") 76 | [{1, 5}, {2, 6}] 77 | 78 | iex> LearnKit.Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "z_normalization") 79 | [{3.0, 1.632993161855452}, {4.0, 1.632993161855452}] 80 | 81 | """ 82 | @spec coefficients(matrix, String.t()) :: matrix 83 | 84 | def coefficients(features, type) when is_list(features) and is_binary(type) do 85 | features 86 | |> Math.transpose() 87 | |> Enum.map(fn list -> return_params(list, type) end) 88 | end 89 | 90 | @doc """ 91 | Normalize 1 feature with predefined coefficients 92 | 93 | ## Parameters 94 | 95 | - feature: feature for normalization 96 | - coefficients: predefined coefficients 97 | - type: minimax/z_normalization 98 | 99 | ## Examples 100 | 101 | iex> LearnKit.Preprocessing.normalize_feature([1, 2], [{1, 5}, {2, 6}], "minimax") 102 | [0.0, 0.0] 103 | 104 | """ 105 | @spec normalize_feature(list, list(tuple), String.t()) :: list 106 | 107 | def normalize_feature(feature, coefficients, type) when is_list(feature) and 
108 |     Enum.zip(feature, coefficients)
109 |     |> Enum.map(fn {point, params_for_point} ->
110 |       divider = define_divider(params_for_point, type)
111 |       case divider do
112 |         0 -> point
113 |         _ -> (point - elem(params_for_point, 0)) / divider
114 |       end
115 |     end)
116 |   end
117 | end
118 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/linear.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Linear do
2 |   @moduledoc """
3 |   Module for Linear Regression algorithm
4 |   """
5 |
6 |   defstruct factors: [], results: [], coefficients: []
7 |
8 |   alias LearnKit.Regression.Linear
9 |   use Linear.Calculations
10 |   use LearnKit.Regression.Score
11 |
12 |   @type factors :: [number]
13 |   @type results :: [number]
14 |   @type coefficients :: [number]
15 |
16 |   @doc """
17 |   Creates predictor with empty data_set
18 |
19 |   ## Examples
20 |
21 |       iex> predictor = LearnKit.Regression.Linear.new
22 |       %LearnKit.Regression.Linear{factors: [], results: [], coefficients: []}
23 |
24 |   """
25 |   @spec new() :: %Linear{factors: [], results: [], coefficients: []}
26 |
27 |   def new, do: Linear.new([], [])
28 |
29 |   @doc """
30 |   Creates predictor with data_set
31 |
32 |   ## Parameters
33 |
34 |     - factors: List of predictor variables
35 |     - results: List of criterion variables
36 |
37 |   ## Examples
38 |
39 |       iex> predictor = LearnKit.Regression.Linear.new([1, 2, 3, 4], [3, 6, 10, 15])
40 |       %LearnKit.Regression.Linear{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: []}
41 |
42 |   """
43 |   @spec new(factors, results) :: %Linear{factors: factors, results: results, coefficients: []}
44 |
45 |   def new(factors, results) when is_list(factors) and is_list(results) do
46 |     %Linear{factors: factors, results: results}
47 |   end
48 |
49 |   @doc """
50 |   Fit train data
51 |
52 |   ## Parameters
53 |
54 |     - predictor: %LearnKit.Regression.Linear{}
55 |     - options: keyword list with options
56 |
57 |   ## Options
58 |
59 |     - method: method for fit, "least squares"/"gradient descent", default is "least squares", optional
60 |
61 |   ## Examples
62 |
63 |       iex> predictor = predictor |> LearnKit.Regression.Linear.fit
64 |       %LearnKit.Regression.Linear{
65 |         coefficients: [-1.5, 4.0],
66 |         factors: [1, 2, 3, 4],
67 |         results: [3, 6, 10, 15]
68 |       }
69 |
70 |       iex> predictor = predictor |> LearnKit.Regression.Linear.fit([method: "gradient descent"])
71 |       %LearnKit.Regression.Linear{
72 |         coefficients: [-1.4975720508482548, 3.9992148848913356],
73 |         factors: [1, 2, 3, 4],
74 |         results: [3, 6, 10, 15]
75 |       }
76 |
77 |   """
78 |   @spec fit(%Linear{factors: factors, results: results}) :: %Linear{factors: factors, results: results, coefficients: coefficients}
79 |
80 |   def fit(linear = %Linear{factors: factors, results: results}, options \\ []) when is_list(options) do
81 |     coefficients =
82 |       Keyword.merge([method: ""], options)
83 |       |> define_method_for_fit()
84 |       |> do_fit(linear)
85 |     %Linear{factors: factors, results: results, coefficients: coefficients}
86 |   end
87 |
88 |   defp define_method_for_fit(options) do
89 |     case options[:method] do
90 |       "gradient descent" -> "gradient descent"
91 |       _ -> ""
92 |     end
93 |   end
94 |
95 |   @doc """
96 |   Predict using the linear model
97 |
98 |   ## Parameters
99 |
100 |     - predictor: %LearnKit.Regression.Linear{}
101 |     - samples: List of variables
102 |
103 |   ## Examples
104 |
105 |       iex> predictor |> LearnKit.Regression.Linear.predict([4, 8, 13])
106 |       {:ok, [14.5, 30.5, 50.5]}
107 |
108 |   """
109 |   @spec predict(%Linear{coefficients: coefficients}, list) :: {:ok, list}
110 |
111 |   def predict(linear = %Linear{coefficients: _}, samples) when is_list(samples) do
112 |     {
113 |       :ok,
114 |       do_predict(linear, samples)
115 |     }
116 |   end
117 |
118 |   @doc """
119 |   Predict using the linear model
120 |
121 |   ## Parameters
122 |
123 |     - predictor: %LearnKit.Regression.Linear{}
124 |     - sample: Sample variable
125 |
126 |   ## Examples
127 |
128 |       iex> predictor |> LearnKit.Regression.Linear.predict(4)
129 |       {:ok, 14.5}
130 |
131 |   """
132 |   @spec predict(%Linear{coefficients: coefficients}, number) :: {:ok, number}
133 |
134 |   def predict(%Linear{coefficients: [alpha, beta]}, sample) do
135 |     {:ok, sample * beta + alpha}
136 |   end
137 | end
138 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/polynomial.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Polynomial do
2 |   @moduledoc """
3 |   Module for Polynomial Regression algorithm
4 |   """
5 |
6 |   defstruct factors: [], results: [], coefficients: [], degree: 2
7 |
8 |   alias LearnKit.Regression.Polynomial
9 |   use Polynomial.Calculations
10 |   use LearnKit.Regression.Score
11 |
12 |   @type factors :: [number]
13 |   @type results :: [number]
14 |   @type coefficients :: [number]
15 |   @type degree :: integer
16 |
17 |   @doc """
18 |   Creates polynomial predictor with data_set
19 |
20 |   ## Parameters
21 |
22 |     - factors: List of predictor variables
23 |     - results: List of criterion variables
24 |
25 |   ## Examples
26 |
27 |       iex> predictor = LearnKit.Regression.Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])
28 |       %LearnKit.Regression.Polynomial{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: [], degree: 2}
29 |
30 |   """
31 |   @spec new(factors, results) :: %Polynomial{factors: factors, results: results, coefficients: [], degree: 2}
32 |
33 |   def new(factors, results) when is_list(factors) and is_list(results) do
34 |     %Polynomial{factors: factors, results: results}
35 |   end
36 |
37 |   def new(_, _), do: Polynomial.new([], [])
38 |   def new, do: Polynomial.new([], [])
39 |
40 |   @doc """
41 |   Fit train data
42 |
43 |   ## Parameters
44 |
45 |     - predictor: %LearnKit.Regression.Polynomial{}
46 |     - options: keyword list with options
47 |
48 |   ## Options
49 |
50 |     - degree: nth degree of polynomial model, default set to 2
51 |
52 |   ## Examples
53 |
54 |       iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit
55 |       %LearnKit.Regression.Polynomial{
56 |         coefficients: [0.9999999999998295, 1.5000000000000853, 0.4999999999999787],
57 |         degree: 2,
58 |         factors: [1, 2, 3, 4],
59 |         results: [3, 6, 10, 15]
60 |       }
61 |
62 |       iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit([degree: 3])
63 |       %LearnKit.Regression.Polynomial{
64 |         coefficients: [1.0000000000081855, 1.5000000000013642, 0.5,
65 |          8.526512829121202e-14],
66 |         degree: 3,
67 |         factors: [1, 2, 3, 4],
68 |         results: [3, 6, 10, 15]
69 |       }
70 |
71 |   """
72 |   @spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}
73 |
74 |   def fit(%Polynomial{factors: factors, results: results}, options \\ []) do
75 |     degree = options[:degree] || 2
76 |     matrix = matrix(factors, degree)
77 |     xys = x_y_matrix(factors, results, degree + 1, [])
78 |     coefficients = matrix |> Matrix.inv() |> Matrix.mult(xys) |> List.flatten()
79 |     %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}
80 |   end
81 |
82 |   @doc """
83 |   Predict using the polynomial model
84 |
85 |   ## Parameters
86 |
87 |     - predictor: %LearnKit.Regression.Polynomial{}
88 |     - samples: List of variables
89 |
90 |   ## Examples
91 |
92 |       iex> predictor |> LearnKit.Regression.Polynomial.predict([5,6])
93 |       {:ok, [20.999999999999723, 27.999999999999574]}
94 |
95 |   """
96 |   @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, list) :: {:ok, list}
97 |
98 |   def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples) when is_list(samples) do
99 |     {:ok, do_predict(polynomial, samples)}
100 |   end
101 |
102 |   @doc """
103 |   Predict using the polynomial model
104 |
105 |   ## Parameters
106 |
107 |     - predictor: %LearnKit.Regression.Polynomial{}
108 |     - sample: Sample variable
109 |
110 |   ## Examples
111 |
112 |       iex> predictor |> LearnKit.Regression.Polynomial.predict(5)
113 |       {:ok, 20.999999999999723}
114 |
115 |   """
116 |   @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, number) :: {:ok, number}
117 |
118 |   def predict(%Polynomial{coefficients: coefficients, degree: degree}, sample) do
119 |     ordered_coefficients = coefficients |> Enum.reverse()
120 |     {:ok, substitute_coefficients(ordered_coefficients, sample, degree, 0.0)}
121 |   end
122 | end
123 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LearnKit
2 |
3 | Elixir package for machine learning
4 |
5 | Available preprocessing methods:
6 |
7 | - Normalization
8 |
9 | Available algorithms for prediction:
10 |
11 | - Linear Regression
12 | - Polynomial Regression (usage example at the end of this README)
13 |
14 | Available algorithms for classification:
15 |
16 | - K-Nearest Neighbours
17 | - Gaussian Naive Bayes
18 |
19 | ## Installation
20 |
21 | The package can be installed by adding `learn_kit`
22 | to your list of dependencies in `mix.exs`:
23 |
24 | ```elixir
25 | def deps do
26 |   [
27 |     {:learn_kit, "~> 0.1.6"}
28 |   ]
29 | end
30 | ```
31 |
32 | ### Normalization
33 |
34 | Normalize data set with minimax normalization
35 |
36 | ```elixir
37 | alias LearnKit.Preprocessing
38 | Preprocessing.normalize([[1, 2], [3, 4], [5, 6]])
39 | ```
40 |
41 | Or normalize data set with selected type
42 |
43 | ```elixir
44 | Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], [type: "z_normalization"])
45 | ```
46 | options - keyword list with options
47 |
48 | Additionally, you can prepare coefficients for normalization
49 |
50 | ```elixir
51 | Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "minimax")
52 | ```
53 | type - method of normalization, one of the [minimax|z_normalization], required
54 |
55 | Then normalize a single feature with the predefined coefficients
56 |
57 | ```elixir
58 | Preprocessing.normalize_feature([1, 2], [{1, 5}, {2, 6}], "minimax")
59 | ```
60 | type - method of normalization, one of the [minimax|z_normalization], required
61 |
62 | ### Linear Regression
63 |
64 | Initialize the predictor with data:
65 |
66 | ```elixir
67 | alias LearnKit.Regression.Linear
68 | predictor = Linear.new([1, 2, 3, 4], [3, 6, 10, 15])
69 | ```
70 |
71 | Fit the data set with the least squares method:
72 |
73 | ```elixir
74 | predictor = predictor |> Linear.fit
75 | ```
76 |
77 | Fit the data set with the gradient descent method:
78 |
79 | ```elixir
80 | predictor = predictor |> Linear.fit([method: "gradient descent"])
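# Note: the gradient descent fit is stochastic - it shuffles the
# (factor, result) pairs, decays the learning rate after non-improving
# steps and stops after 100 steps without improvement (see
# lib/learn_kit/regression/linear/calculations.ex) - so the returned
# coefficients only approximate the closed-form [-1.5, 4.0].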
81 | ```
82 |
83 | Predict using the linear model:
84 |
85 | ```elixir
86 | predictor |> Linear.predict([4, 8, 13])
87 | ```
88 | samples - list of variables, required
89 |
90 | Return the coefficient of determination R^2 of the prediction:
91 |
92 | ```elixir
93 | predictor |> Linear.score
94 | ```
95 |
96 | ### K-Nearest Neighbours classification
97 |
98 | Initialize the classifier with a data set consisting of labels and features:
99 |
100 | ```elixir
101 | alias LearnKit.Knn
102 | classifier =
103 |   Knn.new
104 |   |> Knn.add_train_data({:a1, [-1, -1]})
105 |   |> Knn.add_train_data({:a1, [-2, -1]})
106 |   |> Knn.add_train_data({:a2, [1, 1]})
107 | ```
108 |
109 | Predict the label for a new feature:
110 |
111 | ```elixir
112 | Knn.classify(classifier, [feature: [-1, -2], k: 3, weight: "distance", normalization: "minimax"])
113 | ```
114 | feature - new feature for prediction, required
115 | k - number of nearest neighbours, optional, default - 3
116 | algorithm - algorithm for calculation of distances, one of the [brute], optional, default - "brute"
117 | weight - method of weighted neighbours, one of the [uniform|distance], optional, default - "uniform"
118 | normalization - method of normalization, one of the [none|minimax|z_normalization], optional, default - "none"
119 |
120 | ### Gaussian Naive Bayes classification
121 |
122 | Initialize the classifier with a data set consisting of labels and features:
123 |
124 | ```elixir
125 | alias LearnKit.NaiveBayes.Gaussian
126 | classifier =
127 |   Gaussian.new
128 |   |> Gaussian.add_train_data({:a1, [-1, -1]})
129 |   |> Gaussian.add_train_data({:a1, [-2, -1]})
130 |   |> Gaussian.add_train_data({:a2, [1, 1]})
131 | ```
132 |
133 | Normalize the data set:
134 |
135 | ```elixir
136 | classifier = classifier |> Gaussian.normalize_train_data("minimax")
137 | ```
138 | type - method of normalization, one of the [minimax|z_normalization], required; any other value leaves the data set unchanged
139 |
140 | Fit the data set:
141 |
142 | ```elixir
143 | classifier = classifier |> Gaussian.fit
144 | ```
145 |
146 | Return probability estimates for the feature:
147 |
148 | ```elixir
149 | classifier |> Gaussian.predict_proba([1, 2])
150 | ```
151 | feature - new feature for prediction, required
152 |
153 | Return the exact prediction for the feature:
154 |
155 | ```elixir
156 | classifier |> Gaussian.predict([1, 2])
157 | ```
158 | feature - new feature for prediction, required
159 |
160 | Return the mean accuracy on the given test data and labels:
161 |
162 | ```elixir
163 | classifier |> Gaussian.score
164 | ```
165 |
166 | ## Contributing
167 |
168 | Bug reports and pull requests are welcome on GitHub at https://github.com/kortirso/elixir_learn_kit.
169 |
170 | ## License
171 |
172 | The package is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
173 |
174 | ## Disclaimer
175 |
176 | Use this package at your own peril and risk.
177 |
178 | ## Documentation
179 |
180 | Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
181 | and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
182 | be found at [https://hexdocs.pm/learn_kit](https://hexdocs.pm/learn_kit).
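
### Polynomial Regression

Usage mirrors Linear Regression; a minimal sketch based on the documented examples of `LearnKit.Regression.Polynomial`:

```elixir
alias LearnKit.Regression.Polynomial

predictor =
  Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])
  |> Polynomial.fit([degree: 2])

predictor |> Polynomial.predict([5, 6])
# {:ok, [20.999999999999723, 27.999999999999574]}

predictor |> Polynomial.score
```
degree - nth degree of the polynomial model, optional, default - 2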
183 |
184 |
--------------------------------------------------------------------------------
/lib/learn_kit/knn/classify.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Knn.Classify do
2 |   @moduledoc """
3 |   Module for knn classify functions
4 |   """
5 |
6 |   alias LearnKit.{Preprocessing, Math}
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp prediction(data_set, options) do
11 |         data_set
12 |         |> filter_features_by_size(options[:feature])
13 |         |> check_normalization(options)
14 |         |> calc_distances_for_features(options)
15 |         |> sort_distances()
16 |         |> select_closest_features(options)
17 |         |> check_zero_distance(options)
18 |       end
19 |
20 |       # knn uses only features with the same size as the current feature
21 |       defp filter_features_by_size(data_set, current_feature) do
22 |         Enum.map(data_set, fn {key, features} ->
23 |           {
24 |             key,
25 |             Enum.filter(features, fn feature -> length(feature) == length(current_feature) end)
26 |           }
27 |         end)
28 |       end
29 |
30 |       # normalize features
31 |       defp check_normalization(data_set, options) do
32 |         type = options[:normalization]
33 |         case type do
34 |           t when t in ["minimax", "z_normalization"] -> normalize(data_set, type)
35 |           _ -> data_set
36 |         end
37 |       end
38 |
39 |       # select algorithm for prediction
40 |       defp calc_distances_for_features(data_set, options) do
41 |         case options[:algorithm] do
42 |           "brute" -> brute_algorithm(data_set, options)
43 |           _ -> []
44 |         end
45 |       end
46 |
47 |       # sort distances
48 |       defp sort_distances(features), do: Enum.sort(features, &(elem(&1, 0) <= elem(&2, 0)))
49 |
50 |       # take closest features
51 |       defp select_closest_features(features, options), do: Enum.take(features, options[:k])
52 |
53 |       # check existence of the current feature in the data set
54 |       defp check_zero_distance(closest_features, options) do
55 |         {distance, label} = Enum.at(closest_features, 0)
56 |         cond do
57 |           distance == 0 -> {label, 0}
58 |           true -> select_best_label(closest_features, options)
59 |         end
60 |       end
61 |
62 |       # select best result based on weights
63 |       defp select_best_label(features, options) do
64 |         features
65 |         |> calc_feature_weights(options)
66 |         |> accumulate_weight_of_labels([])
67 |         |> sort_result()
68 |       end
69 |
70 |       # normalize each feature
71 |       defp normalize(data_set, type) do
72 |         coefficients = find_coefficients_for_normalization(data_set, type)
73 |         Enum.map(data_set, fn {key, features} ->
74 |           {
75 |             key,
76 |             Enum.map(features, fn feature -> Preprocessing.normalize_feature(feature, coefficients, type) end)
77 |           }
78 |         end)
79 |       end
80 |
81 |       # find coefficients for normalization
82 |       defp find_coefficients_for_normalization(data_set, type) do
83 |         Enum.reduce(data_set, [], fn {_, features}, acc ->
84 |           Enum.reduce(features, acc, fn feature, acc -> [feature | acc] end)
85 |         end)
86 |         |> Preprocessing.coefficients(type)
87 |       end
88 |
89 |       defp calc_feature_weights(features, options) do
90 |         Enum.map(features, fn feature ->
91 |           Tuple.append(feature, calc_feature_weight(options[:weight], elem(feature, 0)))
92 |         end)
93 |       end
94 |
95 |       defp sort_result(features) do
96 |         features
97 |         |> Enum.sort(&(elem(&1, 1) >= elem(&2, 1)))
98 |         |> List.first()
99 |       end
100 |
101 |       # brute algorithm for prediction
102 |       defp brute_algorithm(data_set, options) do
103 |         data_set
104 |         |> Keyword.keys()
105 |         |> handle_features_in_label(data_set, options[:feature])
106 |         |> List.flatten()
107 |       end
108 |
109 |       defp handle_features_in_label(keys, data_set, current_feature) do
110 |         Enum.map(keys, fn key ->
      # distances from every stored feature of one label to the current feature
      defp calc_distances_in_label(features, current_feature, key) do
        Enum.reduce(features, [], fn feature, acc ->
          distance = calc_distance_between_features(feature, current_feature)
          [{distance, key} | acc]
        end)
      end

      # Euclidean distance between two features
      defp calc_distance_between_features(feature_from_data_set, feature) do
        Enum.zip(feature_from_data_set, feature)
        |> calc_distance_between_points()
        |> :math.sqrt()
      end

      # sum of squared coordinate differences
      defp calc_distance_between_points(list) do
        Enum.reduce(list, 0, fn {xi, yi}, acc ->
          :math.pow(xi - yi, 2)
          |> Math.summ(acc)
        end)
      end

      # a zero distance is handled earlier in check_zero_distance,
      # so the division below is safe
      defp calc_feature_weight(weight, distance) do
        case weight do
          "uniform" -> 1
          "distance" -> 1 / :math.pow(distance, 2)
          _ -> 1
        end
      end

      # sum the vote weights per label
      defp accumulate_weight_of_labels([], acc), do: acc

      defp accumulate_weight_of_labels([{_, key, weight} | tail], acc) do
        previous = Keyword.get(acc, key, 0)
        acc = Keyword.put(acc, key, previous + weight)
        accumulate_weight_of_labels(tail, acc)
      end
    end
  end
end
--------------------------------------------------------------------------------
/lib/learn_kit/math.ex:
--------------------------------------------------------------------------------
defmodule LearnKit.Math do
  @moduledoc """
  Math module
  """

  @type row :: [number]
  @type matrix :: [row]

  @doc """
  Sum of 2 numbers

  ## Examples

      iex> LearnKit.Math.summ(1, 2)
      3

  """
  @spec summ(number, number) :: number

  def summ(a, b), do: a + b

  @doc """
  Division of 2 numbers

  ## Examples

      iex> LearnKit.Math.division(10, 2)
      5.0

  """
  @spec division(number, number) :: number

  def division(x, y) when y != 0, do: x / y

  @doc """
  Calculate the mean from a list of numbers

  ## Examples

      iex> LearnKit.Math.mean([])
      nil

      iex> LearnKit.Math.mean([1, 2, 3])
      2.0

  """
  @spec mean(list) :: number | nil

  def mean(list) when is_list(list), do: do_mean(list, 0, 0)

  defp do_mean([], 0, 0), do: nil

  defp do_mean([], sum, number), do: sum / number

  defp do_mean([head | tail], sum, number) do
    do_mean(tail, sum + head, number + 1)
  end

  @doc """
  Calculate the variance from a list of numbers

  ## Examples

      iex> LearnKit.Math.variance([])
      nil

      iex> LearnKit.Math.variance([1, 2, 3, 4])
      1.25

  """
  @spec variance(list) :: number | nil

  def variance([]), do: nil

  def variance(list) when is_list(list) do
    list_mean = mean(list)
    variance(list, list_mean)
  end

  @doc """
  Calculate the variance from a list of numbers, with a precalculated mean

  ## Examples

      iex> LearnKit.Math.variance([1, 2, 3, 4], 2.5)
      1.25

  """
  @spec variance(list, number) :: number

  def variance(list, list_mean) when is_list(list) do
    list
    |> Enum.map(fn x -> :math.pow(list_mean - x, 2) end)
    |> mean()
  end

  @doc """
  Calculate the standard deviation from a list of numbers

  ## Examples

      iex> LearnKit.Math.standard_deviation([])
      nil

      iex> LearnKit.Math.standard_deviation([1, 2])
      0.5

  """
  @spec standard_deviation(list) :: number | nil

  def standard_deviation([]), do: nil

  def standard_deviation(list) when is_list(list) do
    list
    |> variance()
    |> :math.sqrt()
  end

  @doc """
  Calculate the standard deviation from a precalculated variance

  ## Examples

      iex> LearnKit.Math.standard_deviation_from_variance(1.25)
      1.118033988749895

  """
  @spec standard_deviation_from_variance(number) :: number

  def standard_deviation_from_variance(list_variance) do
    :math.sqrt(list_variance)
  end

  @doc """
  Transposing a matrix

  ## Examples

      iex> LearnKit.Math.transpose([[1, 2], [3, 4], [5, 6]])
      [[1, 3, 5], [2, 4, 6]]

  """
  @spec transpose(matrix) :: matrix

  def transpose(m), do: do_transpose(m)

  defp do_transpose([head | _]) when head == [], do: []

  defp do_transpose(rows) do
    firsts = Enum.map(rows, fn x -> hd(x) end)
    others = Enum.map(rows, fn x -> tl(x) end)
    [firsts | do_transpose(others)]
  end

  @doc """
  Scalar multiplication

  ## Examples

      iex> LearnKit.Math.scalar_multiply(10, [5, 6])
      [50, 60]

  """
  @spec scalar_multiply(number, list) :: list

  def scalar_multiply(multiplicator, list) when is_list(list) do
    Enum.map(list, fn x -> x * multiplicator end)
  end

  @doc """
  Vector subtraction

  ## Examples

      iex> LearnKit.Math.vector_subtraction([40, 50, 60], [35, 5, 40])
      [5, 45, 20]

  """
  @spec vector_subtraction(list, list) :: list

  def vector_subtraction(x, y) when length(x) == length(y) do
    Enum.zip(x, y)
    |> Enum.map(fn {xi, yi} -> xi - yi end)
  end

  @doc """
  Calculate the sample covariance of two lists

  ## Examples

      iex> LearnKit.Math.covariance([1, 2, 3], [14, 17, 25])
      5.5

  """
  @spec covariance(list, list) :: number

  def covariance(x, y) when length(x) == length(y) do
    mean_x = mean(x)
    mean_y = mean(y)
    size = length(x)

    Enum.zip(x, y)
    |> Enum.reduce(0, fn {xi, yi}, acc -> acc + (xi - mean_x) * (yi - mean_y) end)
    |> division(size - 1)
  end

  @doc """
  Correlation of two lists

  ## Examples

      iex> LearnKit.Math.correlation([1, 2, 3], [14, 17, 25])
      0.9672471299049061

  """
  @spec correlation(list, list) :: number

  def correlation(x, y) when length(x) == length(y) do
    mean_x = mean(x)
    mean_y = mean(y)

    numerator = Enum.zip(x, y) |> Enum.reduce(0, fn {xi, yi}, acc -> acc + (xi - mean_x) * (yi - mean_y) end)
    denom_x = Enum.reduce(x, 0, fn xi, acc -> acc + :math.pow(xi - mean_x, 2) end)
    denom_y = Enum.reduce(y, 0, fn yi, acc -> acc + :math.pow(yi - mean_y, 2) end)

    numerator / :math.sqrt(denom_x * denom_y)
  end
end
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian.ex:
--------------------------------------------------------------------------------
defmodule LearnKit.NaiveBayes.Gaussian do
  @moduledoc """
  Module for the Gaussian Naive Bayes algorithm
  """

  defstruct data_set: [], fit_data: []

  alias LearnKit.NaiveBayes.Gaussian
  use Gaussian.Normalize
  use Gaussian.Fit
  use Gaussian.Classify
  use Gaussian.Score

  @type label :: atom
  @type feature :: [number]
  @type prediction :: {label, number}
  @type predictions :: [prediction]
  @type point :: {label, feature}
  @type features :: [feature]
  @type data_set :: [{label, features}]
  @type fit_feature :: %{mean: float, standard_deviation: float, variance: float}
  @type fit_features :: [fit_feature]
  @type fit_data :: [{label, fit_features}]

  @doc """
  Creates a classifier with an empty data_set

  ## Examples

      iex> classifier = LearnKit.NaiveBayes.Gaussian.new
      %LearnKit.NaiveBayes.Gaussian{data_set: [], fit_data: []}

  """
  @spec new() :: %Gaussian{data_set: []}

  def new, do: Gaussian.new([])

  @doc """
  Creates a classifier with the given data_set

  ## Parameters

    - data_set: keyword list with labels and features in tuples

  ## Examples

      iex> classifier = LearnKit.NaiveBayes.Gaussian.new([{:a1, [[1, 2], [2, 3]]}, {:b1, [[-1, -2]]}])
      %LearnKit.NaiveBayes.Gaussian{data_set: [a1: [[1, 2], [2, 3]], b1: [[-1, -2]]], fit_data: []}

  """
  @spec new(data_set) :: %Gaussian{data_set: data_set}

  def new(data_set), do: %Gaussian{data_set: data_set}

  @doc """
  Adds train data to the classifier

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - train data: tuple with label and feature

  ## Examples

      iex> classifier = classifier |> LearnKit.NaiveBayes.Gaussian.add_train_data({:a1, [-1, -1]})
      %LearnKit.NaiveBayes.Gaussian{data_set: [a1: [[-1, -1]]], fit_data: []}

  """
  @spec add_train_data(%Gaussian{data_set: data_set}, point) :: %Gaussian{data_set: data_set}

  def add_train_data(%Gaussian{data_set: data_set}, {key, value}) do
    features = Keyword.get(data_set, key, [])
    data_set = Keyword.put(data_set, key, [value | features])
    %Gaussian{data_set: data_set}
  end

  @doc """
  Normalizes train data

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - type: none/minimax/z_normalization, default is none, optional

  ## Examples

      iex> classifier = classifier |> LearnKit.NaiveBayes.Gaussian.normalize_train_data("minimax")
      %LearnKit.NaiveBayes.Gaussian{
        data_set: [a1: [[0.6666666666666666, 0.8], [1.0, 1.0]], b1: [[0.0, 0.0]]],
        fit_data: []
      }

  """
  @spec normalize_train_data(%Gaussian{data_set: data_set}, String.t()) :: %Gaussian{data_set: data_set, fit_data: fit_data}

  def normalize_train_data(%Gaussian{data_set: data_set}, type \\ "none") when is_binary(type) do
    %Gaussian{data_set: normalize_data(data_set, type), fit_data: []}
  end

  @doc """
  Fits train data

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}

  ## Examples

      iex> classifier = classifier |> LearnKit.NaiveBayes.Gaussian.fit
      %LearnKit.NaiveBayes.Gaussian{
        data_set: [a1: [[-1, -1]]],
        fit_data: [
          a1: [
            %{mean: -1.0, standard_deviation: 0.0, variance: 0.0},
            %{mean: -1.0, standard_deviation: 0.0, variance: 0.0}
          ]
        ]
      }

  """
  @spec fit(%Gaussian{data_set: data_set}) :: %Gaussian{data_set: data_set, fit_data: fit_data}

  def fit(%Gaussian{data_set: data_set}) do
    %Gaussian{data_set: data_set, fit_data: fit_data(data_set)}
  end

  @doc """
  Returns probability estimates for the feature

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - feature: feature for prediction

  ## Examples

      iex> classifier |> LearnKit.NaiveBayes.Gaussian.predict_proba([1, 2])
      {:ok, [a1: 0.0359, a2: 0.0039]}

  """
  @spec predict_proba(%Gaussian{fit_data: fit_data}, feature) :: {:ok, predictions}

  def predict_proba(%Gaussian{fit_data: fit_data}, feature) do
    result = classify_data(fit_data, feature)
    {:ok, result}
  end

  @doc """
  Returns the exact prediction for the feature

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - feature: feature for prediction

  ## Examples

      iex> classifier |> LearnKit.NaiveBayes.Gaussian.predict([1, 2])
      {:ok, {:a1, 0.334545454}}

  """
  @spec predict(%Gaussian{fit_data: fit_data}, feature) :: {:ok, prediction}

  def predict(%Gaussian{fit_data: fit_data}, feature) do
    # keep the label with the highest probability estimate
    result =
      fit_data
      |> classify_data(feature)
      |> Enum.sort_by(fn {_label, probability} -> probability end)
      |> List.last()

    {:ok, result}
  end

  @doc """
  Returns the mean accuracy on the given test data and labels

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}

  ## Examples

      iex> classifier |> LearnKit.NaiveBayes.Gaussian.score
      {:ok, 0.857143}

  """
  @spec score(%Gaussian{data_set: data_set, fit_data: fit_data}) :: {:ok, number}

  def score(%Gaussian{data_set: data_set, fit_data: fit_data}) do
    result = calc_score(fit_data, data_set)
    {:ok, result}
  end
end
--------------------------------------------------------------------------------