├── test
│   ├── test_helper.exs
│   ├── learn_kit_test.exs
│   └── learn_kit
│       ├── math_test.exs
│       ├── regression
│       │   ├── linear_test.exs
│       │   └── polynomial_test.exs
│       ├── preprocessing_test.exs
│       ├── naive_bayes
│       │   └── gaussian_test.exs
│       └── knn_test.exs
├── .DS_Store
├── lib
│   ├── learn_kit.ex
│   └── learn_kit
│       ├── naive_bayes
│       │   ├── gaussian
│       │   │   ├── fit.ex
│       │   │   ├── score.ex
│       │   │   ├── normalize.ex
│       │   │   └── classify.ex
│       │   └── gaussian.ex
│       ├── preprocessing
│       │   └── normalize.ex
│       ├── regression
│       │   ├── score.ex
│       │   ├── polynomial
│       │   │   └── calculations.ex
│       │   ├── linear
│       │   │   └── calculations.ex
│       │   ├── linear.ex
│       │   └── polynomial.ex
│       ├── knn.ex
│       ├── preprocessing.ex
│       ├── knn
│       │   └── classify.ex
│       └── math.ex
├── .formatter.exs
├── .gitignore
├── mix.exs
├── mix.lock
├── CHANGELOG.md
├── config
│   └── config.exs
└── README.md
--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 |
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kortirso/elixir_learn_kit/HEAD/.DS_Store
--------------------------------------------------------------------------------
/test/learn_kit_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKitTest do
2 |   use ExUnit.Case
3 | end
4 |
--------------------------------------------------------------------------------
/lib/learn_kit.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit do
2 |   @moduledoc """
3 |   Documentation for LearnKit.
4 |   """
5 | end
6 |
--------------------------------------------------------------------------------
/.formatter.exs:
--------------------------------------------------------------------------------
1 | # Used by "mix format"
2 | [
3 |   inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
4 | ]
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # The directory Mix will write compiled artifacts to.
2 | /_build/
3 |
4 | # If you run "mix test --cover", coverage assets end up here.
5 | /cover/
6 |
7 | # The directory Mix downloads your dependencies sources to.
8 | /deps/
9 |
10 | # Where 3rd-party dependencies like ExDoc output generated docs.
11 | /doc/
12 |
13 | # Ignore .fetch files in case you like to edit your project deps locally.
14 | /.fetch
15 |
16 | # If the VM crashes, it generates a dump, let's ignore it too.
17 | erl_crash.dump
18 |
19 | # Also ignore archive artifacts (built via "mix archive.build").
20 | *.ez
21 |
22 | # Ignore package tarball (built via "mix hex.build").
23 | learn_kit-*.tar
24 |
25 |
--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.MixProject do
2 |   use Mix.Project
3 |
4 |   @description """
5 |   Elixir package for machine learning
6 |   """
7 |
8 |   def project do
9 |     [
10 |       app: :learn_kit,
11 |       version: "0.1.6",
12 |       elixir: "~> 1.7",
13 |       name: "LearnKit",
14 |       description: @description,
15 |       source_url: "https://github.com/kortirso/elixir_learn_kit",
16 |       start_permanent: Mix.env() == :prod,
17 |       deps: deps(),
18 |       package: package()
19 |     ]
20 |   end
21 |
22 |   def application do
23 |     [
24 |       extra_applications: [:logger]
25 |     ]
26 |   end
27 |
28 |   defp deps do
29 |     [
30 |       {:ex_doc, "~> 0.19", only: :dev},
31 |       {:matrix, "~> 0.3.2"}
32 |     ]
33 |   end
34 |
35 |   defp package do
36 |     [
37 |       maintainers: ["Anton Bogdanov"],
38 |       licenses: ["MIT"],
39 |       links: %{"GitHub" => "https://github.com/kortirso/elixir_learn_kit"}
40 |     ]
41 |   end
42 | end
43 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/fit.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Fit do
2 |   @moduledoc """
3 |   Module for fit functions
4 |   """
5 |
6 |   alias LearnKit.Math
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp fit_data(data_set) do
11 |         Enum.map(data_set, fn {key, value} ->
12 |           {key, calc_features(value)}
13 |         end)
14 |       end
15 |
16 |       defp calc_features(features) do
17 |         features
18 |         |> Math.transpose()
19 |         |> calc_combination()
20 |       end
21 |
22 |       defp calc_combination(combinations) do
23 |         Enum.map(combinations, fn combination ->
24 |           mean = Math.mean(combination)
25 |           variance = Math.variance(combination, mean)
26 |           standard_deviation = Math.standard_deviation_from_variance(variance)
27 |           %{mean: mean, variance: variance, standard_deviation: standard_deviation}
28 |         end)
29 |       end
30 |     end
31 |   end
32 | end
33 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/score.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Score do
2 |   @moduledoc """
3 |   Module for calculating accuracy of prediction
4 |   """
5 |
6 |   alias LearnKit.NaiveBayes.Gaussian
7 |   alias LearnKit.Math
8 |
9 |   defmacro __using__(_opts) do
10 |     quote do
11 |       defp calc_score(fit_data, data_set) do
12 |         data_set
13 |         |> Enum.map(fn {label, features} ->
14 |           check_features(features, fit_data, label)
15 |         end)
16 |         |> List.flatten()
17 |         |> Math.mean()
18 |         |> Float.ceil(6)
19 |       end
20 |
21 |       defp check_features(features, fit_data, label) do
22 |         Enum.map(features, fn feature ->
23 |           check_feature(feature, fit_data, label)
24 |         end)
25 |       end
26 |
27 |       defp check_feature(feature, fit_data, label) do
28 |         {:ok, {predicted_label, _}} = Gaussian.predict(%Gaussian{fit_data: fit_data}, feature)
29 |         if predicted_label == label, do: 1, else: 0
30 |       end
31 |     end
32 |   end
33 | end
34 |
--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
1 | %{
2 |   "earmark": {:hex, :earmark, "1.2.6", "b6da42b3831458d3ecc57314dff3051b080b9b2be88c2e5aa41cd642a5b044ed", [:mix], []},
3 |   "ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, optional: false]}]},
"519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, optional: false]}]}, 4 | "exprintf": {:hex, :exprintf, "0.2.1", "b7e895dfb00520cfb7fc1671303b63b37dc3897c59be7cbf1ae62f766a8a0314", [:mix], []}, 5 | "makeup": {:hex, :makeup, "0.5.5", "9e08dfc45280c5684d771ad58159f718a7b5788596099bdfb0284597d368a882", [:mix], [{:nimble_parsec, "~> 0.4", [hex: :nimble_parsec, optional: false]}]}, 6 | "makeup_elixir": {:hex, :makeup_elixir, "0.10.0", "0f09c2ddf352887a956d84f8f7e702111122ca32fbbc84c2f0569b8b65cbf7fa", [:mix], [{:makeup, "~> 0.5.5", [hex: :makeup, optional: false]}]}, 7 | "matrix": {:hex, :matrix, "0.3.2", "9c826bc3a1117bf5e1c5cdcf3a3d95456c93bc2e127a04e363e9fc90b724f784", [:mix], [{:exprintf, "~> 0.1", [hex: :exprintf, optional: false]}]}, 8 | "nimble_parsec": {:hex, :nimble_parsec, "0.4.0", "ee261bb53214943679422be70f1658fff573c5d0b0a1ecd0f18738944f818efe", [:mix], []}, 9 | } 10 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 6 | 7 | ## [0.1.6] - 2019-01-08 8 | ### Added 9 | - Polynomial Regression predictor 10 | 11 | ### Modified 12 | - code refactoring 13 | 14 | ## [0.1.5] - 2018-12-18 15 | ### Added 16 | - preprocessing normalization for data set 17 | - normalize train data for Gaussian NB 18 | 19 | ### Modified 20 | - normalization can be selected for KNN 21 | 22 | ## [0.1.4] - 2018-12-17 23 | ### Modified 24 | - errors conditions and tests for KNN 25 | - distance calculation between points for KNN 26 | 27 | ## [0.1.3] - 2018-11-22 28 | ### Modified 29 | - Linear Regression, fit with gradient descent 30 | 31 | ## [0.1.2] - 2018-11-22 32 | ### Added 33 | - CHANGELOG.md file 34 | - Add simple Linear Regression predictor 35 | 36 | ### Modified 37 | - Readme 38 | - Tests 39 | 40 | ## [0.1.1] - 2018-11-19 41 | ### Added 42 | - Gaussian Naive Bayes algorithm 43 | - Math module with useful operations 44 | 45 | ## [0.1.0] - 2018-11-15 46 | ### Added 47 | - K-Nearest Neighbours algorithm 48 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Mix.Config module. 3 | use Mix.Config 4 | 5 | # This configuration is loaded before any dependency and is restricted 6 | # to this project. If another project depends on this project, this 7 | # file won't be loaded nor affect the parent project. For this reason, 8 | # if you want to provide default values for your application for 9 | # 3rd-party users, it should be done in your "mix.exs" file. 
10 |
11 | # You can configure your application as:
12 | #
13 | #     config :learn_kit, key: :value
14 | #
15 | # and access this configuration in your application as:
16 | #
17 | #     Application.get_env(:learn_kit, :key)
18 | #
19 | # You can also configure a 3rd-party app:
20 | #
21 | #     config :logger, level: :info
22 | #
23 |
24 | # It is also possible to import configuration files, relative to this
25 | # directory. For example, you can emulate configuration per environment
26 | # by uncommenting the line below and defining dev.exs, test.exs and such.
27 | # Configuration from the imported file will override the ones defined
28 | # here (which is why it is important to import them last).
29 | #
30 | #     import_config "#{Mix.env()}.exs"
31 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/normalize.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Normalize do
2 |   @moduledoc """
3 |   Module for normalization of train data
4 |   """
5 |
6 |   alias LearnKit.Preprocessing
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp normalize_data(data_set, type) do
11 |         case type do
12 |           t when t in ["minimax", "z_normalization"] -> normalize(data_set, type)
13 |           _ -> data_set
14 |         end
15 |       end
16 |
17 |       # normalize each feature
18 |       defp normalize(data_set, type) do
19 |         coefficients = find_coefficients_for_normalization(data_set, type)
20 |         Enum.map(data_set, fn {key, features} ->
21 |           {
22 |             key,
23 |             Enum.map(features, fn feature -> Preprocessing.normalize_feature(feature, coefficients, type) end)
24 |           }
25 |         end)
26 |       end
27 |
28 |       # find coefficients for normalization
29 |       defp find_coefficients_for_normalization(data_set, type) do
30 |         Enum.reduce(data_set, [], fn {_, features}, acc ->
31 |           Enum.reduce(features, acc, fn feature, acc -> [feature | acc] end)
32 |         end)
33 |         |> Preprocessing.coefficients(type)
34 |       end
35 |     end
36 |   end
37 | end
38 |
--------------------------------------------------------------------------------
/test/learn_kit/math_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.MathTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Math
4 |
5 |   test "calculate sum" do
6 |     assert 3 = Math.summ(1, 2)
7 |   end
8 |
9 |   test "calculate mean" do
10 |     assert 2.0 = Math.mean([1, 2, 3])
11 |   end
12 |
13 |   test "calculate variance" do
14 |     assert 1.25 = Math.variance([1, 2, 3, 4])
15 |   end
16 |
17 |   test "calculate variance, with calculated mean" do
18 |     assert 1.25 = Math.variance([1, 2, 3, 4], 2.5)
19 |   end
20 |
21 |   test "calculate standard deviation" do
22 |     assert 0.5 = Math.standard_deviation([1, 2])
23 |   end
24 |
25 |   test "calculate standard deviation from variance" do
26 |     assert 1.118033988749895 = Math.standard_deviation_from_variance(1.25)
27 |   end
28 |
29 |   test "calculate division" do
30 |     assert 5.0 = Math.division(10, 2)
31 |   end
32 |
33 |   test "calculate covariance" do
34 |     assert 5.5 = Math.covariance([1, 2, 3], [14, 17, 25])
35 |   end
36 |
37 |   test "calculate correlation" do
38 |     assert 0.9672471299049061 = Math.correlation([1, 2, 3], [14, 17, 25])
39 |   end
40 |
41 |   test "transposing a matrix" do
42 |     assert [[1, 3, 5], [2, 4, 6]] = Math.transpose([[1, 2], [3, 4], [5, 6]])
43 |   end
44 |
45 |   test "scalar multiplication with matrix" do
46 |     assert [50, 60] = Math.scalar_multiply(10, [5, 6])
47 |   end
48 |
49 |   test "vector subtraction" do
50 |     assert [5, 45, 20] = Math.vector_subtraction([40, 50, 60], [35, 5, 40])
51 |   end
52 | end
53 |
--------------------------------------------------------------------------------
/lib/learn_kit/preprocessing/normalize.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Preprocessing.Normalize do
2 |   @moduledoc """
3 |   Module for data normalization
4 |   """
5 |
6 |   alias LearnKit.Math
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp normalization(features, type) do
11 |         list_of_params = coefficients(features, type)
12 |         features
13 |         |> Math.transpose()
14 |         |> Enum.with_index()
15 |         |> Enum.map(fn {feature, index} -> transform_feature(feature, Enum.at(list_of_params, index), type) end)
16 |         |> Math.transpose()
17 |       end
18 |
19 |       defp return_params(list, "minimax") do
20 |         {
21 |           Enum.min(list),
22 |           Enum.max(list)
23 |         }
24 |       end
25 |
26 |       defp return_params(list, "z_normalization") do
27 |         {
28 |           Math.mean(list),
29 |           Math.standard_deviation(list)
30 |         }
31 |       end
32 |
33 |       defp transform_feature(feature, params_for_point, type) do
34 |         divider = define_divider(params_for_point, type)
35 |         case divider do
36 |           0 -> feature
37 |           _ -> Enum.map(feature, fn point -> (point - elem(params_for_point, 0)) / divider end)
38 |         end
39 |       end
40 |
41 |       defp define_divider(params_for_point, "minimax") do
42 |         elem(params_for_point, 1) - elem(params_for_point, 0)
43 |       end
44 |
45 |       defp define_divider(params_for_point, "z_normalization") do
46 |         elem(params_for_point, 1)
47 |       end
48 |     end
49 |   end
50 | end
51 |
--------------------------------------------------------------------------------
/test/learn_kit/regression/linear_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.LinearTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Regression.Linear
4 |
5 |   setup_all do
6 |     {:ok, predictor: Linear.new([1, 2, 3, 4], [3, 6, 10, 15])}
7 |   end
8 |
9 |   test "create new linear predictor with empty data set" do
10 |     assert %Linear{factors: factors, results: results, coefficients: coefficients} = Linear.new
11 |
12 |     assert factors == []
13 |     assert results == []
14 |     assert coefficients == []
15 |   end
16 |
17 |   test "create new linear predictor with data", state do
18 |     assert %Linear{factors: factors, results: results, coefficients: coefficients} = state[:predictor]
19 |
20 |     assert factors == [1, 2, 3, 4]
21 |     assert results == [3, 6, 10, 15]
22 |     assert coefficients == []
23 |   end
24 |
25 |   test "fit data set", state do
26 |     %Linear{coefficients: coefficients} = state[:predictor] |> Linear.fit
27 |
28 |     assert coefficients == [-1.5, 4.0]
29 |   end
30 |
31 |   test "fit data set with gradient descent", state do
32 |     %Linear{coefficients: coefficients} = state[:predictor] |> Linear.fit([method: "gradient descent"])
33 |
34 |     assert [-1.5, 4.0] = coefficients |> Enum.map(fn x -> Float.round(x, 2) end)
35 |   end
36 |
37 |   test "return prediction using the linear model", state do
38 |     predictor = state[:predictor] |> Linear.fit
39 |
40 |     assert {:ok, result} = predictor |> Linear.predict([4, 8, 13])
41 |     assert result == [14.5, 30.5, 50.5]
42 |   end
43 |
44 |   test "returns coefficient of determination R^2 of the prediction", state do
45 |     predictor = state[:predictor] |> Linear.fit
46 |
47 |     assert {:ok, result} = predictor |> Linear.score
48 |     assert result == 0.9876543209876543
49 |   end
50 | end
51 |
--------------------------------------------------------------------------------
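The `[-1.5, 4.0]` coefficients asserted in the linear regression tests above follow from the closed-form least-squares fit used by `Linear.fit/1` (see `lib/learn_kit/regression/linear/calculations.ex`): `beta = correlation(x, y) * sd(y) / sd(x)` and `alpha = mean(y) - beta * mean(x)`. A minimal sketch, reproducing the asserted values with the `LearnKit.Math` helpers defined in this repository:

```elixir
alias LearnKit.Math

factors = [1, 2, 3, 4]
results = [3, 6, 10, 15]

# slope: correlation rescaled by the ratio of standard deviations
beta =
  Math.correlation(factors, results) *
    Math.standard_deviation(results) / Math.standard_deviation(factors)

# intercept: forces the fitted line through the mean point
alpha = Math.mean(results) - beta * Math.mean(factors)

[alpha, beta]
# => [-1.5, 4.0]
```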
/test/learn_kit/preprocessing_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.PreprocessingTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Preprocessing
4 |
5 |   describe "for invalid data" do
6 |     test "use preprocessor with invalid data" do
7 |       assert_raise FunctionClauseError, fn ->
8 |         Preprocessing.normalize("")
9 |       end
10 |     end
11 |
12 |     test "use preprocessor with invalid options" do
13 |       assert_raise FunctionClauseError, fn ->
14 |         Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], "")
15 |       end
16 |     end
17 |   end
18 |
19 |   describe "for valid data" do
20 |     test "prepare coefficients for normalization, minimax" do
21 |       result = Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "minimax")
22 |
23 |       assert result == [{1, 5}, {2, 6}]
24 |     end
25 |
26 |     test "prepare coefficients for normalization, z_normalization" do
27 |       result = Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "z_normalization")
28 |
29 |       assert result == [{3.0, 1.632993161855452}, {4.0, 1.632993161855452}]
30 |     end
31 |
32 |     test "normalize 1 feature with predefined coefficients" do
33 |       result = Preprocessing.normalize_feature([1, 2], [{1, 5}, {2, 6}], "minimax")
34 |
35 |       assert result == [0.0, 0.0]
36 |     end
37 |
38 |     test "normalize data set with minimax normalization" do
39 |       result = Preprocessing.normalize([[1, 2], [3, 4], [5, 6]])
40 |
41 |       assert result == [[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]
42 |     end
43 |
44 |     test "normalize data set with z normalization" do
45 |       result = Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], [type: "z_normalization"])
46 |
47 |       assert result == [[-1.224744871391589, -1.224744871391589], [0.0, 0.0], [1.224744871391589, 1.224744871391589]]
48 |     end
49 |   end
50 | end
51 |
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian/classify.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.Gaussian.Classify do
2 |   @moduledoc """
3 |   Module for prediction functions
4 |   """
5 |
6 |   defmacro __using__(_opts) do
7 |     quote do
8 |       # classify data
9 |       # returns data like [label1: 0.03592747361085857, label2: 0.00399309643713954]
10 |       defp classify_data(fit_data, feature) do
11 |         labels_count = fit_data |> Keyword.keys() |> length()
12 |         fit_data
13 |         |> Enum.map(fn {label, fit_results} ->
14 |           {label, class_probability(labels_count, feature, fit_results)}
15 |         end)
16 |       end
17 |
18 |       # compute the final naive Bayesian probability for a given set of features being a part of a given label
19 |       defp class_probability(labels_count, feature, fit_results) do
20 |         class_fraction = 1.0 / labels_count
21 |         feature_bayes = feature_mult(feature, fit_results, 1.0, 0)
22 |         (feature_bayes * class_fraction)
23 |         |> Float.round(10)
24 |       end
25 |
26 |       # multiply together the feature probabilities for all of the features in a label for given values
27 |       defp feature_mult([], _, acc, _), do: acc
28 |
29 |       defp feature_mult([head | tail], fit_results, acc, index) do
30 |         acc = acc * feature_probability(index, head, fit_results)
31 |         feature_mult(tail, fit_results, acc, index + 1)
32 |       end
33 |
34 |       defp feature_probability(index, value, fit_results) do
35 |         # select result from training
36 |         fit_result = Enum.at(fit_results, index)
37 |         # deal with the edge case of a 0 standard deviation
38 |         if fit_result.standard_deviation == 0 do
39 |           if fit_result.mean == value, do: 1.0, else: 0.0
40 |         else
41 |           # calculate the Gaussian probability
42 |           exp = - :math.pow(value - fit_result.mean, 2) / (2 * fit_result.variance)
43 |           :math.exp(exp) / :math.sqrt(2 * :math.pi * fit_result.variance)
44 |         end
45 |       end
46 |     end
47 |   end
48 | end
49 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/score.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Score do
2 |   @moduledoc """
3 |   Module for scoring regression models
4 |   """
5 |
6 |   alias LearnKit.Math
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       @doc """
11 |       Returns the coefficient of determination R^2 of the prediction
12 |
13 |       ## Parameters
14 |
15 |         - predictor: %LearnKit.Regression.Linear{} or %LearnKit.Regression.Polynomial{}
16 |
17 |       ## Examples
18 |
19 |           iex> predictor |> LearnKit.Regression.Linear.score
20 |           {:ok, 0.9876543209876543}
21 |
22 |       """
23 |       @spec score(struct) :: {:ok, number}
24 |
25 |       def score(regression = %_{factors: _, results: _, coefficients: _}) do
26 |         {
27 |           :ok,
28 |           calculate_score(regression)
29 |         }
30 |       end
31 |
32 |       defp calculate_score(%_{coefficients: []}), do: raise("There was no fit for model")
33 |
34 |       defp calculate_score(regression = %_{coefficients: _, factors: _, results: results}) do
35 |         1.0 - sum_of_squared_errors(regression) / total_sum_of_squares(results)
36 |       end
37 |
38 |       defp prediction_error(regression, x, y) do
39 |         {:ok, prediction} = predict(regression, x)
40 |         y - prediction
41 |       end
42 |
43 |       defp sum_of_squared_errors(regression = %_{coefficients: _, factors: factors, results: results}) do
44 |         factors
45 |         |> Enum.zip(results)
46 |         |> Enum.reduce(0, fn {xi, yi}, acc ->
47 |           acc + squared_prediction_error(regression, xi, yi)
48 |         end)
49 |       end
50 |
51 |       defp total_sum_of_squares(list) do
52 |         mean_list = Math.mean(list)
53 |         Enum.reduce(list, 0, fn x, acc -> acc + :math.pow(x - mean_list, 2) end)
54 |       end
55 |
56 |       defp squared_prediction_error(regression, x, y) do
57 |         regression
58 |         |> prediction_error(x, y)
59 |         |> :math.pow(2)
60 |       end
61 |     end
62 |   end
63 | end
64 |
--------------------------------------------------------------------------------
/test/learn_kit/regression/polynomial_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.PolynomialTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Regression.Polynomial
4 |
5 |   setup_all do
6 |     factors = [-3, -2, -1, -0.2, 1, 3]
7 |     results = [0.9, 0.8, 0.4, 0.2, 0.1, 0]
8 |     {:ok, predictor: Polynomial.new(factors, results), factors: factors, results: results}
9 |   end
10 |
11 |   test "create new polynomial predictor with empty data set" do
12 |     assert Polynomial.new() == %Polynomial{}
13 |   end
14 |
15 |   test "create new polynomial predictor with data" do
16 |     assert Polynomial.new([1, 2], [3, 4]) == %Polynomial{coefficients: [], degree: 2, factors: [1, 2], results: [3, 4]}
17 |   end
18 |
19 |   test "fit data set", state do
20 |     %Polynomial{coefficients: coefficients, degree: 2, factors: factors, results: results} = state.predictor |> Polynomial.fit(degree: 2)
21 |
22 |     assert coefficients == [0.2290655593570844, -0.16280041315555793, 0.027763965678671648]
23 |     assert factors == state.factors
24 |     assert results == state.results
25 |   end
26 |
27 |   test "fit data set with degree of 4", state do
28 |     %Polynomial{coefficients: coefficients, degree: 4, factors: factors, results: results} = state.predictor |> Polynomial.fit(degree: 4)
29 |
30 |     assert coefficients == [0.14805723970909512, -0.15811217698985996, 0.12329778502873823, 8.627221168971827e-4, -0.009963024223179073]
31 |     assert factors == state.factors
32 |     assert results == state.results
33 |   end
34 |
35 |   test "predict using the polynomial model of simple sample", state do
36 |     {:ok, result} = state.predictor |> Polynomial.fit(degree: 2) |> Polynomial.predict(3)
37 |
38 |     assert result == -0.009459989001544572
39 |   end
40 |
41 |   test "predict using the polynomial model of multiple samples", state do
42 |     {:ok, result} = state.predictor |> Polynomial.fit(degree: 2) |> Polynomial.predict([3, 5])
43 |
44 |     assert result == [-0.009459989001544572, 0.10916263554608596]
45 |   end
46 |
47 |   test "returns coefficient of determination R^2 of the prediction", state do
48 |     predictor = state.predictor |> Polynomial.fit()
49 |
50 |     assert {:ok, result} = predictor |> Polynomial.score()
51 |     assert result == 0.9614116660464942
52 |   end
53 | end
54 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/polynomial/calculations.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Polynomial.Calculations do
2 |   @moduledoc """
3 |   Module for polynomial regression calculations
4 |   """
5 |
6 |   defmacro __using__(_opts) do
7 |     quote do
8 |       defp do_predict(polynomial, samples) do
9 |         Enum.map(samples, fn sample ->
10 |           {:ok, prediction} = predict(polynomial, sample)
11 |           prediction
12 |         end)
13 |       end
14 |
15 |       defp matrix_line(1, factors, degree) do
16 |         power_ofs = Enum.to_list(1..degree)
17 |         [Enum.count(factors) | sum_of_x_i_with_k(power_ofs, factors)]
18 |       end
19 |
20 |       defp matrix_line(line, factors, degree) do
21 |         line_factor = line - 1
22 |         power_ofs = Enum.to_list(line_factor..(degree + line_factor))
23 |         sum_of_x_i_with_k(power_ofs, factors)
24 |       end
25 |
26 |       defp matrix(factors, degree) do
27 |         lines = Enum.to_list(1..(degree + 1))
28 |         Enum.map(lines, fn line ->
29 |           matrix_line(line, factors, degree)
30 |         end)
31 |       end
32 |
33 |       defp sum_of_x_i_with_k(ks, factors) do
34 |         Enum.map(ks, fn factor ->
35 |           sum_x_with_k(factors, factor, 0.0)
36 |         end)
37 |       end
38 |
39 |       defp substitute_coefficients([], _, _, sum), do: sum
40 |
41 |       defp substitute_coefficients([coefficient | tail], x, k, sum) do
42 |         sum = sum + :math.pow(x, k) * coefficient
43 |         substitute_coefficients(tail, x, k - 1, sum)
44 |       end
45 |
46 |       defp sum_x_with_k([x | tail], k, sum) do
47 |         sum = sum + :math.pow(x, k)
48 |         sum_x_with_k(tail, k, sum)
49 |       end
50 |
51 |       defp sum_x_with_k([], _, sum), do: sum
52 |
53 |       defp sum_x_y_with_k([], [], _degree, sum), do: [sum]
54 |
55 |       defp sum_x_y_with_k([x | xtail], [y | ytail], degree, sum) do
56 |         exponent = degree - 1
57 |         sum = sum + :math.pow(x, exponent) * y
58 |         sum_x_y_with_k(xtail, ytail, degree, sum)
59 |       end
60 |
61 |       defp x_y_matrix(_, _, 0, matrix), do: matrix |> Enum.reverse()
62 |
63 |       defp x_y_matrix(xs, ys, degree, matrix) do
64 |         matrix = matrix ++ [sum_x_y_with_k(xs, ys, degree, 0.0)]
65 |         x_y_matrix(xs, ys, degree - 1, matrix)
66 |       end
67 |     end
68 |   end
69 | end
70 |
--------------------------------------------------------------------------------
/test/learn_kit/naive_bayes/gaussian_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.NaiveBayes.GaussianTest do
2 |   use ExUnit.Case
3 |
4 |   alias LearnKit.NaiveBayes.Gaussian
5 |
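  # The fit/predict expectations below follow the per-feature Gaussian density
  # implemented in lib/learn_kit/naive_bayes/gaussian/classify.ex:
  #   P(x | label) = exp(-(x - mean)^2 / (2 * variance)) / sqrt(2 * PI * variance)
  # multiplied across features and scaled by the uniform class prior 1 / labels_count.
  # E.g. predict_proba([1, 2]) for :label2 (fitted mean/variance pairs {1.0, 3.5}
  # and {0.5, 2.25}) gives 0.5 * 0.21324 * 0.16132, i.e. the asserted 0.017199571.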
6 |   setup_all do
7 |     {:ok, classifier: Gaussian.new([{:label1, [[-1, -1], [-2, -1], [-3, -2]]}, {:label2, [[1, 1], [2, 1], [3, 2], [-2, -2]]}])}
8 |   end
9 |
10 |   test "create new gaussian classifier with empty data set" do
11 |     assert %Gaussian{data_set: data_set} = Gaussian.new
12 |
13 |     assert data_set == []
14 |   end
15 |
16 |   test "add train data to classifier" do
17 |     %Gaussian{data_set: data_set} =
18 |       Gaussian.new
19 |       |> Gaussian.add_train_data({:a1, [1, 2]})
20 |       |> Gaussian.add_train_data({:a1, [1, 3]})
21 |       |> Gaussian.add_train_data({:b1, [2, 3]})
22 |
23 |     assert data_set == [b1: [[2, 3]], a1: [[1, 3], [1, 2]]]
24 |   end
25 |
26 |   test "normalize data set", state do
27 |     %Gaussian{data_set: data_set} = state[:classifier] |> Gaussian.normalize_train_data("minimax")
28 |
29 |     assert data_set ==
30 |       [
31 |         label1: [[0.3333333333333333, 0.25], [0.16666666666666666, 0.25], [0.0, 0.0]],
32 |         label2: [[0.6666666666666666, 0.75], [0.8333333333333334, 0.75], [1.0, 1.0], [0.16666666666666666, 0.0]]
33 |       ]
34 |   end
35 |
36 |   test "fit data set", state do
37 |     %Gaussian{fit_data: fit_data} = state[:classifier] |> Gaussian.fit
38 |
39 |     assert fit_data ==
40 |       [
41 |         label1: [
42 |           %{mean: -2.0, standard_deviation: 0.816496580927726, variance: 0.6666666666666666},
43 |           %{mean: -1.3333333333333333, standard_deviation: 0.4714045207910317, variance: 0.2222222222222222}
44 |         ],
45 |         label2: [
46 |           %{mean: 1.0, standard_deviation: 1.8708286933869707, variance: 3.5},
47 |           %{mean: 0.5, standard_deviation: 1.5, variance: 2.25}
48 |         ]
49 |       ]
50 |   end
51 |
52 |   test "return probability estimates for the feature", state do
53 |     classifier = state[:classifier] |> Gaussian.fit
54 |
55 |     assert {:ok, result} = classifier |> Gaussian.predict_proba([1, 2])
56 |     assert result == [label1: 0.0, label2: 0.017199571]
57 |   end
58 |
59 |   test "return exact prediction for the feature", state do
60 |     classifier = state[:classifier] |> Gaussian.fit
61 |
62 |     assert {:ok, result} = classifier |> Gaussian.predict([1, 2])
63 |     assert result == {:label2, 0.017199571}
64 |   end
65 |
66 |   test "returns the mean accuracy on the given test data and labels", state do
67 |     classifier = state[:classifier] |> Gaussian.fit
68 |
69 |     assert {:ok, result} = classifier |> Gaussian.score
70 |     assert result == 0.857143
71 |   end
72 | end
73 |
--------------------------------------------------------------------------------
/test/learn_kit/knn_test.exs:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.KnnTest do
2 |   use ExUnit.Case
3 |   alias LearnKit.Knn
4 |
5 |   setup_all do
6 |     {:ok, classifier: Knn.new([{:a1, [[-1, -1], [-2, -1], [-3, -2]]}, {:b1, [[1, 1], [2, 1], [3, 2], [-2, -2]]}])}
7 |   end
8 |
9 |   describe "for invalid data" do
10 |     test "create new classifier with invalid data" do
11 |       assert_raise FunctionClauseError, fn ->
12 |         Knn.new("")
13 |       end
14 |     end
15 |
16 |     test "add train data in invalid format", state do
17 |       assert_raise FunctionClauseError, fn ->
18 |         Knn.add_train_data(state[:classifier], {:something_valid, "invalid"})
19 |       end
20 |     end
21 |
22 |     test "classify without options", state do
23 |       assert_raise FunctionClauseError, fn ->
24 |         Knn.classify(state[:classifier], "")
25 |       end
26 |     end
27 |
28 |     test "classify with empty options", state do
29 |       assert {:error, "Feature option is required"} = Knn.classify(state[:classifier], [])
30 |     end
31 |
32 |     test "classify with invalid feature", state do
33 |       assert {:error, "Feature option must be presented as array"} = Knn.classify(state[:classifier], [feature: "1"])
34 |     end
35 |
36 |     test "classify with invalid k", state do
37 |       assert {:error, "K option must be positive integer"} = Knn.classify(state[:classifier], [feature: [-1, -2], k: -2])
38 |     end
39 |   end
40 |
41 |   describe "for valid data" do
42 |     test "create new knn classifier with empty data set" do
43 |       assert %Knn{data_set: data_set} = Knn.new
44 |
45 |       assert data_set == []
46 |     end
47 |
48 |     test "add train data to classifier" do
49 |       %Knn{data_set: data_set} =
50 |         Knn.new
51 |         |> Knn.add_train_data({:a1, [1, 2]})
52 |         |> Knn.add_train_data({:a1, [1, 3]})
53 |         |> Knn.add_train_data({:b1, [2, 3]})
54 |
55 |       assert data_set == [b1: [[2, 3]], a1: [[1, 3], [1, 2]]]
56 |     end
57 |
58 |     test "classify new feature", state do
59 |       assert {:ok, :a1} = Knn.classify(state[:classifier], [feature: [-1, -2], k: 3, weight: "distance"])
60 |     end
61 |
62 |     test "classify new feature, for existing point", state do
63 |       assert {:ok, :b1} = Knn.classify(state[:classifier], [feature: [-2, -2], k: 3, weight: "uniform"])
64 |     end
65 |
66 |     test "classify new feature, minimax normalization", state do
67 |       assert {:ok, :a1} = Knn.classify(state[:classifier], [feature: [-1, -2], k: 3, weight: "distance", normalization: "minimax"])
68 |     end
69 |
70 |     test "classify new feature, z normalization", state do
71 |       assert {:ok, :a1} = Knn.classify(state[:classifier], [feature: [-1, -2], k: 3, weight: "distance", normalization: "z_normalization"])
72 |     end
73 |   end
74 | end
75 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/linear/calculations.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Linear.Calculations do
2 |   @moduledoc """
3 |   Module for linear regression calculations
4 |   """
5 |
6 |   alias LearnKit.{Math, Regression.Linear}
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp do_fit("gradient descent", %Linear{factors: factors, results: results}) do
11 |         gradient_descent_iteration(
12 |           [:rand.uniform(), :rand.uniform()],
13 |           0.0001,
14 |           nil,
15 |           1_000_000,
16 |           Enum.zip(factors, results),
17 |           0
18 |         )
19 |       end
20 |
21 |       defp do_fit(_, %Linear{factors: factors, results: results}) do
22 |         beta = calc_beta(factors, results)
23 |         alpha = Math.mean(results) - beta * Math.mean(factors)
24 |         [alpha, beta]
25 |       end
26 |
27 |       defp do_predict(linear, samples) do
28 |         Enum.map(samples, fn sample ->
29 |           {:ok, prediction} = predict(linear, sample)
30 |           prediction
31 |         end)
32 |       end
33 |
34 |       defp calc_beta(factors, results) do
35 |         Math.correlation(factors, results) * Math.standard_deviation(results) / Math.standard_deviation(factors)
36 |       end
37 |
38 |       defp squared_error_gradient(linear, x, y) do
39 |         error_variable = prediction_error(linear, x, y)
40 |         [
41 |           -2 * error_variable,
42 |           -2 * error_variable * x
43 |         ]
44 |       end
45 |
46 |       defp gradient_descent_iteration(_, _, min_theta, _, _, no_improve_step) when no_improve_step >= 100, do: min_theta
47 |
48 |       defp gradient_descent_iteration(theta, alpha, min_theta, min_value, data, no_improve_step) do
49 |         [
50 |           min_theta,
51 |           min_value,
52 |           no_improve_step,
53 |           alpha
54 |         ] = check_value(data, min_value, theta, min_theta, no_improve_step, alpha)
55 |
56 |         calc_new_theta(data, theta, alpha)
57 |         |> gradient_descent_iteration(alpha, min_theta, min_value, data, no_improve_step)
58 |       end
59 |
60 |       defp calc_new_theta(data, theta, alpha) do
61 |         data
62 |         |> Enum.shuffle()
63 |         |> Enum.reduce(theta, fn {xi, yi}, acc ->
64 |           gradient_i = squared_error_gradient(%Linear{coefficients: theta}, xi, yi)
65 |           acc |> Math.vector_subtraction(alpha |> Math.scalar_multiply(gradient_i))
66 |         end)
67 |       end
68 |
69 |       defp check_value(data, min_value, theta, min_theta, no_improve_step, alpha) do
70 |         value = calc_new_value(data, theta)
71 |         cond do
72 |           value < min_value -> [theta, value, 0, 0.0001]
73 |           true -> [min_theta, min_value, no_improve_step + 1, alpha * 0.9]
74 |         end
75 |       end
76 |
77 |       defp calc_new_value(data, theta) do
78 |         Enum.reduce(data, 0, fn {xi, yi}, acc ->
79 |           acc + squared_prediction_error(%Linear{coefficients: theta}, xi, yi)
80 |         end)
81 |       end
82 |     end
83 |   end
84 | end
85 |
--------------------------------------------------------------------------------
/lib/learn_kit/knn.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Knn do
2 |   @moduledoc """
3 |   Module for k-nearest neighbours (knn) algorithm
4 |   """
5 |
6 |   defstruct data_set: []
7 |
8 |   alias LearnKit.Knn
9 |   use Knn.Classify
10 |
11 |   @type label :: atom
12 |   @type feature :: [integer]
13 |   @type point :: {label, feature}
14 |   @type features :: [feature]
15 |   @type data_set :: [{label, features}]
16 |
17 |   @doc """
18 |   Creates classifier with empty data_set
19 |
20 |   ## Examples
21 |
22 |       iex> classifier = LearnKit.Knn.new
23 |       %LearnKit.Knn{data_set: []}
24 |
25 |   """
26 |   @spec new() :: %Knn{data_set: []}
27 |
28 |   def new, do: Knn.new([])
29 |
30 |   @doc """
31 |   Creates classifier with data_set
32 |
33 |   ## Parameters
34 |
35 |     - data_set: Keyword list with labels and features in tuples
36 |
37 |   ## Examples
38 |
39 |       iex> classifier = LearnKit.Knn.new([{:a1, [[1, 2], [2, 3]]}, {:b1, [[-1, -2]]}])
40 |       %LearnKit.Knn{data_set: [a1: [[1, 2], [2, 3]], b1: [[-1, -2]]]}
41 |
42 |   """
43 |   @spec new(data_set) :: %Knn{data_set: data_set}
44 |
45 |   def new(data_set) when is_list(data_set), do: %Knn{data_set: data_set}
46 |
47 |   @doc """
48 |   Add train data to classifier
49 |
50 |   ## Parameters
51 |
52 |     - classifier: %LearnKit.Knn{}
53 |     - train data: tuple with label and feature
54 |
55 |   ## Examples
56 |
57 |       iex> classifier = classifier |> LearnKit.Knn.add_train_data({:a1, [-1, -1]})
58 |       %LearnKit.Knn{data_set: [a1: [[-1, -1]]]}
59 |
60 |   """
61 |   @spec add_train_data(%Knn{data_set: data_set}, point) :: %Knn{data_set: data_set}
62 |
63 |   def add_train_data(%Knn{data_set: data_set}, {key, value}) when is_atom(key) and is_list(value) do
64 |     features = if Keyword.has_key?(data_set, key), do: data_set[key], else: []
65 |     data_set = Keyword.put(data_set, key, [value | features])
66 |     %Knn{data_set: data_set}
67 |   end
68 |
69 |   @doc """
70 |   Classify label of the new feature
71 |
72 |   ## Parameters
73 |
74 |     - classifier: %LearnKit.Knn{}
75 |     - options: keyword list with options
76 |
77 |   ## Options
78 |
79 |     - feature: feature for classification, required, example: [1, 2, 3]
80 |     - k: number of nearest neighbours, default is 3, optional
81 |     - algorithm: brute, optional
82 |     - weight: uniform/distance, default is uniform, optional
83 |     - normalization: none/minimax/z_normalization, default is none, optional
84 |
85 |   ## Examples
86 |
87 |       iex> classifier |> LearnKit.Knn.classify([feature: [-1, -2], k: 3, weight: "distance"])
88 |       {:ok, :a1}
89 |
90 |   """
91 |   @spec classify(%Knn{data_set: data_set}, [tuple]) :: {:ok, label} | {:error, String.t()}
92 |
93 |   def classify(%Knn{data_set: data_set}, options) when is_list(options) do
94 |     cond do
95 |       !Keyword.has_key?(options, :feature) ->
96 |         {:error, "Feature option is required"}
| {:error, "Feature option is required"} 97 | 98 | !is_list(options[:feature]) -> 99 | {:error, "Feature option must be presented as array"} 100 | 101 | Keyword.has_key?(options, :k) && (!is_integer(options[:k]) || options[:k] <= 0) -> 102 | {:error, "K option must be positive integer"} 103 | 104 | true -> 105 | options = Keyword.merge([k: 3, algorithm: "brute", weight: "uniform", normalization: "none"], options) 106 | {label, _} = prediction(data_set, options) 107 | {:ok, label} 108 | end 109 | end 110 | end 111 | -------------------------------------------------------------------------------- /lib/learn_kit/preprocessing.ex: -------------------------------------------------------------------------------- 1 | defmodule LearnKit.Preprocessing do 2 | @moduledoc """ 3 | Module for data preprocessing 4 | """ 5 | 6 | alias LearnKit.{Preprocessing, Math} 7 | use Preprocessing.Normalize 8 | 9 | @type row :: [number] 10 | @type matrix :: [row] 11 | 12 | @doc """ 13 | Normalize data set with minimax normalization 14 | 15 | ## Parameters 16 | 17 | - features: list of features for normalization 18 | 19 | ## Examples 20 | 21 | iex> LearnKit.Preprocessing.normalize([[1, 2], [3, 4], [5, 6]]) 22 | [ 23 | [0.0, 0.0], 24 | [0.5, 0.5], 25 | [1.0, 1.0] 26 | ] 27 | 28 | """ 29 | @spec normalize(matrix) :: matrix 30 | 31 | def normalize(features) when is_list(features), do: normalize(features, [type: "minimax"]) 32 | 33 | @doc """ 34 | Normalize data set 35 | 36 | ## Parameters 37 | 38 | - features: list of features for normalization 39 | - options: keyword list with options 40 | 41 | ## Options 42 | 43 | - type: minimax/z_normalization, default is minimax, optional 44 | 45 | ## Examples 46 | 47 | iex> LearnKit.Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], [type: "z_normalization"]) 48 | [ 49 | [-1.224744871391589, -1.224744871391589], 50 | [0.0, 0.0], 51 | [1.224744871391589, 1.224744871391589] 52 | ] 53 | 54 | """ 55 | @spec normalize(matrix, list) :: matrix 56 | 57 | def normalize(features, options) when is_list(features) and is_list(options) do 58 | options = Keyword.merge([type: "minimax"], options) 59 | case options[:type] do 60 | "z_normalization" -> normalization(features, "z_normalization") 61 | _ -> normalization(features, "minimax") 62 | end 63 | end 64 | 65 | @doc """ 66 | Prepare coefficients for normalization 67 | 68 | ## Parameters 69 | 70 | - features: features grouped by index 71 | - type: minimax/z_normalization 72 | 73 | ## Examples 74 | 75 | iex> LearnKit.Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "minimax") 76 | [{1, 5}, {2, 6}] 77 | 78 | iex> LearnKit.Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "z_normalization") 79 | [{3.0, 1.632993161855452}, {4.0, 1.632993161855452}] 80 | 81 | """ 82 | @spec coefficients(matrix, String.t()) :: matrix 83 | 84 | def coefficients(features, type) when is_list(features) and is_binary(type) do 85 | features 86 | |> Math.transpose() 87 | |> Enum.map(fn list -> return_params(list, type) end) 88 | end 89 | 90 | @doc """ 91 | Normalize 1 feature with predefined coefficients 92 | 93 | ## Parameters 94 | 95 | - feature: feature for normalization 96 | - coefficients: predefined coefficients 97 | - type: minimax/z_normalization 98 | 99 | ## Examples 100 | 101 | iex> LearnKit.Preprocessing.normalize_feature([1, 2], [{1, 5}, {2, 6}], "minimax") 102 | [0.0, 0.0] 103 | 104 | """ 105 | @spec normalize_feature(list, list(tuple), String.t()) :: list 106 | 107 | def normalize_feature(feature, coefficients, type) when is_list(feature) and 
108 |     Enum.zip(feature, coefficients)
109 |     |> Enum.map(fn {point, params_for_point} ->
110 |       divider = define_divider(params_for_point, type)
111 |       case divider do
112 |         0 -> point
113 |         _ -> (point - elem(params_for_point, 0)) / divider
114 |       end
115 |     end)
116 |   end
117 | end
118 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/linear.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Linear do
2 |   @moduledoc """
3 |   Module for Linear Regression algorithm
4 |   """
5 |
6 |   defstruct factors: [], results: [], coefficients: []
7 |
8 |   alias LearnKit.Regression.Linear
9 |   use Linear.Calculations
10 |   use LearnKit.Regression.Score
11 |
12 |   @type factors :: [number]
13 |   @type results :: [number]
14 |   @type coefficients :: [number]
15 |
16 |   @doc """
17 |   Creates predictor with empty data_set
18 |
19 |   ## Examples
20 |
21 |       iex> predictor = LearnKit.Regression.Linear.new
22 |       %LearnKit.Regression.Linear{factors: [], results: [], coefficients: []}
23 |
24 |   """
25 |   @spec new() :: %Linear{factors: [], results: [], coefficients: []}
26 |
27 |   def new, do: Linear.new([], [])
28 |
29 |   @doc """
30 |   Creates predictor with data_set
31 |
32 |   ## Parameters
33 |
34 |     - factors: List of predictor variables
35 |     - results: List of criterion variables
36 |
37 |   ## Examples
38 |
39 |       iex> predictor = LearnKit.Regression.Linear.new([1, 2, 3, 4], [3, 6, 10, 15])
40 |       %LearnKit.Regression.Linear{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: []}
41 |
42 |   """
43 |   @spec new(factors, results) :: %Linear{factors: factors, results: results, coefficients: []}
44 |
45 |   def new(factors, results) when is_list(factors) and is_list(results) do
46 |     %Linear{factors: factors, results: results}
47 |   end
48 |
49 |   @doc """
50 |   Fit train data
51 |
52 |   ## Parameters
53 |
54 |     - predictor: %LearnKit.Regression.Linear{}
55 |     - options: keyword list with options
56 |
57 |   ## Options
58 |
59 |     - method: method for fit, "least squares"/"gradient descent", default is "least squares", optional
60 |
61 |   ## Examples
62 |
63 |       iex> predictor = predictor |> LearnKit.Regression.Linear.fit
64 |       %LearnKit.Regression.Linear{
65 |         coefficients: [-1.5, 4.0],
66 |         factors: [1, 2, 3, 4],
67 |         results: [3, 6, 10, 15]
68 |       }
69 |
70 |       iex> predictor = predictor |> LearnKit.Regression.Linear.fit([method: "gradient descent"])
71 |       %LearnKit.Regression.Linear{
72 |         coefficients: [-1.4975720508482548, 3.9992148848913356],
73 |         factors: [1, 2, 3, 4],
74 |         results: [3, 6, 10, 15]
75 |       }
76 |
77 |   """
78 |   @spec fit(%Linear{factors: factors, results: results}) :: %Linear{factors: factors, results: results, coefficients: coefficients}
79 |
80 |   def fit(linear = %Linear{factors: factors, results: results}, options \\ []) when is_list(options) do
81 |     coefficients =
82 |       Keyword.merge([method: ""], options)
83 |       |> define_method_for_fit()
84 |       |> do_fit(linear)
85 |     %Linear{factors: factors, results: results, coefficients: coefficients}
86 |   end
87 |
88 |   defp define_method_for_fit(options) do
89 |     case options[:method] do
90 |       "gradient descent" -> "gradient descent"
91 |       _ -> ""
92 |     end
93 |   end
94 |
95 |   @doc """
96 |   Predict using the linear model
97 |
98 |   ## Parameters
99 |
100 |     - predictor: %LearnKit.Regression.Linear{}
101 |     - samples: List of variables
102 |
103 |   ## Examples
104 |
105 |       iex> predictor |> LearnKit.Regression.Linear.predict([4, 8, 13])
106 |       {:ok, [14.5, 30.5, 50.5]}
107 |
108 |   """
109 |   @spec predict(%Linear{coefficients: coefficients}, list) :: {:ok, list}
110 |
111 |   def predict(linear = %Linear{coefficients: _}, samples) when is_list(samples) do
112 |     {
113 |       :ok,
114 |       do_predict(linear, samples)
115 |     }
116 |   end
117 |
118 |   @doc """
119 |   Predict using the linear model
120 |
121 |   ## Parameters
122 |
123 |     - predictor: %LearnKit.Regression.Linear{}
124 |     - sample: Sample variable
125 |
126 |   ## Examples
127 |
128 |       iex> predictor |> LearnKit.Regression.Linear.predict(4)
129 |       {:ok, 14.5}
130 |
131 |   """
132 |   @spec predict(%Linear{coefficients: coefficients}, number) :: {:ok, number}
133 |
134 |   def predict(%Linear{coefficients: [alpha, beta]}, sample) do
135 |     {:ok, sample * beta + alpha}
136 |   end
137 | end
138 |
--------------------------------------------------------------------------------
/lib/learn_kit/regression/polynomial.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Regression.Polynomial do
2 |   @moduledoc """
3 |   Module for Polynomial Regression algorithm
4 |   """
5 |
6 |   defstruct factors: [], results: [], coefficients: [], degree: 2
7 |
8 |   alias LearnKit.Regression.Polynomial
9 |   use Polynomial.Calculations
10 |   use LearnKit.Regression.Score
11 |
12 |   @type factors :: [number]
13 |   @type results :: [number]
14 |   @type coefficients :: [number]
15 |   @type degree :: integer
16 |
17 |   @doc """
18 |   Creates polynomial predictor with data_set
19 |
20 |   ## Parameters
21 |
22 |     - factors: List of predictor variables
23 |     - results: List of criterion variables
24 |
25 |   ## Examples
26 |
27 |       iex> predictor = LearnKit.Regression.Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])
28 |       %LearnKit.Regression.Polynomial{factors: [1, 2, 3, 4], results: [3, 6, 10, 15], coefficients: [], degree: 2}
29 |
30 |   """
31 |   @spec new(factors, results) :: %Polynomial{factors: factors, results: results, coefficients: [], degree: 2}
32 |
33 |   def new(factors, results) when is_list(factors) and is_list(results) do
34 |     %Polynomial{factors: factors, results: results}
35 |   end
36 |
37 |   def new(_, _), do: Polynomial.new([], [])
38 |   def new, do: Polynomial.new([], [])
39 |
40 |   @doc """
41 |   Fit train data
42 |
43 |   ## Parameters
44 |
45 |     - predictor: %LearnKit.Regression.Polynomial{}
46 |     - options: keyword list with options
47 |
48 |   ## Options
49 |
50 |     - degree: nth degree of polynomial model, default set to 2
51 |
52 |   ## Examples
53 |
54 |       iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit
55 |       %LearnKit.Regression.Polynomial{
56 |         coefficients: [0.9999999999998295, 1.5000000000000853, 0.4999999999999787],
57 |         degree: 2,
58 |         factors: [1, 2, 3, 4],
59 |         results: [3, 6, 10, 15]
60 |       }
61 |
62 |       iex> predictor = predictor |> LearnKit.Regression.Polynomial.fit([degree: 3])
63 |       %LearnKit.Regression.Polynomial{
64 |         coefficients: [1.0000000000081855, 1.5000000000013642, 0.5,
65 |          8.526512829121202e-14],
66 |         degree: 3,
67 |         factors: [1, 2, 3, 4],
68 |         results: [3, 6, 10, 15]
69 |       }
70 |
71 |   """
72 |   @spec fit(%Polynomial{factors: factors, results: results}) :: %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}
73 |
74 |   def fit(%Polynomial{factors: factors, results: results}, options \\ []) do
75 |     degree = options[:degree] || 2
76 |     matrix = matrix(factors, degree)
77 |     xys = x_y_matrix(factors, results, degree + 1, [])
78 |     coefficients = matrix |> Matrix.inv() |> Matrix.mult(xys) |> List.flatten()
79 |     %Polynomial{factors: factors, results: results, coefficients: coefficients, degree: degree}
80 |   end
81 |
82 |   @doc """
83 |   Predict using the polynomial model
84 |
85 |   ## Parameters
86 |
87 |     - predictor: %LearnKit.Regression.Polynomial{}
88 |     - samples: List of variables
89 |
90 |   ## Examples
91 |
92 |       iex> predictor |> LearnKit.Regression.Polynomial.predict([5,6])
93 |       {:ok, [20.999999999999723, 27.999999999999574]}
94 |
95 |   """
96 |   @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, list) :: {:ok, list}
97 |
98 |   def predict(polynomial = %Polynomial{coefficients: _, degree: _}, samples) when is_list(samples) do
99 |     {:ok, do_predict(polynomial, samples)}
100 |   end
101 |
102 |   @doc """
103 |   Predict using the polynomial model
104 |
105 |   ## Parameters
106 |
107 |     - predictor: %LearnKit.Regression.Polynomial{}
108 |     - sample: Sample variable
109 |
110 |   ## Examples
111 |
112 |       iex> predictor |> LearnKit.Regression.Polynomial.predict(5)
113 |       {:ok, 20.999999999999723}
114 |
115 |   """
116 |   @spec predict(%Polynomial{coefficients: coefficients, degree: degree}, number) :: {:ok, number}
117 |
118 |   def predict(%Polynomial{coefficients: coefficients, degree: degree}, sample) do
119 |     ordered_coefficients = coefficients |> Enum.reverse()
120 |     {:ok, substitute_coefficients(ordered_coefficients, sample, degree, 0.0)}
121 |   end
122 | end
123 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LearnKit
2 |
3 | Elixir package for machine learning
4 |
5 | Available preprocessing methods:
6 |
7 | - Normalization
8 |
9 | Available algorithms for prediction:
10 |
11 | - Linear Regression
12 | - Polynomial Regression (usage example at the end of this README)
13 |
14 | Available algorithms for classification:
15 |
16 | - K-Nearest Neighbours
17 | - Gaussian Naive Bayes
18 |
19 | ## Installation
20 |
21 | The package can be installed by adding `learn_kit`
22 | to your list of dependencies in `mix.exs`:
23 |
24 | ```elixir
25 | def deps do
26 |   [
27 |     {:learn_kit, "~> 0.1.6"}
28 |   ]
29 | end
30 | ```
31 |
32 | ### Normalization
33 |
34 | Normalize data set with minimax normalization
35 |
36 | ```elixir
37 | alias LearnKit.Preprocessing
38 | Preprocessing.normalize([[1, 2], [3, 4], [5, 6]])
39 | ```
40 |
41 | Or normalize data set with selected type
42 |
43 | ```elixir
44 | Preprocessing.normalize([[1, 2], [3, 4], [5, 6]], [type: "z_normalization"])
45 | ```
46 | options - keyword list with options
47 |
48 | Additionally, you can prepare coefficients for normalization
49 |
50 | ```elixir
51 | Preprocessing.coefficients([[1, 2], [3, 4], [5, 6]], "minimax")
52 | ```
53 | type - method of normalization, one of the [minimax|z_normalization], required
54 |
55 | Then normalize a single feature with the predefined coefficients
56 |
57 | ```elixir
58 | Preprocessing.normalize_feature([1, 2], [{1, 5}, {2, 6}], "minimax")
59 | ```
60 | type - method of normalization, one of the [minimax|z_normalization], required
61 |
62 | ### Linear Regression
63 |
64 | Initialize the predictor with data:
65 |
66 | ```elixir
67 | alias LearnKit.Regression.Linear
68 | predictor = Linear.new([1, 2, 3, 4], [3, 6, 10, 15])
69 | ```
70 |
71 | Fit the data set with the least squares method:
72 |
73 | ```elixir
74 | predictor = predictor |> Linear.fit
75 | ```
76 |
77 | Fit the data set with the gradient descent method:
78 |
79 | ```elixir
80 | predictor = predictor |> Linear.fit([method: "gradient descent"])
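# Note: the gradient descent fit is stochastic - it shuffles the
# (factor, result) pairs, decays the learning rate after non-improving
# steps and stops after 100 steps without improvement (see
# lib/learn_kit/regression/linear/calculations.ex) - so the returned
# coefficients only approximate the closed-form [-1.5, 4.0].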
81 | ```
82 |
83 | Predict using the linear model:
84 |
85 | ```elixir
86 | predictor |> Linear.predict([4, 8, 13])
87 | ```
88 | samples - list of variables, required
89 |
90 | Return the coefficient of determination R^2 of the prediction:
91 |
92 | ```elixir
93 | predictor |> Linear.score
94 | ```
95 |
96 | ### K-Nearest Neighbours classification
97 |
98 | Initialize the classifier with a data set consisting of labels and features:
99 |
100 | ```elixir
101 | alias LearnKit.Knn
102 | classifier =
103 |   Knn.new
104 |   |> Knn.add_train_data({:a1, [-1, -1]})
105 |   |> Knn.add_train_data({:a1, [-2, -1]})
106 |   |> Knn.add_train_data({:a2, [1, 1]})
107 | ```
108 |
109 | Predict the label for a new feature:
110 |
111 | ```elixir
112 | Knn.classify(classifier, [feature: [-1, -2], k: 3, weight: "distance", normalization: "minimax"])
113 | ```
114 | feature - new feature for prediction, required
115 | k - number of nearest neighbours, optional, default - 3
116 | algorithm - algorithm for calculation of distances, one of the [brute], optional, default - "brute"
117 | weight - method of weighted neighbours, one of the [uniform|distance], optional, default - "uniform"
118 | normalization - method of normalization, one of the [none|minimax|z_normalization], optional, default - "none"
119 |
120 | ### Gaussian Naive Bayes classification
121 |
122 | Initialize the classifier with a data set consisting of labels and features:
123 |
124 | ```elixir
125 | alias LearnKit.NaiveBayes.Gaussian
126 | classifier =
127 |   Gaussian.new
128 |   |> Gaussian.add_train_data({:a1, [-1, -1]})
129 |   |> Gaussian.add_train_data({:a1, [-2, -1]})
130 |   |> Gaussian.add_train_data({:a2, [1, 1]})
131 | ```
132 |
133 | Normalize the data set:
134 |
135 | ```elixir
136 | classifier = classifier |> Gaussian.normalize_train_data("minimax")
137 | ```
138 | type - method of normalization, one of the [minimax|z_normalization], required; any other value leaves the data set unchanged
139 |
140 | Fit the data set:
141 |
142 | ```elixir
143 | classifier = classifier |> Gaussian.fit
144 | ```
145 |
146 | Return probability estimates for the feature:
147 |
148 | ```elixir
149 | classifier |> Gaussian.predict_proba([1, 2])
150 | ```
151 | feature - new feature for prediction, required
152 |
153 | Return the exact prediction for the feature:
154 |
155 | ```elixir
156 | classifier |> Gaussian.predict([1, 2])
157 | ```
158 | feature - new feature for prediction, required
159 |
160 | Return the mean accuracy on the given test data and labels:
161 |
162 | ```elixir
163 | classifier |> Gaussian.score
164 | ```
165 |
166 | ## Contributing
167 |
168 | Bug reports and pull requests are welcome on GitHub at https://github.com/kortirso/elixir_learn_kit.
169 |
170 | ## License
171 |
172 | The package is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
173 |
174 | ## Disclaimer
175 |
176 | Use this package at your own peril and risk.
177 |
178 | ## Documentation
179 |
180 | Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
181 | and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
182 | be found at [https://hexdocs.pm/learn_kit](https://hexdocs.pm/learn_kit).
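
### Polynomial Regression

Usage mirrors Linear Regression; a minimal sketch based on the documented examples of `LearnKit.Regression.Polynomial`:

```elixir
alias LearnKit.Regression.Polynomial

predictor =
  Polynomial.new([1, 2, 3, 4], [3, 6, 10, 15])
  |> Polynomial.fit([degree: 2])

predictor |> Polynomial.predict([5, 6])
# {:ok, [20.999999999999723, 27.999999999999574]}

predictor |> Polynomial.score
```
degree - nth degree of the polynomial model, optional, default - 2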
183 |
184 |
--------------------------------------------------------------------------------
/lib/learn_kit/knn/classify.ex:
--------------------------------------------------------------------------------
1 | defmodule LearnKit.Knn.Classify do
2 |   @moduledoc """
3 |   Module for knn classify functions
4 |   """
5 |
6 |   alias LearnKit.{Preprocessing, Math}
7 |
8 |   defmacro __using__(_opts) do
9 |     quote do
10 |       defp prediction(data_set, options) do
11 |         data_set
12 |         |> filter_features_by_size(options[:feature])
13 |         |> check_normalization(options)
14 |         |> calc_distances_for_features(options)
15 |         |> sort_distances()
16 |         |> select_closest_features(options)
17 |         |> check_zero_distance(options)
18 |       end
19 |
20 |       # knn uses only features with the same size as the current feature
21 |       defp filter_features_by_size(data_set, current_feature) do
22 |         Enum.map(data_set, fn {key, features} ->
23 |           {
24 |             key,
25 |             Enum.filter(features, fn feature -> length(feature) == length(current_feature) end)
26 |           }
27 |         end)
28 |       end
29 |
30 |       # normalize features
31 |       defp check_normalization(data_set, options) do
32 |         type = options[:normalization]
33 |         case type do
34 |           t when t in ["minimax", "z_normalization"] -> normalize(data_set, type)
35 |           _ -> data_set
36 |         end
37 |       end
38 |
39 |       # select algorithm for prediction
40 |       defp calc_distances_for_features(data_set, options) do
41 |         case options[:algorithm] do
42 |           "brute" -> brute_algorithm(data_set, options)
43 |           _ -> []
44 |         end
45 |       end
46 |
47 |       # sort distances
48 |       defp sort_distances(features), do: Enum.sort(features, &(elem(&1, 0) <= elem(&2, 0)))
49 |
50 |       # take closest features
51 |       defp select_closest_features(features, options), do: Enum.take(features, options[:k])
52 |
53 |       # check existence of the current feature in the data set
54 |       defp check_zero_distance(closest_features, options) do
55 |         {distance, label} = Enum.at(closest_features, 0)
56 |         cond do
57 |           distance == 0 -> {label, 0}
58 |           true -> select_best_label(closest_features, options)
59 |         end
60 |       end
61 |
62 |       # select best result based on weights
63 |       defp select_best_label(features, options) do
64 |         features
65 |         |> calc_feature_weights(options)
66 |         |> accumulate_weight_of_labels([])
67 |         |> sort_result()
68 |       end
69 |
70 |       # normalize each feature
71 |       defp normalize(data_set, type) do
72 |         coefficients = find_coefficients_for_normalization(data_set, type)
73 |         Enum.map(data_set, fn {key, features} ->
74 |           {
75 |             key,
76 |             Enum.map(features, fn feature -> Preprocessing.normalize_feature(feature, coefficients, type) end)
77 |           }
78 |         end)
79 |       end
80 |
81 |       # find coefficients for normalization
82 |       defp find_coefficients_for_normalization(data_set, type) do
83 |         Enum.reduce(data_set, [], fn {_, features}, acc ->
84 |           Enum.reduce(features, acc, fn feature, acc -> [feature | acc] end)
85 |         end)
86 |         |> Preprocessing.coefficients(type)
87 |       end
88 |
89 |       defp calc_feature_weights(features, options) do
90 |         Enum.map(features, fn feature ->
91 |           Tuple.append(feature, calc_feature_weight(options[:weight], elem(feature, 0)))
92 |         end)
93 |       end
94 |
95 |       defp sort_result(features) do
96 |         features
97 |         |> Enum.sort(&(elem(&1, 1) >= elem(&2, 1)))
98 |         |> List.first()
99 |       end
100 |
101 |       # brute algorithm for prediction
102 |       defp brute_algorithm(data_set, options) do
103 |         data_set
104 |         |> Keyword.keys()
105 |         |> handle_features_in_label(data_set, options[:feature])
106 |         |> List.flatten()
107 |       end
108 |
109 |       defp handle_features_in_label(keys, data_set, current_feature) do
110 |         Enum.map(keys, fn key ->
      # distances from every stored feature of one label to the current feature
      defp calc_distances_in_label(features, current_feature, key) do
        Enum.reduce(features, [], fn feature, acc ->
          distance = calc_distance_between_features(feature, current_feature)
          [{distance, key} | acc]
        end)
      end

      # Euclidean distance between two features
      defp calc_distance_between_features(feature_from_data_set, feature) do
        Enum.zip(feature_from_data_set, feature)
        |> calc_distance_between_points()
        |> :math.sqrt()
      end

      # sum of squared coordinate differences
      defp calc_distance_between_points(list) do
        Enum.reduce(list, 0, fn {xi, yi}, acc ->
          :math.pow(xi - yi, 2)
          |> Math.summ(acc)
        end)
      end

      # a zero distance is handled earlier in check_zero_distance,
      # so the division below is safe
      defp calc_feature_weight(weight, distance) do
        case weight do
          "uniform" -> 1
          "distance" -> 1 / :math.pow(distance, 2)
          _ -> 1
        end
      end

      # sum the vote weights per label
      defp accumulate_weight_of_labels([], acc), do: acc

      defp accumulate_weight_of_labels([{_, key, weight} | tail], acc) do
        previous = Keyword.get(acc, key, 0)
        acc = Keyword.put(acc, key, previous + weight)
        accumulate_weight_of_labels(tail, acc)
      end
    end
  end
end
--------------------------------------------------------------------------------
/lib/learn_kit/math.ex:
--------------------------------------------------------------------------------
defmodule LearnKit.Math do
  @moduledoc """
  Math module
  """

  @type row :: [number]
  @type matrix :: [row]

  @doc """
  Sum of 2 numbers

  ## Examples

      iex> LearnKit.Math.summ(1, 2)
      3

  """
  @spec summ(number, number) :: number

  def summ(a, b), do: a + b

  @doc """
  Division of 2 numbers

  ## Examples

      iex> LearnKit.Math.division(10, 2)
      5.0

  """
  @spec division(number, number) :: number

  def division(x, y) when y != 0, do: x / y

  @doc """
  Calculate the mean from a list of numbers

  ## Examples

      iex> LearnKit.Math.mean([])
      nil

      iex> LearnKit.Math.mean([1, 2, 3])
      2.0

  """
  @spec mean(list) :: number | nil

  def mean(list) when is_list(list), do: do_mean(list, 0, 0)

  defp do_mean([], 0, 0), do: nil

  defp do_mean([], sum, number), do: sum / number

  defp do_mean([head | tail], sum, number) do
    do_mean(tail, sum + head, number + 1)
  end

  @doc """
  Calculate the variance from a list of numbers

  ## Examples

      iex> LearnKit.Math.variance([])
      nil

      iex> LearnKit.Math.variance([1, 2, 3, 4])
      1.25

  """
  @spec variance(list) :: number | nil

  def variance([]), do: nil

  def variance(list) when is_list(list) do
    list_mean = mean(list)
    variance(list, list_mean)
  end

  @doc """
  Calculate the variance from a list of numbers, with a precalculated mean

  ## Examples

      iex> LearnKit.Math.variance([1, 2, 3, 4], 2.5)
      1.25

  """
  @spec variance(list, number) :: number

  def variance(list, list_mean) when is_list(list) do
    list
    |> Enum.map(fn x -> :math.pow(list_mean - x, 2) end)
    |> mean()
  end

  @doc """
  Calculate the standard deviation from a list of numbers

  ## Examples

      iex> LearnKit.Math.standard_deviation([])
      nil

      iex> LearnKit.Math.standard_deviation([1, 2])
      0.5

  """
  @spec standard_deviation(list) :: number | nil

  def standard_deviation([]), do: nil

  def standard_deviation(list) when is_list(list) do
    list
    |> variance()
    |> :math.sqrt()
  end

  @doc """
  Calculate the standard deviation from a precalculated variance

  ## Examples

      iex> LearnKit.Math.standard_deviation_from_variance(1.25)
      1.118033988749895

  """
  @spec standard_deviation_from_variance(number) :: number

  def standard_deviation_from_variance(list_variance) do
    :math.sqrt(list_variance)
  end

  @doc """
  Transposing a matrix

  ## Examples

      iex> LearnKit.Math.transpose([[1, 2], [3, 4], [5, 6]])
      [[1, 3, 5], [2, 4, 6]]

  """
  @spec transpose(matrix) :: matrix

  def transpose(m), do: do_transpose(m)

  defp do_transpose([head | _]) when head == [], do: []

  defp do_transpose(rows) do
    firsts = Enum.map(rows, fn x -> hd(x) end)
    others = Enum.map(rows, fn x -> tl(x) end)
    [firsts | do_transpose(others)]
  end

  @doc """
  Scalar multiplication

  ## Examples

      iex> LearnKit.Math.scalar_multiply(10, [5, 6])
      [50, 60]

  """
  @spec scalar_multiply(number, list) :: list

  def scalar_multiply(multiplicator, list) when is_list(list) do
    Enum.map(list, fn x -> x * multiplicator end)
  end

  @doc """
  Vector subtraction

  ## Examples

      iex> LearnKit.Math.vector_subtraction([40, 50, 60], [35, 5, 40])
      [5, 45, 20]

  """
  @spec vector_subtraction(list, list) :: list

  def vector_subtraction(x, y) when length(x) == length(y) do
    Enum.zip(x, y)
    |> Enum.map(fn {xi, yi} -> xi - yi end)
  end

  @doc """
  Calculate the sample covariance of two lists

  ## Examples

      iex> LearnKit.Math.covariance([1, 2, 3], [14, 17, 25])
      5.5

  """
  @spec covariance(list, list) :: number

  def covariance(x, y) when length(x) == length(y) do
    mean_x = mean(x)
    mean_y = mean(y)
    size = length(x)

    Enum.zip(x, y)
    |> Enum.reduce(0, fn {xi, yi}, acc -> acc + (xi - mean_x) * (yi - mean_y) end)
    |> division(size - 1)
  end

  @doc """
  Correlation of two lists

  ## Examples

      iex> LearnKit.Math.correlation([1, 2, 3], [14, 17, 25])
      0.9672471299049061

  """
  @spec correlation(list, list) :: number

  def correlation(x, y) when length(x) == length(y) do
    mean_x = mean(x)
    mean_y = mean(y)

    numerator = Enum.zip(x, y) |> Enum.reduce(0, fn {xi, yi}, acc -> acc + (xi - mean_x) * (yi - mean_y) end)
    denom_x = Enum.reduce(x, 0, fn xi, acc -> acc + :math.pow(xi - mean_x, 2) end)
    denom_y = Enum.reduce(y, 0, fn yi, acc -> acc + :math.pow(yi - mean_y, 2) end)

    numerator / :math.sqrt(denom_x * denom_y)
  end
end
--------------------------------------------------------------------------------
/lib/learn_kit/naive_bayes/gaussian.ex:
--------------------------------------------------------------------------------
defmodule LearnKit.NaiveBayes.Gaussian do
  @moduledoc """
  Module for the Gaussian Naive Bayes algorithm
  """

  defstruct data_set: [], fit_data: []

  alias LearnKit.NaiveBayes.Gaussian
  use Gaussian.Normalize
  use Gaussian.Fit
  use Gaussian.Classify
  use Gaussian.Score

  @type label :: atom
  @type feature :: [number]
  @type prediction :: {label, number}
  @type predictions :: [prediction]
  @type point :: {label, feature}
  @type features :: [feature]
  @type data_set :: [{label, features}]
  @type fit_feature :: %{mean: float, standard_deviation: float, variance: float}
  @type fit_features :: [fit_feature]
  @type fit_data :: [{label, fit_features}]

  @doc """
  Creates a classifier with an empty data_set

  ## Examples

      iex> classifier = LearnKit.NaiveBayes.Gaussian.new
      %LearnKit.NaiveBayes.Gaussian{data_set: [], fit_data: []}

  """
  @spec new() :: %Gaussian{data_set: []}

  def new, do: Gaussian.new([])

  @doc """
  Creates a classifier with the given data_set

  ## Parameters

    - data_set: keyword list with labels and features in tuples

  ## Examples

      iex> classifier = LearnKit.NaiveBayes.Gaussian.new([{:a1, [[1, 2], [2, 3]]}, {:b1, [[-1, -2]]}])
      %LearnKit.NaiveBayes.Gaussian{data_set: [a1: [[1, 2], [2, 3]], b1: [[-1, -2]]], fit_data: []}

  """
  @spec new(data_set) :: %Gaussian{data_set: data_set}

  def new(data_set), do: %Gaussian{data_set: data_set}

  @doc """
  Adds train data to the classifier

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - train data: tuple with label and feature

  ## Examples

      iex> classifier = classifier |> LearnKit.NaiveBayes.Gaussian.add_train_data({:a1, [-1, -1]})
      %LearnKit.NaiveBayes.Gaussian{data_set: [a1: [[-1, -1]]], fit_data: []}

  """
  @spec add_train_data(%Gaussian{data_set: data_set}, point) :: %Gaussian{data_set: data_set}

  def add_train_data(%Gaussian{data_set: data_set}, {key, value}) do
    features = Keyword.get(data_set, key, [])
    data_set = Keyword.put(data_set, key, [value | features])
    %Gaussian{data_set: data_set}
  end

  @doc """
  Normalizes train data

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - type: none/minimax/z_normalization, default is none, optional

  ## Examples

      iex> classifier = classifier |> LearnKit.NaiveBayes.Gaussian.normalize_train_data("minimax")
      %LearnKit.NaiveBayes.Gaussian{
        data_set: [a1: [[0.6666666666666666, 0.8], [1.0, 1.0]], b1: [[0.0, 0.0]]],
        fit_data: []
      }

  """
  @spec normalize_train_data(%Gaussian{data_set: data_set}, String.t()) :: %Gaussian{data_set: data_set, fit_data: fit_data}

  def normalize_train_data(%Gaussian{data_set: data_set}, type \\ "none") when is_binary(type) do
    %Gaussian{data_set: normalize_data(data_set, type), fit_data: []}
  end

  @doc """
  Fits train data

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}

  ## Examples

      iex> classifier = classifier |> LearnKit.NaiveBayes.Gaussian.fit
      %LearnKit.NaiveBayes.Gaussian{
        data_set: [a1: [[-1, -1]]],
        fit_data: [
          a1: [
            %{mean: -1.0, standard_deviation: 0.0, variance: 0.0},
            %{mean: -1.0, standard_deviation: 0.0, variance: 0.0}
          ]
        ]
      }

  """
  @spec fit(%Gaussian{data_set: data_set}) :: %Gaussian{data_set: data_set, fit_data: fit_data}

  def fit(%Gaussian{data_set: data_set}) do
    %Gaussian{data_set: data_set, fit_data: fit_data(data_set)}
  end

  @doc """
  Returns probability estimates for the feature

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - feature: feature for prediction

  ## Examples

      iex> classifier |> LearnKit.NaiveBayes.Gaussian.predict_proba([1, 2])
      {:ok, [a1: 0.0359, a2: 0.0039]}

  """
  @spec predict_proba(%Gaussian{fit_data: fit_data}, feature) :: {:ok, predictions}

  def predict_proba(%Gaussian{fit_data: fit_data}, feature) do
    result = classify_data(fit_data, feature)
    {:ok, result}
  end

  @doc """
  Returns the exact prediction for the feature

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}
    - feature: feature for prediction

  ## Examples

      iex> classifier |> LearnKit.NaiveBayes.Gaussian.predict([1, 2])
      {:ok, {:a1, 0.334545454}}

  """
  @spec predict(%Gaussian{fit_data: fit_data}, feature) :: {:ok, prediction}

  def predict(%Gaussian{fit_data: fit_data}, feature) do
    # keep the label with the highest probability estimate
    result =
      fit_data
      |> classify_data(feature)
      |> Enum.sort_by(fn {_label, probability} -> probability end)
      |> List.last()

    {:ok, result}
  end

  @doc """
  Returns the mean accuracy on the given test data and labels

  ## Parameters

    - classifier: %LearnKit.NaiveBayes.Gaussian{}

  ## Examples

      iex> classifier |> LearnKit.NaiveBayes.Gaussian.score
      {:ok, 0.857143}

  """
  @spec score(%Gaussian{data_set: data_set, fit_data: fit_data}) :: {:ok, number}

  def score(%Gaussian{data_set: data_set, fit_data: fit_data}) do
    result = calc_score(fit_data, data_set)
    {:ok, result}
  end
end
--------------------------------------------------------------------------------